dashclaw 2.11.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +330 -35
  2. package/dashclaw.js +96 -1
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -18,58 +18,207 @@ pip install dashclaw
18
18
 
19
19
  ## The Governance Loop
20
20
 
21
- DashClaw v2 is designed around a single 4-step loop.
21
+ DashClaw v2 is designed around a 4-step loop, with an optional
22
+ human-in-the-loop (HITL) branch when policy requires approval.
23
+
24
+ ```
25
+ guard ─▶ createAction ─▶ (if pending_approval: waitForApproval) ─▶ updateOutcome
26
+ ```
22
27
 
23
28
  ### Node.js
24
29
  ```javascript
25
- import { DashClaw } from 'dashclaw';
30
+ import { DashClaw, GuardBlockedError, ApprovalDeniedError } from 'dashclaw';
26
31
 
27
32
  const claw = new DashClaw({
28
33
  baseUrl: process.env.DASHCLAW_BASE_URL,
29
34
  apiKey: process.env.DASHCLAW_API_KEY,
30
- agentId: 'my-agent'
35
+ agentId: 'my-agent',
36
+ agentName: 'My Agent', // optional — stored in audit trail for attribution
31
37
  });
32
38
 
33
39
  // 1. Ask permission
34
- const res = await claw.guard({ action_type: 'deploy' });
40
+ const decision = await claw.guard({
41
+ action_type: 'deploy',
42
+ declared_goal: 'Ship v2.4.0 to production',
43
+ risk_score: 90,
44
+ });
45
+ if (decision.decision === 'block') {
46
+ throw new GuardBlockedError(decision);
47
+ }
35
48
 
36
- // 2. Log intent
37
- const { action_id } = await claw.createAction({ action_type: 'deploy' });
49
+ // 2. Log intent. Server may gate this if policy requires approval —
50
+ // check action.status before assuming you're clear to execute.
51
+ const { action, action_id } = await claw.createAction({
52
+ action_type: 'deploy',
53
+ declared_goal: 'Ship v2.4.0 to production',
54
+ risk_score: 90,
55
+ });
38
56
 
39
- // 3. Log evidence
40
- await claw.recordAssumption({ action_id, assumption: 'Tests passed' });
57
+ // 3. If the server flagged this for human review, wait for an operator.
58
+ if (action?.status === 'pending_approval') {
59
+ try {
60
+ await claw.waitForApproval(action_id);
61
+ } catch (err) {
62
+ if (err instanceof ApprovalDeniedError) return; // operator denied
63
+ throw err;
64
+ }
65
+ }
41
66
 
42
- // 4. Update result
43
- await claw.updateOutcome(action_id, { status: 'completed' });
67
+ // 4. Execute the real work, then record the outcome
68
+ await claw.recordAssumption({ action_id, assumption: 'Staging tests passed' });
69
+ try {
70
+ const result = await myLlmCall();
71
+ await claw.updateOutcome(action_id, {
72
+ status: 'completed',
73
+ // Optional — populate Analytics cost/token charts. Cost is derived
74
+ // server-side from the configured pricing table when model + tokens
75
+ // are provided without an explicit cost_estimate.
76
+ tokens_in: result.usage.input_tokens,
77
+ tokens_out: result.usage.output_tokens,
78
+ model: result.model,
79
+ });
80
+ } catch (err) {
81
+ await claw.updateOutcome(action_id, { status: 'failed', error_message: err.message });
82
+ }
44
83
  ```
45
84
 
46
85
  ### Python
47
86
  ```python
48
87
  import os
49
- from dashclaw import DashClaw
88
+ from dashclaw import DashClaw, GuardBlockedError, ApprovalDeniedError
50
89
 
51
90
  claw = DashClaw(
52
91
  base_url=os.environ["DASHCLAW_BASE_URL"],
53
92
  api_key=os.environ["DASHCLAW_API_KEY"],
54
- agent_id="my-agent"
93
+ agent_id="my-agent",
94
+ agent_name="My Agent", # optional — stored in audit trail for attribution
55
95
  )
56
96
 
57
97
  # 1. Ask permission
58
- res = claw.guard({"action_type": "deploy"})
98
+ decision = claw.guard({
99
+ "action_type": "deploy",
100
+ "declared_goal": "Ship v2.4.0 to production",
101
+ "risk_score": 90,
102
+ })
103
+ if decision["decision"] == "block":
104
+ raise GuardBlockedError(decision)
59
105
 
60
106
  # 2. Log intent
61
- action = claw.create_action(action_type="deploy")
107
+ action = claw.create_action(
108
+ action_type="deploy",
109
+ declared_goal="Ship v2.4.0 to production",
110
+ risk_score=90,
111
+ )
62
112
  action_id = action["action_id"]
63
113
 
64
- # 3. Log evidence
65
- claw.record_assumption({"action_id": action_id, "assumption": "Tests passed"})
114
+ # 3. If the server flagged this for human review, wait for an operator.
115
+ if action.get("action", {}).get("status") == "pending_approval":
116
+ try:
117
+ claw.wait_for_approval(action_id)
118
+ except ApprovalDeniedError:
119
+ raise SystemExit("approval denied") # operator denied — stop here (a bare `pass` would fall through and execute the action)
66
120
 
67
- # 4. Update result
121
+ # 4. Execute and record outcome
122
+ claw.record_assumption({"action_id": action_id, "assumption": "Staging tests passed"})
68
123
  claw.update_outcome(action_id, status="completed")
69
124
  ```
70
125
 
71
126
  ---
72
127
 
128
+ ## Human-in-the-Loop (HITL) Approval Flow
129
+
130
+ When a guard policy, a capability `requires_approval` flag, or any server-side
131
+ rule triggers human review, the server responds to `createAction()` with
132
+ `action.status === 'pending_approval'` and HTTP **202**. Your agent's job is to
133
+ pause on `waitForApproval()` until an operator clicks **Approve** or **Deny** from the dashboard, the
134
+ CLI, the mobile PWA, or — on instances with Telegram configured — an inline
135
+ Telegram button.
136
+
137
+ ### The rule every agent author needs to know
138
+
139
+ **`waitForApproval()` must be called with the `action_id` returned by
140
+ `createAction()`, NOT with the `action_id` returned by `guard()`.**
141
+
142
+ These are two different records in two different tables:
143
+
144
+ | Call | Returns `action_id` that refers to… | Prefix |
145
+ |---|---|---|
146
+ | `guard()` | A row in `guard_decisions` (the decision log) | `act_gd_…` |
147
+ | `createAction()` | A row in `action_records` (the thing you're actually doing) | `act_…` |
148
+
149
+ `waitForApproval()` polls `GET /api/actions/:id`, which is backed by the
150
+ `action_records` table. Passing it a `guard_decisions` ID (`act_gd_…`) will
151
+ either return 404 or time out waiting on a row that doesn't exist. This was a
152
+ real bug in an early version of the OpenClaw plugin — don't reproduce it.
153
+
154
+ ### Correct sequence
155
+
156
+ ```javascript
157
+ // 1. Guard — advisory; may return 'allow', 'block', 'warn', or 'require_approval'
158
+ const decision = await claw.guard({
159
+ action_type: 'post_message',
160
+ declared_goal: 'Notify #ops of deploy start',
161
+ risk_score: 40,
162
+ });
163
+ if (decision.decision === 'block') {
164
+ throw new GuardBlockedError(decision);
165
+ }
166
+
167
+ // 2. Create the action. The server re-evaluates policy at this point and is
168
+ // the authoritative source for whether human review is required. Even if
169
+ // guard returned 'allow', the server may still set status='pending_approval'
170
+ // (for example, if a capability has requires_approval=true).
171
+ const { action, action_id } = await claw.createAction({
172
+ action_type: 'post_message',
173
+ declared_goal: 'Notify #ops of deploy start',
174
+ risk_score: 40,
175
+ });
176
+
177
+ // 3. Check the SERVER's verdict, not the guard decision.
178
+ if (action?.status === 'pending_approval') {
179
+ try {
180
+ // Use createAction's action_id, never the guard decision's action_id.
181
+ await claw.waitForApproval(action_id, { timeout: 600_000 });
182
+ } catch (err) {
183
+ if (err instanceof ApprovalDeniedError) {
184
+ // Operator denied — do NOT execute the action
185
+ return { denied: true, reason: err.message };
186
+ }
187
+ throw err;
188
+ }
189
+ }
190
+
191
+ // 4. Execute and record outcome
192
+ await doTheWork();
193
+ await claw.updateOutcome(action_id, { status: 'completed' });
194
+ ```
195
+
196
+ ### What `waitForApproval()` does under the hood
197
+
198
+ - Opens an SSE connection to `/api/stream` and watches for
199
+ `action.updated` events scoped to the given `actionId`.
200
+ - Falls back to HTTP polling of `GET /api/actions/:id` every 5 seconds if
201
+ SSE is unavailable.
202
+ - Resolves when `action.approved_by` is set (operator approved).
203
+ - Throws `ApprovalDeniedError` when `action.status` becomes `failed` or
204
+ `cancelled` (operator denied).
205
+ - Throws a timeout error after `options.timeout` milliseconds (default
206
+ `300_000` = 5 minutes).
207
+
208
+ ### Why guard and the server can disagree
209
+
210
+ `guard()` is fast, in-memory, advisory. The server's `createAction` handler
211
+ re-runs the exact same `evaluateGuard()` pipeline against the **persisted**
212
+ action record, plus any capability-specific `requires_approval` flags and
213
+ org-scoped rules that can only be resolved at write time. So the authoritative
214
+ answer to "does this need human review?" is always `action.status` on the
215
+ `createAction()` response — not `decision.decision` on the `guard()` response.
216
+
217
+ Short version: **trust `action.status`, not `decision.decision`, for HITL
218
+ branching.**
219
+
220
+ ---
221
+
73
222
  ## SDK Tiers
74
223
 
75
224
  DashClaw currently exposes a canonical Node SDK surface plus a legacy compatibility layer:
@@ -103,28 +252,38 @@ See:
103
252
  The v2 SDK exposes the stable governance runtime plus promoted execution domains in the canonical Node client:
104
253
 
105
254
  ### Core Runtime
106
- - `guard(context)` -- Policy evaluation ("Can I do X?"). Returns `risk_score` (server-computed) and `agent_risk_score` (raw agent value)
107
- - `createAction(action)` -- Lifecycle tracking ("I am doing X")
108
- - `updateOutcome(id, outcome)` -- Result recording ("X finished with Y")
255
+ - `guard(context)` -- Policy evaluation ("Can I do X?"). Returns `risk_score` (server-computed) and `agent_risk_score` (raw agent value). Automatically includes `agent_name` from the constructor if not overridden in the call context.
256
+ - `createAction(action)` -- Lifecycle tracking ("I am doing X"). Accepts optional `idempotency_key`; on collision returns the existing row with `{ idempotent_replay: true }` instead of inserting a duplicate.
257
+ - `updateOutcome(id, outcome)` -- Result recording ("X finished with Y"). `outcome` accepts `status`, `output_summary`, `side_effects`, `artifacts_created`, `error_message`, `duration_ms`, `tokens_in`, `tokens_out`, `model`, `cost_estimate`. When `tokens_in` / `tokens_out` are reported without an explicit `cost_estimate`, the server derives cost from `model` using the configured pricing table.
109
258
  - `recordAssumption(assumption)` -- Integrity tracking ("I believe Z while doing X")
110
259
  - `waitForApproval(id)` -- Real-time SSE listener for human-in-the-loop approvals (automatic polling fallback)
111
260
  - `approveAction(id, decision, reasoning?)` -- Submit approval decisions from code
112
261
  - `getPendingApprovals()` -- List actions awaiting human review
113
262
 
263
+ ### Durable Execution Finality (v2.13.3+)
264
+ Terminal outcome reporting that is one-shot, retry-safe, and immutable once non-pending. Separate from `updateOutcome`, which remains the lifecycle-PATCH path. Full spec: [`docs/architecture/durable-execution-finality.md`](../docs/architecture/durable-execution-finality.md). Detailed examples in the [Action Outcome](#action-outcome-durable-execution-finality) subsection of Execution Studio below.
265
+
266
+ - `reportActionOutcome(id, { status, summary?, error_message?, progress? })` -- Record the terminal outcome. `status` must be `completed`, `partial`, or `failed`; `lost_confirmation` is reserved for the system sweep. First call wins; subsequent POSTs return 409 with `current_status`.
267
+ - `getActionOutcome(id)` -- Read the current outcome state. Returns `status` (one of `pending` / `completed` / `partial` / `failed` / `lost_confirmation`), `outcome_at`, `summary`, `error_message`, `progress`, `elapsed_ms`. Poll this before retrying any approved action.
268
+ - `reportActionSuccess(id, summary?)` -- Convenience wrapper for `completed`.
269
+ - `reportActionFailure(id, errorMessage, summary?)` -- Convenience wrapper for `failed`. `errorMessage` is required and is sent as `error_message`.
270
+ - `reportActionPartial(id, progress, summary?)` -- Convenience wrapper for `partial`. `progress` (object) is required.
271
+ - `deriveIdempotencyKey(parts)` -- SHA-256 hex digest of intent-fields for the `idempotency_key` field on `createAction`. Order-independent. Derive from intent (agent, action_type, scope, request_id), not timestamps.
272
+
114
273
  ### Decision Integrity
115
274
  - `registerOpenLoop(actionId, type, desc)` -- Register unresolved dependencies.
116
275
  - `resolveOpenLoop(loopId, status, res)` -- Resolve pending loops.
117
276
  - `getSignals()` -- Get current risk signals across all agents.
118
277
 
119
278
  ### Swarm & Connectivity
120
- - `heartbeat(status, metadata)` -- Report agent presence and health. **As of DashClaw 2.13.0, heartbeats are implicit on `createAction()` — you only need this if you want to report presence without recording an action.**
279
+ - `heartbeat(status, metadata)` -- Report agent presence and health. **As of DashClaw platform 2.13.0 (server-side change, independent of SDK version), heartbeats are implicit on `createAction()` — you only need this if you want to report presence without recording an action.**
121
280
  - `reportConnections(connections)` -- Report active provider connections.
122
281
 
123
282
  ### Learning & Optimization
124
283
  - `getLearningVelocity()` -- Track agent improvement rate.
125
284
  - `getLearningCurves()` -- Measure efficiency gains per action type.
126
285
  - `getLessons({ actionType, limit })` -- Fetch consolidated lessons from scored outcomes.
127
- - `renderPrompt(context)` -- Fetch rendered prompt templates from DashClaw.
286
+ - `renderPrompt({ template_id, version_id, variables, record })` -- Fetch a rendered prompt template from DashClaw. `template_id` is required; `version_id` defaults to the active version; `variables` is an object of mustache values; `record: true` persists the render as a governance event.
128
287
 
129
288
  ### Learning Loop
130
289
 
@@ -367,30 +526,55 @@ Messages sent through the context are automatically correlated with the action i
367
526
 
368
527
  DashClaw uses standard HTTP status codes and custom error classes:
369
528
 
370
- - `GuardBlockedError` -- Thrown when `claw.guard()` returns a `block` decision.
371
- - `ApprovalDeniedError` -- Thrown when an operator denies an action during `waitForApproval()`.
529
+ - `GuardBlockedError` -- Thrown by **any** SDK call when the server returns HTTP 403 with `{ decision: { decision: 'block' } }`. Note that a successful `guard()` call returning `{ decision: 'block' }` in a **200** body does **not** throw — it just returns the decision object. Always check `decision.decision === 'block'` after `guard()` and throw `new GuardBlockedError(decision)` yourself if you want to abort early, as shown in the governance loop above.
530
+ - `ApprovalDeniedError` -- Thrown by `waitForApproval()` when an operator denies the action (server sets `status` to `failed` or `cancelled`).
372
531
 
373
532
  ---
374
533
 
375
- ## CLI Approval Channel
534
+ ## CLI (`@dashclaw/cli`)
376
535
 
377
- Install the DashClaw CLI to approve agent actions from the terminal:
536
+ Install the DashClaw CLI for terminal approvals and self-host diagnostics:
378
537
 
379
538
  ```bash
380
539
  npm install -g @dashclaw/cli
381
540
  ```
382
541
 
542
+ **Approvals:**
543
+
383
544
  ```bash
384
545
  dashclaw approvals # interactive approval inbox
385
546
  dashclaw approve <actionId> # approve a specific action
386
547
  dashclaw deny <actionId> # deny a specific action
387
548
  ```
388
549
 
389
- When an agent calls `waitForApproval()`, it prints the action ID and replay link to stdout. Approve from any terminal or the dashboard, and the agent unblocks instantly.
550
+ **Diagnostics:**
551
+
552
+ ```bash
553
+ dashclaw doctor # diagnose + auto-fix safe issues (database, config, auth, deployment, SDK, governance, drift)
554
+ dashclaw doctor --json # CI/machine-readable
555
+ dashclaw doctor --no-fix # diagnose only
556
+ dashclaw doctor --category database,config
557
+ ```
558
+
559
+ Config resolution order: env vars (`DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, optional `DASHCLAW_AGENT_ID`) → `~/.dashclaw/config.json` (`600`, persisted after interactive prompt) → first-run prompt. `dashclaw logout` removes saved config.
560
+
561
+ When an agent calls `waitForApproval()`, it prints the action ID and replay link to stdout. Approve from any terminal, the browser dashboard, the `/approve` mobile PWA, or — if the instance has Telegram configured — via an inline Telegram Approve/Reject button pushed to the admin chat. Decisions sync over Redis SSE within ~1 second.
562
+
563
+ ## Self-Host Doctor (`npm run doctor`)
564
+
565
+ For operators running a self-hosted DashClaw instance, Doctor is also available as a local script with filesystem-level fix powers:
566
+
567
+ ```bash
568
+ npm run doctor # can write .env, run migrations, seed default policy
569
+ ```
570
+
571
+ Doctor check modules are emitted from the livingcode shape (`app/lib/doctor/generated/checks-from-shape.mjs`) and run against `GET /api/doctor` / `POST /api/doctor/fix`. The `.env` is always backed up before any write. Includes a drift guard that flags when shape-derived artifacts are out of sync — fix with `npm run livingcode:refresh`.
572
+
573
+ ## MCP Server (`@dashclaw/mcp-server`)
390
574
 
391
- ## MCP Server (Zero-Code Integration)
575
+ If your agent supports Model Context Protocol (Claude Code, Claude Desktop, Managed Agents, MCP Inspector), skip the SDK entirely and let the MCP server wire governance into your agent loop.
392
576
 
393
- If your agent supports MCP (Claude Code, Claude Desktop, Managed Agents), you can skip the SDK entirely:
577
+ **stdio transport** (recommended for Claude Desktop / Claude Code):
394
578
 
395
579
  ```json
396
580
  {
@@ -404,21 +588,39 @@ If your agent supports MCP (Claude Code, Claude Desktop, Managed Agents), you ca
404
588
  }
405
589
  ```
406
590
 
407
- The MCP server exposes the same governance surface as the SDK (guard, record, invoke, wait for approval) plus discovery (capabilities, policies) and session lifecycle.
591
+ **Streamable HTTP transport** (same surface, served by your DashClaw instance at `POST /api/mcp`).
592
+
593
+ **8 tools:** `dashclaw_guard`, `dashclaw_record`, `dashclaw_invoke`, `dashclaw_capabilities_list`, `dashclaw_policies_list`, `dashclaw_wait_for_approval`, `dashclaw_session_start`, `dashclaw_session_end`.
594
+
595
+ **4 resources:** `dashclaw://policies`, `dashclaw://capabilities`, `dashclaw://agent/{agent_id}/history`, `dashclaw://status`.
596
+
597
+ ## OpenClaw Plugin (`@dashclaw/openclaw-plugin`)
598
+
599
+ For teams using the OpenClaw agent framework, the governance plugin intercepts `PreToolUse` / `PostToolUse` lifecycle hooks and runs guard → record → wait-for-approval automatically. Tool classification vocabulary aligns with DashClaw's guard action types. Install via the openclaw CLI, which picks up the bundled `HOOK.md` pack.
600
+
601
+ ## Governance Skill for Claude (Anthropic)
602
+
603
+ For Anthropic Managed Agents or Claude Code sessions, the `@dashclaw/governance` skill teaches the agent how to use the MCP tools correctly — risk thresholds, decision handling, recording rules, session lifecycle. Pairs with `@dashclaw/mcp-server`. Download at `https://<your-instance>/downloads/dashclaw-governance.zip` or see `public/downloads/dashclaw-governance/`.
408
604
 
409
605
  ---
410
606
 
411
607
  ## Claude Code Hooks
412
608
 
413
- Govern Claude Code tool calls without any SDK instrumentation. Copy two files from the `hooks/` directory in the repo into your `.claude/hooks/` folder:
609
+ Govern Claude Code tool calls without any SDK instrumentation. One command from anywhere DashClaw is cloned:
414
610
 
415
611
  ```bash
416
- # In your project directory
417
- cp path/to/DashClaw/hooks/dashclaw_pretool.py .claude/hooks/
418
- cp path/to/DashClaw/hooks/dashclaw_posttool.py .claude/hooks/
612
+ # From a DashClaw checkout
613
+ npm run hooks:install
614
+
615
+ # From any other project, pointing at a DashClaw checkout
616
+ node /path/to/DashClaw/scripts/install-hooks.mjs --target=.
419
617
  ```
420
618
 
421
- Then merge the hooks block from `hooks/settings.json` into your `.claude/settings.json`. Set `DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, and optionally `DASHCLAW_HOOK_MODE=enforce`.
619
+ This installs three hooks (`dashclaw_pretool.py`, `dashclaw_posttool.py`, `dashclaw_stop.py`) plus the bundled `dashclaw_agent_intel/` tool-classification module into `.claude/hooks/`, then merges the `PreToolUse`, `PostToolUse`, and `Stop` blocks into `.claude/settings.json`. Idempotent: re-run after `git pull` to upgrade.
620
+
621
+ The Stop hook captures per-turn LLM token usage from the session transcript and PATCHes it onto the action records the pretool opened during the turn, so cost analytics light up without per-agent instrumentation.
622
+
623
+ Set `DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, and optionally `DASHCLAW_HOOK_MODE=enforce`. Full guide and per-hook details in [`hooks/README.md`](../hooks/README.md).
422
624
 
423
625
  ---
424
626
 
@@ -463,6 +665,69 @@ const { rootActionId, nodes, edges } = await claw.getActionGraph(actionId);
463
665
  // edges: parent_child | related | assumption_of | loop_from
464
666
  ```
465
667
 
668
+ ### Action Outcome (durable execution finality)
669
+
670
+ Every approved action carries an outcome state: `pending` (not yet reported) or one of the terminal values `completed`, `partial`, `failed`, or `lost_confirmation`. Agents call `reportActionOutcome` to record finality, and call `getActionOutcome` before retrying to avoid re-executing already-completed work. Outcomes are one-shot — once non-pending, they cannot be rewritten.
671
+
672
+ ```javascript
673
+ // Report success
674
+ await claw.reportActionOutcome(actionId, {
675
+ status: 'completed',
676
+ summary: 'Deployed dashclaw 2.13.4 to production'
677
+ });
678
+
679
+ // Convenience wrappers
680
+ await claw.reportActionSuccess(actionId, 'Deployed dashclaw 2.13.4');
681
+ await claw.reportActionFailure(actionId, 'Downstream API returned 503');
682
+ await claw.reportActionPartial(actionId, { step: 2, of: 5 });
683
+
684
+ // Report failure (error_message required)
685
+ await claw.reportActionOutcome(actionId, {
686
+ status: 'failed',
687
+ error_message: 'Downstream API returned 503'
688
+ });
689
+
690
+ // Report partial progress (progress object required)
691
+ await claw.reportActionOutcome(actionId, {
692
+ status: 'partial',
693
+ progress: { step: 2, of: 5 }
694
+ });
695
+
696
+ // Retry-safe poll before re-trying any approved action
697
+ const outcome = await claw.getActionOutcome(actionId);
698
+ switch (outcome.status) {
699
+ case 'pending': /* still in flight, WAIT */ break;
700
+ case 'completed': /* already executed, SKIP */ break;
701
+ case 'failed': /* safe to RETRY */ break;
702
+ case 'lost_confirmation': /* sweep gave up, safe to RETRY */ break;
703
+ case 'partial': /* clean up then retry */ break;
704
+ }
705
+ ```
706
+
707
+ HTTP surface (when the SDK isn't available):
708
+
709
+ ```bash
710
+ curl -X POST "$BASE_URL/api/actions/$ACTION_ID/outcome" \
711
+ -H "x-api-key: $API_KEY" -H "Content-Type: application/json" \
712
+ -d '{"status":"completed","summary":"shipped"}'
713
+ # 200 → { outcome: { ... } }
714
+ # 409 → { error: "outcome already set", current_status: "completed" }
715
+ ```
716
+
717
+ Pending outcomes that are never reported are swept to `lost_confirmation` by `/api/cron/outcome-sweep`. On Vercel's Hobby plan the sweep runs once a day; each `lost_confirmation` event fires a `signal.detected` webhook so subscribers can detect and recover from it. The per-org timeout (in minutes) is configurable via the `DASHCLAW_OUTCOME_TIMEOUT_MINUTES` setting (default 15).
718
+
719
+ **Idempotency keys.** Network errors on the *create* side of the create-then-execute flow used to leave duplicate `action_records` behind. Pass `idempotency_key` on `POST /api/actions` to make creates retry-safe — a second POST with the same `(org_id, idempotency_key)` returns the original row with `{ idempotent_replay: true }` instead of inserting a duplicate. Derive keys from intent, not timestamps:
720
+
721
+ ```javascript
722
+ const idempotency_key = claw.deriveIdempotencyKey({
723
+ agent_id: 'deploy-bot',
724
+ action_type: 'deploy',
725
+ scope: 'prod-us-east',
726
+ request_id: requestId, // your own attempt discriminator
727
+ });
728
+ await claw.createAction({ /* ... */, idempotency_key });
729
+ ```
730
+
466
731
  ### Workflow Templates
467
732
 
468
733
  ```javascript
@@ -704,5 +969,35 @@ Health responses now include certification and recency fields such as:
704
969
 
705
970
  ---
706
971
 
972
+ ## Hosted provisioning (operator surface — not an SDK method)
973
+
974
+ When `DASHCLAW_HOSTED=true` the deployment exposes `/api/hosted/*` routes for one-click trial provisioning. These are operator-facing routes, not SDK methods — they produce the API key the SDK consumes.
975
+
976
+ ```bash
977
+ # Mint a trial workspace (no auth required; Turnstile-gated in production)
978
+ curl -X POST https://hosted.example.com/api/hosted/workspaces \
979
+ -H "content-type: application/json" \
980
+ -d '{"turnstile_token": "..."}'
981
+ # → { "workspace_id": "org_...", "api_key": "oc_live_...", "endpoint": "...",
982
+ # "expires_at": "...", "trial_action_cap": 10000, "key_prefix": "oc_live_",
983
+ # "next_steps_url": "https://hosted.example.com/connect?hosted=org_..." }
984
+
985
+ # Admin: inspect a trial workspace (x-api-key with admin role)
986
+ curl https://hosted.example.com/api/hosted/workspaces/org_abc \
987
+ -H "x-api-key: <admin_key>"
988
+
989
+ # Admin: delete a trial workspace
990
+ curl -X DELETE https://hosted.example.com/api/hosted/workspaces/org_abc \
991
+ -H "x-api-key: <admin_key>"
992
+
993
+ # Cron: sweep expired trials (admin role OR X-Cleanup-Secret)
994
+ curl -X POST https://hosted.example.com/api/hosted/cleanup \
995
+ -H "X-Cleanup-Secret: $HOSTED_CLEANUP_SECRET"
996
+ ```
997
+
998
+ These routes return 404 when `DASHCLAW_HOSTED` is unset — self-host deploys are unaffected.
999
+
1000
+ ---
1001
+
707
1002
  ## License
708
1003
  MIT
package/dashclaw.js CHANGED
@@ -3,6 +3,8 @@
3
3
  * Focused governance runtime client for AI agents.
4
4
  */
5
5
 
6
+ import { createHash } from 'crypto';
7
+
6
8
  class ApprovalDeniedError extends Error {
7
9
  constructor(message, decision) {
8
10
  super(message);
@@ -25,8 +27,9 @@ class DashClaw {
25
27
  * @param {string} options.baseUrl - DashClaw base URL
26
28
  * @param {string} options.apiKey - API key for authentication
27
29
  * @param {string} options.agentId - Unique identifier for this agent
30
+ * @param {string} [options.agentName] - Human-readable label for this agent (stored in audit trail)
28
31
  */
29
- constructor({ baseUrl, apiKey, agentId }) {
32
+ constructor({ baseUrl, apiKey, agentId, agentName }) {
30
33
  if (!baseUrl) throw new Error('baseUrl is required');
31
34
  if (!apiKey) throw new Error('apiKey is required');
32
35
  if (!agentId) throw new Error('agentId is required');
@@ -34,6 +37,7 @@ class DashClaw {
34
37
  this.baseUrl = baseUrl.replace(/\/$/, '');
35
38
  this.apiKey = apiKey;
36
39
  this.agentId = agentId;
40
+ this.agentName = agentName || null;
37
41
 
38
42
  this.execution = {
39
43
  capabilities: {
@@ -96,6 +100,8 @@ class DashClaw {
96
100
  return this._request('/api/guard', 'POST', {
97
101
  ...context,
98
102
  agent_id: context.agent_id || this.agentId,
103
+ // Include agent_name for audit attribution if not already provided by caller
104
+ ...(context.agent_name == null && this.agentName ? { agent_name: this.agentName } : {}),
99
105
  });
100
106
  }
101
107
 
@@ -749,6 +755,95 @@ class DashClaw {
749
755
  return this._request(`/api/actions/${actionId}/graph`, 'GET');
750
756
  }
751
757
 
758
+ // ---------------------------------------------------------------------------
759
+ // Durable execution finality — terminal outcome reporting
760
+ // See docs/architecture/durable-execution-finality.md
761
+ // ---------------------------------------------------------------------------
762
+
763
+ /**
764
+ * POST /api/actions/:id/outcome — Record the terminal outcome of an action.
765
+ *
766
+ * @param {string} actionId
767
+ * @param {Object} payload
768
+ * @param {'completed'|'partial'|'failed'} payload.status
769
+ * @param {string} [payload.summary]
770
+ * @param {string} [payload.error_message] — required when status=failed
771
+ * @param {Object} [payload.progress] — required when status=partial
772
+ * @returns {Promise<{ outcome: object, security: object }>}
773
+ * @throws on 409 when the outcome is already terminal — inspect the response
774
+ * body for `current_status` before deciding what to do next.
775
+ */
776
+ async reportActionOutcome(actionId, payload) {
777
+ return this._request(`/api/actions/${actionId}/outcome`, 'POST', payload);
778
+ }
779
+
780
+ /**
781
+ * GET /api/actions/:id/outcome — Read the current outcome state of an action.
782
+ *
783
+ * Returns `{ action_id, status, outcome_at, summary, error_message, progress, elapsed_ms }`.
784
+ * Status is one of: pending, completed, partial, failed, lost_confirmation.
785
+ * Use this BEFORE retrying any approved action to avoid double-execution.
786
+ */
787
+ async getActionOutcome(actionId) {
788
+ return this._request(`/api/actions/${actionId}/outcome`, 'GET');
789
+ }
790
+
791
+ /**
792
+ * Convenience: report a successful terminal outcome.
793
+ */
794
+ async reportActionSuccess(actionId, summary) {
795
+ return this.reportActionOutcome(actionId, { status: 'completed', summary });
796
+ }
797
+
798
+ /**
799
+ * Convenience: report a failed terminal outcome. `error_message` is required.
800
+ */
801
+ async reportActionFailure(actionId, errorMessage, summary) {
802
+ return this.reportActionOutcome(actionId, {
803
+ status: 'failed',
804
+ error_message: errorMessage,
805
+ summary,
806
+ });
807
+ }
808
+
809
+ /**
810
+ * Convenience: report a partial outcome with progress state. Progress is
811
+ * required (an object describing where the agent stopped).
812
+ */
813
+ async reportActionPartial(actionId, progress, summary) {
814
+ return this.reportActionOutcome(actionId, {
815
+ status: 'partial',
816
+ progress,
817
+ summary,
818
+ });
819
+ }
820
+
821
+ /**
822
+ * Derive a stable idempotency key from the *intent* of an action so a
823
+ * retried `createAction` call returns the original row instead of creating
824
+ * a duplicate. Pass the same `parts` for the same logical action; vary at
825
+ * least one part for distinct actions.
826
+ *
827
+ * The hash function uses SHA-256 hex via Node's built-in crypto. In
828
+ * browser-only environments lacking Node's `crypto`, callers should compute the
829
+ * key themselves and pass it directly to `createAction({ idempotency_key })`.
830
+ *
831
+ * @param {Object} parts — at minimum agent_id + action_type + a request
832
+ * discriminator that uniquely identifies this attempt. Reusing the key
833
+ * for a logically distinct action is the agent's bug, not DashClaw's.
834
+ * @returns {string} SHA-256 hex digest
835
+ */
836
+ deriveIdempotencyKey(parts) {
837
+ if (!parts || typeof parts !== 'object') {
838
+ throw new TypeError('deriveIdempotencyKey: parts must be an object');
839
+ }
840
+ const ordered = Object.keys(parts)
841
+ .sort()
842
+ .map((k) => `${k}=${parts[k] ?? ''}`)
843
+ .join('|');
844
+ return createHash('sha256').update(ordered).digest('hex');
845
+ }
846
+
752
847
  // ---------------------------------------------------------------------------
753
848
  // Execution Studio — Workflow Templates
754
849
  // ---------------------------------------------------------------------------
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dashclaw",
3
- "version": "2.11.1",
3
+ "version": "2.12.0",
4
4
  "description": "Minimal governance runtime for AI agents. Intercept, govern, and verify agent actions.",
5
5
  "type": "module",
6
6
  "publishConfig": {