dashclaw 2.11.1 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +356 -37
  2. package/dashclaw.js +121 -4
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # DashClaw SDK (v2.11.1)
1
+ # DashClaw SDK (v2.12.0)
2
2
 
3
3
  **Minimal governance runtime for AI agents.**
4
4
 
@@ -18,58 +18,211 @@ pip install dashclaw
18
18
 
19
19
  ## The Governance Loop
20
20
 
21
- DashClaw v2 is designed around a single 4-step loop.
21
+ DashClaw v2 is designed around a 4-step loop, with an optional
22
+ human-in-the-loop (HITL) branch when policy requires approval.
23
+
24
+ ```
25
+ guard ─▶ createAction ─▶ (if pending_approval: waitForApproval) ─▶ updateOutcome
26
+ ```
22
27
 
23
28
  ### Node.js
24
29
  ```javascript
25
- import { DashClaw } from 'dashclaw';
30
+ import { DashClaw, GuardBlockedError, ApprovalDeniedError } from 'dashclaw';
26
31
 
27
32
  const claw = new DashClaw({
28
33
  baseUrl: process.env.DASHCLAW_BASE_URL,
29
34
  apiKey: process.env.DASHCLAW_API_KEY,
30
- agentId: 'my-agent'
35
+ agentId: 'my-agent',
36
+ agentName: 'My Agent', // optional — stored in audit trail for attribution
37
+ // Phase 2 (optional): attach a JWT from your OIDC provider for cryptographic
38
+ // attribution. When set, the server verifies the signature via JWKS and the
39
+ // JWT sub claim overrides agentId in the audit record.
40
+ // authToken: process.env.MY_AGENT_JWT,
31
41
  });
32
42
 
33
43
  // 1. Ask permission
34
- const res = await claw.guard({ action_type: 'deploy' });
44
+ const decision = await claw.guard({
45
+ action_type: 'deploy',
46
+ declared_goal: 'Ship v2.4.0 to production',
47
+ risk_score: 90,
48
+ });
49
+ if (decision.decision === 'block') {
50
+ throw new GuardBlockedError(decision);
51
+ }
35
52
 
36
- // 2. Log intent
37
- const { action_id } = await claw.createAction({ action_type: 'deploy' });
53
+ // 2. Log intent. Server may gate this if policy requires approval —
54
+ // check action.status before assuming you're clear to execute.
55
+ const { action, action_id } = await claw.createAction({
56
+ action_type: 'deploy',
57
+ declared_goal: 'Ship v2.4.0 to production',
58
+ risk_score: 90,
59
+ });
38
60
 
39
- // 3. Log evidence
40
- await claw.recordAssumption({ action_id, assumption: 'Tests passed' });
61
+ // 3. If the server flagged this for human review, wait for an operator.
62
+ if (action?.status === 'pending_approval') {
63
+ try {
64
+ await claw.waitForApproval(action_id);
65
+ } catch (err) {
66
+ if (err instanceof ApprovalDeniedError) return; // operator denied
67
+ throw err;
68
+ }
69
+ }
41
70
 
42
- // 4. Update result
43
- await claw.updateOutcome(action_id, { status: 'completed' });
71
+ // 4. Execute the real work, then record the outcome
72
+ await claw.recordAssumption({ action_id, assumption: 'Staging tests passed' });
73
+ try {
74
+ const result = await myLlmCall();
75
+ await claw.updateOutcome(action_id, {
76
+ status: 'completed',
77
+ // Optional — populate Analytics cost/token charts. Cost is derived
78
+ // server-side from the configured pricing table when model + tokens
79
+ // are provided without an explicit cost_estimate.
80
+ tokens_in: result.usage.input_tokens,
81
+ tokens_out: result.usage.output_tokens,
82
+ model: result.model,
83
+ });
84
+ } catch (err) {
85
+ await claw.updateOutcome(action_id, { status: 'failed', error_message: err.message });
86
+ }
44
87
  ```
45
88
 
46
89
  ### Python
47
90
  ```python
48
91
  import os
49
- from dashclaw import DashClaw
92
+ from dashclaw import DashClaw, GuardBlockedError, ApprovalDeniedError
50
93
 
51
94
  claw = DashClaw(
52
95
  base_url=os.environ["DASHCLAW_BASE_URL"],
53
96
  api_key=os.environ["DASHCLAW_API_KEY"],
54
- agent_id="my-agent"
97
+ agent_id="my-agent",
98
+ agent_name="My Agent", # optional — stored in audit trail for attribution
55
99
  )
56
100
 
57
101
  # 1. Ask permission
58
- res = claw.guard({"action_type": "deploy"})
102
+ decision = claw.guard({
103
+ "action_type": "deploy",
104
+ "declared_goal": "Ship v2.4.0 to production",
105
+ "risk_score": 90,
106
+ })
107
+ if decision["decision"] == "block":
108
+ raise GuardBlockedError(decision)
59
109
 
60
110
  # 2. Log intent
61
- action = claw.create_action(action_type="deploy")
111
+ action = claw.create_action(
112
+ action_type="deploy",
113
+ declared_goal="Ship v2.4.0 to production",
114
+ risk_score=90,
115
+ )
62
116
  action_id = action["action_id"]
63
117
 
64
- # 3. Log evidence
65
- claw.record_assumption({"action_id": action_id, "assumption": "Tests passed"})
118
+ # 3. If the server flagged this for human review, wait for an operator.
119
+ if action.get("action", {}).get("status") == "pending_approval":
120
+ try:
121
+ claw.wait_for_approval(action_id)
122
+ except ApprovalDeniedError:
123
+ pass # operator denied — real code must return/exit here instead of falling through to step 4
66
124
 
67
- # 4. Update result
125
+ # 4. Execute and record outcome
126
+ claw.record_assumption({"action_id": action_id, "assumption": "Staging tests passed"})
68
127
  claw.update_outcome(action_id, status="completed")
69
128
  ```
70
129
 
71
130
  ---
72
131
 
132
+ ## Human-in-the-Loop (HITL) Approval Flow
133
+
134
+ When a guard policy, a capability `requires_approval` flag, or any server-side
135
+ rule triggers human review, the server responds to `createAction()` with
136
+ `action.status === 'pending_approval'` and HTTP **202**. Your agent's job is to
137
+ pause on `waitForApproval()` until an operator clicks **Approve** or **Deny** from the dashboard, the
138
+ CLI, the mobile PWA, or — on instances with Telegram configured — an inline
139
+ Telegram button.
140
+
141
+ ### The rule every agent author needs to know
142
+
143
+ **`waitForApproval()` must be called with the `action_id` returned by
144
+ `createAction()`, NOT with the `action_id` returned by `guard()`.**
145
+
146
+ These are two different records in two different tables:
147
+
148
+ | Call | Returns `action_id` that refers to… | Prefix |
149
+ |---|---|---|
150
+ | `guard()` | A row in `guard_decisions` (the decision log) | `act_gd_…` |
151
+ | `createAction()` | A row in `action_records` (the thing you're actually doing) | `act_…` |
152
+
153
+ `waitForApproval()` polls `GET /api/actions/:id`, which is the
154
+ `action_records` table. Passing it a `guard_decisions` ID (`act_gd_…`) will
155
+ either return 404 or time out waiting on a row that doesn't exist. This was a
156
+ real bug in an early version of the OpenClaw plugin — don't reproduce it.
157
+
158
+ ### Correct sequence
159
+
160
+ ```javascript
161
+ // 1. Guard — advisory; may return 'allow', 'block', 'warn', or 'require_approval'
162
+ const decision = await claw.guard({
163
+ action_type: 'post_message',
164
+ declared_goal: 'Notify #ops of deploy start',
165
+ risk_score: 40,
166
+ });
167
+ if (decision.decision === 'block') {
168
+ throw new GuardBlockedError(decision);
169
+ }
170
+
171
+ // 2. Create the action. The server re-evaluates policy at this point and is
172
+ // the authoritative source for whether human review is required. Even if
173
+ // guard returned 'allow', the server may still set status='pending_approval'
174
+ // (for example, if a capability has requires_approval=true).
175
+ const { action, action_id } = await claw.createAction({
176
+ action_type: 'post_message',
177
+ declared_goal: 'Notify #ops of deploy start',
178
+ risk_score: 40,
179
+ });
180
+
181
+ // 3. Check the SERVER's verdict, not the guard decision.
182
+ if (action?.status === 'pending_approval') {
183
+ try {
184
+ // Use createAction's action_id, never the guard decision's action_id.
185
+ await claw.waitForApproval(action_id, { timeout: 600_000 });
186
+ } catch (err) {
187
+ if (err instanceof ApprovalDeniedError) {
188
+ // Operator denied — do NOT execute the action
189
+ return { denied: true, reason: err.message };
190
+ }
191
+ throw err;
192
+ }
193
+ }
194
+
195
+ // 4. Execute and record outcome
196
+ await doTheWork();
197
+ await claw.updateOutcome(action_id, { status: 'completed' });
198
+ ```
199
+
200
+ ### What `waitForApproval()` does under the hood
201
+
202
+ - Opens an SSE connection to `/api/stream` and watches for
203
+ `action.updated` events scoped to the given `actionId`.
204
+ - Falls back to HTTP polling of `GET /api/actions/:id` every 5 seconds if
205
+ SSE is unavailable.
206
+ - Resolves when `action.approved_by` is set (operator approved).
207
+ - Throws `ApprovalDeniedError` when `action.status` becomes `failed` or
208
+ `cancelled` (operator denied).
209
+ - Throws a timeout error after `options.timeout` milliseconds (default
210
+ `300_000` = 5 minutes).
211
+
212
+ ### Why guard and the server can disagree
213
+
214
+ `guard()` is fast, in-memory, and advisory. The server's `createAction` handler
215
+ re-runs the exact same `evaluateGuard()` pipeline against the **persisted**
216
+ action record, plus any capability-specific `requires_approval` flags and
217
+ org-scoped rules that can only be resolved at write time. So the authoritative
218
+ answer to "does this need human review?" is always `action.status` on the
219
+ `createAction()` response — not `decision.decision` on the `guard()` response.
220
+
221
+ Short version: **trust `action.status`, not `decision.decision`, for HITL
222
+ branching.**
223
+
224
+ ---
225
+
73
226
  ## SDK Tiers
74
227
 
75
228
  DashClaw currently exposes a canonical Node SDK surface plus a legacy compatibility layer:
@@ -98,33 +251,43 @@ See:
98
251
 
99
252
  ---
100
253
 
101
- ## SDK Surface Area (v2.11.1)
254
+ ## SDK Surface Area (v2.12.0)
102
255
 
103
256
  The v2 SDK exposes the stable governance runtime plus promoted execution domains in the canonical Node client:
104
257
 
105
258
  ### Core Runtime
106
- - `guard(context)` -- Policy evaluation ("Can I do X?"). Returns `risk_score` (server-computed) and `agent_risk_score` (raw agent value)
107
- - `createAction(action)` -- Lifecycle tracking ("I am doing X")
108
- - `updateOutcome(id, outcome)` -- Result recording ("X finished with Y")
259
+ - `guard(context)` -- Policy evaluation ("Can I do X?"). Returns `risk_score` (server-computed), `agent_risk_score` (raw agent value), and `verification_status` (`verified` | `unverified` | `expired` | `failed` | `unknown_issuer`). Automatically includes `agent_name` from the constructor if not overridden in the call context. Pass `authToken` in the constructor to enable JWKS-backed cryptographic attribution (Phase 2 — see `docs/agent-identity.md`).
260
+ - `createAction(action)` -- Lifecycle tracking ("I am doing X"). Accepts optional `idempotency_key`; on collision returns the existing row with `{ idempotent_replay: true }` instead of inserting a duplicate.
261
+ - `updateOutcome(id, outcome)` -- Result recording ("X finished with Y"). `outcome` accepts `status`, `output_summary`, `side_effects`, `artifacts_created`, `error_message`, `duration_ms`, `tokens_in`, `tokens_out`, `model`, `cost_estimate`. When `tokens_in` / `tokens_out` are reported without an explicit `cost_estimate`, the server derives cost from `model` using the configured pricing table.
109
262
  - `recordAssumption(assumption)` -- Integrity tracking ("I believe Z while doing X")
110
263
  - `waitForApproval(id)` -- Real-time SSE listener for human-in-the-loop approvals (automatic polling fallback)
111
264
  - `approveAction(id, decision, reasoning?)` -- Submit approval decisions from code
112
265
  - `getPendingApprovals()` -- List actions awaiting human review
113
266
 
267
+ ### Durable Execution Finality (v2.13.3+)
268
+ Terminal outcome reporting that is one-shot, retry-safe, and immutable once non-pending. Separate from `updateOutcome`, which remains the lifecycle-PATCH path. Full spec: [`docs/architecture/durable-execution-finality.md`](../docs/architecture/durable-execution-finality.md). Detailed examples in the [Action Outcome](#action-outcome-durable-execution-finality) subsection of Execution Studio below.
269
+
270
+ - `reportActionOutcome(id, { status, summary?, error_message?, progress? })` -- Record the terminal outcome. `status` must be `completed`, `partial`, or `failed`; `lost_confirmation` is reserved for the system sweep. First call wins; subsequent POSTs return 409 with `current_status`.
271
+ - `getActionOutcome(id)` -- Read the current outcome state. Returns `status` (one of `pending` / `completed` / `partial` / `failed` / `lost_confirmation`), `outcome_at`, `summary`, `error_message`, `progress`, `elapsed_ms`. Poll this before retrying any approved action.
272
+ - `reportActionSuccess(id, summary?)` -- Convenience wrapper for `completed`.
273
+ - `reportActionFailure(id, errorMessage, summary?)` -- Convenience wrapper for `failed`. `error_message` is required.
274
+ - `reportActionPartial(id, progress, summary?)` -- Convenience wrapper for `partial`. `progress` (object) is required.
275
+ - `deriveIdempotencyKey(parts)` -- SHA-256 hex digest of intent-fields for the `idempotency_key` field on `createAction`. Order-independent. Derive from intent (agent, action_type, scope, request_id), not timestamps.
276
+
114
277
  ### Decision Integrity
115
278
  - `registerOpenLoop(actionId, type, desc)` -- Register unresolved dependencies.
116
279
  - `resolveOpenLoop(loopId, status, res)` -- Resolve pending loops.
117
280
  - `getSignals()` -- Get current risk signals across all agents.
118
281
 
119
282
  ### Swarm & Connectivity
120
- - `heartbeat(status, metadata)` -- Report agent presence and health. **As of DashClaw 2.13.0, heartbeats are implicit on `createAction()` — you only need this if you want to report presence without recording an action.**
283
+ - `heartbeat(status, metadata)` -- Report agent presence and health. **As of DashClaw platform 2.13.0 (server-side change, independent of SDK version), heartbeats are implicit on `createAction()` — you only need this if you want to report presence without recording an action.**
121
284
  - `reportConnections(connections)` -- Report active provider connections.
122
285
 
123
286
  ### Learning & Optimization
124
287
  - `getLearningVelocity()` -- Track agent improvement rate.
125
288
  - `getLearningCurves()` -- Measure efficiency gains per action type.
126
289
  - `getLessons({ actionType, limit })` -- Fetch consolidated lessons from scored outcomes.
127
- - `renderPrompt(context)` -- Fetch rendered prompt templates from DashClaw.
290
+ - `renderPrompt({ template_id, version_id, variables, record })` -- Fetch a rendered prompt template from DashClaw. `template_id` is required; `version_id` defaults to the active version; `variables` is an object of mustache values; `record: true` persists the render as a governance event.
128
291
 
129
292
  ### Learning Loop
130
293
 
@@ -367,30 +530,55 @@ Messages sent through the context are automatically correlated with the action i
367
530
 
368
531
  DashClaw uses standard HTTP status codes and custom error classes:
369
532
 
370
- - `GuardBlockedError` -- Thrown when `claw.guard()` returns a `block` decision.
371
- - `ApprovalDeniedError` -- Thrown when an operator denies an action during `waitForApproval()`.
533
+ - `GuardBlockedError` -- Thrown by **any** SDK call when the server returns HTTP 403 with `{ decision: { decision: 'block' } }`. Note that a successful `guard()` call returning `{ decision: 'block' }` in a **200** body does **not** throw — it just returns the decision object. Always check `decision.decision === 'block'` after `guard()` and throw `new GuardBlockedError(decision)` yourself if you want to abort early, as shown in the governance loop above.
534
+ - `ApprovalDeniedError` -- Thrown by `waitForApproval()` when an operator denies the action (server sets `status` to `failed` or `cancelled`).
372
535
 
373
536
  ---
374
537
 
375
- ## CLI Approval Channel
538
+ ## CLI (`@dashclaw/cli`)
376
539
 
377
- Install the DashClaw CLI to approve agent actions from the terminal:
540
+ Install the DashClaw CLI for terminal approvals and self-host diagnostics:
378
541
 
379
542
  ```bash
380
543
  npm install -g @dashclaw/cli
381
544
  ```
382
545
 
546
+ **Approvals:**
547
+
383
548
  ```bash
384
549
  dashclaw approvals # interactive approval inbox
385
550
  dashclaw approve <actionId> # approve a specific action
386
551
  dashclaw deny <actionId> # deny a specific action
387
552
  ```
388
553
 
389
- When an agent calls `waitForApproval()`, it prints the action ID and replay link to stdout. Approve from any terminal or the dashboard, and the agent unblocks instantly.
554
+ **Diagnostics:**
390
555
 
391
- ## MCP Server (Zero-Code Integration)
556
+ ```bash
557
+ dashclaw doctor # diagnose + auto-fix safe issues (database, config, auth, deployment, SDK, governance, drift)
558
+ dashclaw doctor --json # CI/machine-readable
559
+ dashclaw doctor --no-fix # diagnose only
560
+ dashclaw doctor --category database,config
561
+ ```
392
562
 
393
- If your agent supports MCP (Claude Code, Claude Desktop, Managed Agents), you can skip the SDK entirely:
563
+ Config resolution order: env vars (`DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, optional `DASHCLAW_AGENT_ID`) → `~/.dashclaw/config.json` (mode `600`, persisted after interactive prompt) → first-run prompt. `dashclaw logout` removes saved config.
564
+
565
+ When an agent calls `waitForApproval()`, it prints the action ID and replay link to stdout. Approve from any terminal, the browser dashboard, the `/approve` mobile PWA, or — if the instance has Telegram configured — via an inline Telegram Approve/Reject button pushed to the admin chat. Decisions sync over Redis SSE within ~1 second.
566
+
567
+ ## Self-Host Doctor (`npm run doctor`)
568
+
569
+ For operators running a self-hosted DashClaw instance, Doctor is also available as a local script with filesystem-level fix powers:
570
+
571
+ ```bash
572
+ npm run doctor # can write .env, run migrations, seed default policy
573
+ ```
574
+
575
+ Doctor check modules are emitted from the livingcode shape (`app/lib/doctor/generated/checks-from-shape.mjs`) and run against `GET /api/doctor` / `POST /api/doctor/fix`. The `.env` is always backed up before any write. Includes a drift guard that flags when shape-derived artifacts are out of sync — fix with `npm run livingcode:refresh`.
576
+
577
+ ## MCP Server (`@dashclaw/mcp-server`)
578
+
579
+ If your agent supports Model Context Protocol (Claude Code, Claude Desktop, Managed Agents, MCP Inspector), skip the SDK entirely and let the MCP server wire governance into your agent loop.
580
+
581
+ **stdio transport** (recommended for Claude Desktop / Claude Code):
394
582
 
395
583
  ```json
396
584
  {
@@ -404,21 +592,59 @@ If your agent supports MCP (Claude Code, Claude Desktop, Managed Agents), you ca
404
592
  }
405
593
  ```
406
594
 
407
- The MCP server exposes the same governance surface as the SDK (guard, record, invoke, wait for approval) plus discovery (capabilities, policies) and session lifecycle.
595
+ **Streamable HTTP transport** (same surface, served by your DashClaw instance at `POST /api/mcp`).
596
+
597
+ **23 tools** in 7 groups:
598
+
599
+ - **Core governance (8):** `dashclaw_guard`, `dashclaw_record`, `dashclaw_invoke`, `dashclaw_capabilities_list`, `dashclaw_policies_list`, `dashclaw_wait_for_approval`, `dashclaw_session_start`, `dashclaw_session_end`.
600
+ - **Optimal files (2):** `dashclaw_optimal_files_preview`, `dashclaw_optimal_files_manifest` — Code Sessions optimizer output (root CLAUDE.md, path-scoped rules, hooks, skill packs).
601
+ - **Session continuity (3):** `dashclaw_handoff_create`, `dashclaw_handoff_latest`, `dashclaw_handoff_consume` — agent-runtime handoff bundle for the next session.
602
+ - **Credential hygiene (3):** `dashclaw_secret_list`, `dashclaw_secret_due`, `dashclaw_secret_mark_rotated` — check rotation due-dates before acting on tracked credentials.
603
+ - **Skill safety (1):** `dashclaw_skill_scan` — static safety scan of untrusted skill files; results cached by content hash.
604
+ - **Open loops (3):** `dashclaw_loop_add`, `dashclaw_loop_list`, `dashclaw_loop_close` — action-scoped commitments (the "I will X later" tracker).
605
+ - **Learning + retrospection (3):** `dashclaw_learning_log`, `dashclaw_learning_query`, `dashclaw_decisions_recent` — log + query non-obvious decisions; recent governed-action ledger.
606
+
607
+ **4 resources:** `dashclaw://policies`, `dashclaw://capabilities`, `dashclaw://agent/{agent_id}/history`, `dashclaw://status`.
608
+
609
+ ### Agent runtime endpoints (server-side, no SDK wrapper)
610
+
611
+ DashClaw 2.17 (platform) added three route families that are **agent-runtime infrastructure, not developer SDK methods**. They are called by the MCP server (the tools listed above), by Hermes Agent hooks, and by other governance plumbing — never directly from agent code. By design, they are not exposed on `claw.*`:
612
+
613
+ | Family | Endpoints | Where called from |
614
+ |---|---|---|
615
+ | Session handoffs | `POST/GET /api/handoffs`, `GET /api/handoffs/latest`, `GET /api/handoffs/{id}`, `POST /api/handoffs/{id}/consume` | Hermes `on_session_end` / `on_session_start` / `pre_llm_call` hooks; MCP `dashclaw_handoff_*` tools |
616
+ | Operator-tracked secrets | `GET/POST /api/secrets`, `PATCH/DELETE /api/secrets/{id}`, `GET /api/secrets/rotation-due` | MCP `dashclaw_secret_*` tools; operator UI |
617
+ | Skill safety scan | `POST /api/skills/scan`, `GET /api/skills/scans/{id}` | MCP `dashclaw_skill_scan` tool; agents before loading an untrusted skill |
618
+
619
+ If you're building a custom integration that needs these without MCP, call them as plain HTTP — see `docs/api-inventory.md` and the OpenAPI spec at `docs/openapi/critical-stable.openapi.json`.
620
+
621
+ ## OpenClaw Plugin (`@dashclaw/openclaw-plugin`)
622
+
623
+ For teams using the OpenClaw agent framework, the governance plugin intercepts `PreToolUse` / `PostToolUse` lifecycle hooks and runs guard → record → wait-for-approval automatically. Tool classification vocabulary aligns with DashClaw's guard action types. Install via the openclaw CLI, which picks up the bundled `HOOK.md` pack.
624
+
625
+ ## Governance Skill for Claude (Anthropic)
626
+
627
+ For Anthropic Managed Agents or Claude Code sessions, the `@dashclaw/governance` skill teaches the agent how to use the MCP tools correctly — risk thresholds, decision handling, recording rules, session lifecycle. Pairs with `@dashclaw/mcp-server`. Download at `https://<your-instance>/downloads/dashclaw-governance.zip` or see `public/downloads/dashclaw-governance/`.
408
628
 
409
629
  ---
410
630
 
411
631
  ## Claude Code Hooks
412
632
 
413
- Govern Claude Code tool calls without any SDK instrumentation. Copy two files from the `hooks/` directory in the repo into your `.claude/hooks/` folder:
633
+ Govern Claude Code tool calls without any SDK instrumentation. One command from anywhere DashClaw is cloned:
414
634
 
415
635
  ```bash
416
- # In your project directory
417
- cp path/to/DashClaw/hooks/dashclaw_pretool.py .claude/hooks/
418
- cp path/to/DashClaw/hooks/dashclaw_posttool.py .claude/hooks/
636
+ # From a DashClaw checkout
637
+ npm run hooks:install
638
+
639
+ # From any other project, pointing at a DashClaw checkout
640
+ node /path/to/DashClaw/scripts/install-hooks.mjs --target=.
419
641
  ```
420
642
 
421
- Then merge the hooks block from `hooks/settings.json` into your `.claude/settings.json`. Set `DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, and optionally `DASHCLAW_HOOK_MODE=enforce`.
643
+ This installs three hooks (`dashclaw_pretool.py`, `dashclaw_posttool.py`, `dashclaw_stop.py`) plus the bundled `dashclaw_agent_intel/` tool-classification module into `.claude/hooks/`, then merges the `PreToolUse`, `PostToolUse`, and `Stop` blocks into `.claude/settings.json`. Idempotent: re-run after `git pull` to upgrade.
644
+
645
+ The Stop hook captures per-turn LLM token usage from the session transcript and PATCHes it onto the action records the pretool opened during the turn, so cost analytics light up without per-agent instrumentation.
646
+
647
+ Set `DASHCLAW_BASE_URL`, `DASHCLAW_API_KEY`, and optionally `DASHCLAW_HOOK_MODE=enforce`. Full guide and per-hook details in [`hooks/README.md`](../hooks/README.md).
422
648
 
423
649
  ---
424
650
 
@@ -463,6 +689,69 @@ const { rootActionId, nodes, edges } = await claw.getActionGraph(actionId);
463
689
  // edges: parent_child | related | assumption_of | loop_from
464
690
  ```
465
691
 
692
+ ### Action Outcome (durable execution finality)
693
+
694
+ Every approved action carries a terminal outcome: `pending`, `completed`, `partial`, `failed`, or `lost_confirmation`. Agents call `reportActionOutcome` to record finality, and `getActionOutcome` before retry to avoid re-executing already-completed work. Outcomes are one-shot — once non-pending, they cannot be rewritten.
695
+
696
+ ```javascript
697
+ // Report success
698
+ await claw.reportActionOutcome(actionId, {
699
+ status: 'completed',
700
+ summary: 'Deployed dashclaw 2.13.4 to production'
701
+ });
702
+
703
+ // Convenience wrappers
704
+ await claw.reportActionSuccess(actionId, 'Deployed dashclaw 2.13.4');
705
+ await claw.reportActionFailure(actionId, 'Downstream API returned 503');
706
+ await claw.reportActionPartial(actionId, { step: 2, of: 5 });
707
+
708
+ // Report failure (error_message required)
709
+ await claw.reportActionOutcome(actionId, {
710
+ status: 'failed',
711
+ error_message: 'Downstream API returned 503'
712
+ });
713
+
714
+ // Report partial progress (progress object required)
715
+ await claw.reportActionOutcome(actionId, {
716
+ status: 'partial',
717
+ progress: { step: 2, of: 5 }
718
+ });
719
+
720
+ // Retry-safe poll before re-trying any approved action
721
+ const outcome = await claw.getActionOutcome(actionId);
722
+ switch (outcome.status) {
723
+ case 'pending': /* still in flight, WAIT */ break;
724
+ case 'completed': /* already executed, SKIP */ break;
725
+ case 'failed': /* safe to RETRY */ break;
726
+ case 'lost_confirmation': /* sweep gave up, safe to RETRY */ break;
727
+ case 'partial': /* clean up then retry */ break;
728
+ }
729
+ ```
730
+
731
+ HTTP surface (when the SDK isn't available):
732
+
733
+ ```bash
734
+ curl -X POST "$BASE_URL/api/actions/$ACTION_ID/outcome" \
735
+ -H "x-api-key: $API_KEY" -H "Content-Type: application/json" \
736
+ -d '{"status":"completed","summary":"shipped"}'
737
+ # 200 → { outcome: { ... } }
738
+ # 409 → { error: "outcome already set", current_status: "completed" }
739
+ ```
740
+
741
+ Pending outcomes that are never reported are swept to `lost_confirmation` by `/api/cron/outcome-sweep`. On Vercel's Hobby plan the cron runs once a day; the `lost_confirmation` event fires a `signal.detected` webhook so subscribers can see and recover. The per-org timeout (in minutes) is configurable via the `DASHCLAW_OUTCOME_TIMEOUT_MINUTES` setting (default 15).
742
+
743
+ **Idempotency keys.** Network errors on the *create* side of the create-then-execute flow used to leave duplicate `action_records` behind. Pass `idempotency_key` on `POST /api/actions` to make creates retry-safe — a second POST with the same `(org_id, idempotency_key)` returns the original row with `{ idempotent_replay: true }` instead of inserting a duplicate. Derive keys from intent, not timestamps:
744
+
745
+ ```javascript
746
+ const idempotency_key = claw.deriveIdempotencyKey({
747
+ agent_id: 'deploy-bot',
748
+ action_type: 'deploy',
749
+ scope: 'prod-us-east',
750
+ request_id: requestId, // your own attempt discriminator
751
+ });
752
+ await claw.createAction({ /* ... */, idempotency_key });
753
+ ```
754
+
466
755
  ### Workflow Templates
467
756
 
468
757
  ```javascript
@@ -704,5 +993,35 @@ Health responses now include certification and recency fields such as:
704
993
 
705
994
  ---
706
995
 
996
+ ## Hosted provisioning (operator surface — not an SDK method)
997
+
998
+ When `DASHCLAW_HOSTED=true` the deployment exposes `/api/hosted/*` routes for one-click trial provisioning. These are operator-facing routes, not SDK methods — they produce the API key the SDK consumes.
999
+
1000
+ ```bash
1001
+ # Mint a trial workspace (no auth required; Turnstile-gated in production)
1002
+ curl -X POST https://hosted.example.com/api/hosted/workspaces \
1003
+ -H "content-type: application/json" \
1004
+ -d '{"turnstile_token": "..."}'
1005
+ # → { "workspace_id": "org_...", "api_key": "oc_live_...", "endpoint": "...",
1006
+ # "expires_at": "...", "trial_action_cap": 10000, "key_prefix": "oc_live_",
1007
+ # "next_steps_url": "https://hosted.example.com/connect?hosted=org_..." }
1008
+
1009
+ # Admin: inspect a trial workspace (x-api-key with admin role)
1010
+ curl https://hosted.example.com/api/hosted/workspaces/org_abc \
1011
+ -H "x-api-key: <admin_key>"
1012
+
1013
+ # Admin: delete a trial workspace
1014
+ curl -X DELETE https://hosted.example.com/api/hosted/workspaces/org_abc \
1015
+ -H "x-api-key: <admin_key>"
1016
+
1017
+ # Cron: sweep expired trials (admin role OR X-Cleanup-Secret)
1018
+ curl -X POST https://hosted.example.com/api/hosted/cleanup \
1019
+ -H "X-Cleanup-Secret: $HOSTED_CLEANUP_SECRET"
1020
+ ```
1021
+
1022
+ These routes return 404 when `DASHCLAW_HOSTED` is unset — self-host deploys are unaffected.
1023
+
1024
+ ---
1025
+
707
1026
  ## License
708
1027
  MIT
package/dashclaw.js CHANGED
@@ -1,8 +1,10 @@
1
1
  /**
2
- * DashClaw SDK v2.11.0 (Stable Runtime API)
2
+ * DashClaw SDK v2.12.0 (Stable Runtime API)
3
3
  * Focused governance runtime client for AI agents.
4
4
  */
5
5
 
6
+ import { createHash } from 'crypto';
7
+
6
8
  class ApprovalDeniedError extends Error {
7
9
  constructor(message, decision) {
8
10
  super(message);
@@ -25,8 +27,13 @@ class DashClaw {
25
27
  * @param {string} options.baseUrl - DashClaw base URL
26
28
  * @param {string} options.apiKey - API key for authentication
27
29
  * @param {string} options.agentId - Unique identifier for this agent
30
+ * @param {string} [options.agentName] - Human-readable label for this agent (stored in audit trail)
31
+ * @param {string} [options.authToken] - Phase 2: JWT bearer token from your OIDC provider.
32
+ * When set, DashClaw server verifies the token via JWKS and returns `verification_status`
33
+ * in every guard response. The JWT `sub` claim overrides agentId in the audit record
34
+ * when verification succeeds — cryptographic proof beats self-assertion.
28
35
  */
29
- constructor({ baseUrl, apiKey, agentId }) {
36
+ constructor({ baseUrl, apiKey, agentId, agentName, authToken }) {
30
37
  if (!baseUrl) throw new Error('baseUrl is required');
31
38
  if (!apiKey) throw new Error('apiKey is required');
32
39
  if (!agentId) throw new Error('agentId is required');
@@ -34,6 +41,8 @@ class DashClaw {
34
41
  this.baseUrl = baseUrl.replace(/\/$/, '');
35
42
  this.apiKey = apiKey;
36
43
  this.agentId = agentId;
44
+ this.agentName = agentName || null;
45
+ this.authToken = authToken || null;
37
46
 
38
47
  this.execution = {
39
48
  capabilities: {
@@ -59,7 +68,8 @@ class DashClaw {
59
68
 
60
69
  const headers = {
61
70
  'Content-Type': 'application/json',
62
- 'x-api-key': this.apiKey
71
+ 'x-api-key': this.apiKey,
72
+ ...(this.authToken ? { 'Authorization': `Bearer ${this.authToken}` } : {}),
63
73
  };
64
74
 
65
75
  const res = await fetch(url, {
@@ -90,12 +100,30 @@ class DashClaw {
90
100
  /**
91
101
  * POST /api/guard — "Can I do X?"
92
102
  * @param {Object} context
93
- * @returns {Promise<{decision: 'allow'|'block'|'require_approval', action_id: string, reason: string, signals: string[]}>}
103
+ * @returns {Promise<{
104
+ * decision: 'allow'|'block'|'require_approval'|'warn',
105
+ * action_id: string,
106
+ * reason: string,
107
+ * signals: string[],
108
+ * verification_status: 'verified'|'unverified'|'expired'|'failed'|'unknown_issuer',
109
+ * agent_id: string|null,
110
+ * agent_name: string|null,
111
+ * }>}
112
+ *
113
+ * `verification_status` reflects whether the JWT bearer token (if provided
114
+ * via the `authToken` constructor option) was cryptographically verified:
115
+ * verified — signature valid; audit entry anchored to JWT sub
116
+ * unverified — no token, or issuer temporarily unreachable (fail-soft)
117
+ * expired — token expired; consider refreshing before next call
118
+ * failed — bad signature, malformed token, or audience mismatch
119
+ * unknown_issuer — issuer not in DASHCLAW_ALLOWED_ISSUER (server config)
94
120
  */
95
121
  async guard(context) {
96
122
  return this._request('/api/guard', 'POST', {
97
123
  ...context,
98
124
  agent_id: context.agent_id || this.agentId,
125
+ // Include agent_name for audit attribution if not already provided by caller
126
+ ...(context.agent_name == null && this.agentName ? { agent_name: this.agentName } : {}),
99
127
  });
100
128
  }
101
129
 
@@ -749,6 +777,95 @@ class DashClaw {
749
777
  return this._request(`/api/actions/${actionId}/graph`, 'GET');
750
778
  }
751
779
 
780
+ // ---------------------------------------------------------------------------
781
+ // Durable execution finality — terminal outcome reporting
782
+ // See docs/architecture/durable-execution-finality.md
783
+ // ---------------------------------------------------------------------------
784
+
785
+ /**
786
+ * POST /api/actions/:id/outcome — Record the terminal outcome of an action.
787
+ *
788
+ * @param {string} actionId
789
+ * @param {Object} payload
790
+ * @param {'completed'|'partial'|'failed'} payload.status
791
+ * @param {string} [payload.summary]
792
+ * @param {string} [payload.error_message] — required when status=failed
793
+ * @param {Object} [payload.progress] — required when status=partial
794
+ * @returns {Promise<{ outcome: object, security: object }>}
795
+ * @throws on 409 when the outcome is already terminal — inspect the response
796
+ * body for `current_status` before deciding what to do next.
797
+ */
798
+ async reportActionOutcome(actionId, payload) {
799
+ return this._request(`/api/actions/${actionId}/outcome`, 'POST', payload);
800
+ }
801
+
802
+ /**
803
+ * GET /api/actions/:id/outcome — Read the current outcome state of an action.
804
+ *
805
+ * Returns `{ action_id, status, outcome_at, summary, error_message, progress, elapsed_ms }`.
806
+ * Status is one of: pending, completed, partial, failed, lost_confirmation.
807
+ * Use this BEFORE retrying any approved action to avoid double-execution.
808
+ */
809
+ async getActionOutcome(actionId) {
810
+ return this._request(`/api/actions/${actionId}/outcome`, 'GET');
811
+ }
812
+
813
+ /**
814
+ * Convenience: report a successful terminal outcome.
815
+ */
816
+ async reportActionSuccess(actionId, summary) {
817
+ return this.reportActionOutcome(actionId, { status: 'completed', summary });
818
+ }
819
+
820
+ /**
821
+ * Convenience: report a failed terminal outcome. `error_message` is required.
822
+ */
823
+ async reportActionFailure(actionId, errorMessage, summary) {
824
+ return this.reportActionOutcome(actionId, {
825
+ status: 'failed',
826
+ error_message: errorMessage,
827
+ summary,
828
+ });
829
+ }
830
+
831
+ /**
832
+ * Convenience: report a partial outcome with progress state. Progress is
833
+ * required (an object describing where the agent stopped).
834
+ */
835
+ async reportActionPartial(actionId, progress, summary) {
836
+ return this.reportActionOutcome(actionId, {
837
+ status: 'partial',
838
+ progress,
839
+ summary,
840
+ });
841
+ }
842
+
843
+ /**
844
+ * Derive a stable idempotency key from the *intent* of an action so a
845
+ * retried `createAction` call returns the original row instead of creating
846
+ * a duplicate. Pass the same `parts` for the same logical action; vary at
847
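+ * least one part for distinct actions. Values are string-coerced, so use primitives: plain nested objects all serialize as `[object Object]`.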
848
+ *
849
+ * The hash function uses SHA-256 hex via Node's built-in crypto. In
850
+ * browser-only environments without Node's `crypto` module, callers should compute the
851
+ * key themselves and pass it directly to `createAction({ idempotency_key })`.
852
+ *
853
+ * @param {Object} parts — at minimum agent_id + action_type + a request
854
+ * discriminator that identifies the logical request and stays the same across retries. Reusing the key
855
+ * for a logically distinct action is the agent's bug, not DashClaw's.
856
+ * @returns {string} SHA-256 hex digest of the key-sorted `key=value` pairs joined with `|` (null/undefined values become empty strings)
857
+ */
858
+ deriveIdempotencyKey(parts) {
859
+ if (!parts || typeof parts !== 'object') {
860
+ throw new TypeError('deriveIdempotencyKey: parts must be an object');
861
+ }
862
+ const ordered = Object.keys(parts)
863
+ .sort()
864
+ .map((k) => `${k}=${parts[k] ?? ''}`)
865
+ .join('|');
866
+ return createHash('sha256').update(ordered).digest('hex');
867
+ }
868
+
752
869
  // ---------------------------------------------------------------------------
753
870
  // Execution Studio — Workflow Templates
754
871
  // ---------------------------------------------------------------------------
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dashclaw",
3
- "version": "2.11.1",
3
+ "version": "2.13.0",
4
4
  "description": "Minimal governance runtime for AI agents. Intercept, govern, and verify agent actions.",
5
5
  "type": "module",
6
6
  "publishConfig": {