cool-workflow 0.1.80 → 0.1.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +42 -2
- package/apps/architecture-review/app.json +1 -1
- package/apps/architecture-review-fast/app.json +1 -1
- package/apps/end-to-end-golden-path/app.json +1 -1
- package/apps/pr-review-fix-ci/app.json +1 -1
- package/apps/release-cut/app.json +1 -1
- package/apps/research-synthesis/app.json +1 -1
- package/dist/agent-config.js +21 -7
- package/dist/candidate-scoring.js +42 -22
- package/dist/capability-core.js +94 -17
- package/dist/capability-registry.js +138 -171
- package/dist/cli.js +90 -100
- package/dist/collaboration.js +5 -6
- package/dist/commit.js +20 -6
- package/dist/compare.js +18 -0
- package/dist/coordinator/classify.js +45 -0
- package/dist/coordinator/paths.js +42 -0
- package/dist/coordinator/util.js +129 -0
- package/dist/coordinator.js +127 -300
- package/dist/dispatch.js +35 -0
- package/dist/drive.js +7 -7
- package/dist/error-feedback.js +8 -4
- package/dist/evidence-reasoning.js +1 -1
- package/dist/execution-backend/agent.js +331 -0
- package/dist/execution-backend/probes.js +96 -0
- package/dist/execution-backend/util.js +47 -0
- package/dist/execution-backend.js +67 -420
- package/dist/mcp-server.js +34 -173
- package/dist/multi-agent/graph.js +84 -0
- package/dist/multi-agent/helpers.js +145 -0
- package/dist/multi-agent/paths.js +22 -0
- package/dist/multi-agent-eval/format.js +194 -0
- package/dist/multi-agent-eval/normalize.js +51 -0
- package/dist/multi-agent-eval.js +39 -244
- package/dist/multi-agent-host.js +0 -19
- package/dist/multi-agent.js +125 -314
- package/dist/node-snapshot.js +3 -3
- package/dist/observability/format.js +61 -0
- package/dist/observability/intake.js +98 -0
- package/dist/observability.js +14 -160
- package/dist/operator-ux/format.js +364 -0
- package/dist/operator-ux.js +22 -363
- package/dist/orchestrator/report.js +8 -0
- package/dist/orchestrator.js +25 -8
- package/dist/reclamation.js +26 -21
- package/dist/run-export.js +138 -14
- package/dist/run-registry/derive.js +172 -0
- package/dist/run-registry/format.js +124 -0
- package/dist/run-registry/gc.js +251 -0
- package/dist/run-registry/policy.js +16 -0
- package/dist/run-registry/queue.js +116 -0
- package/dist/run-registry.js +78 -593
- package/dist/run-state-schema.js +1 -0
- package/dist/sandbox-profile.js +43 -2
- package/dist/state-explosion/format.js +159 -0
- package/dist/state-explosion/helpers.js +82 -0
- package/dist/state-explosion.js +65 -283
- package/dist/state-node.js +19 -4
- package/dist/telemetry-attestation.js +55 -0
- package/dist/telemetry-demo.js +15 -3
- package/dist/telemetry-ledger.js +60 -15
- package/dist/topology.js +25 -8
- package/dist/triggers.js +33 -14
- package/dist/trust-audit.js +145 -33
- package/dist/version.js +1 -1
- package/dist/worker-isolation/helpers.js +51 -0
- package/dist/worker-isolation/paths.js +46 -0
- package/dist/worker-isolation.js +39 -115
- package/docs/agent-delegation-drive.7.md +13 -0
- package/docs/cli-mcp-parity.7.md +4 -0
- package/docs/contract-migration-tooling.7.md +2 -0
- package/docs/control-plane-scheduling.7.md +2 -0
- package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
- package/docs/durable-state-and-locking.7.md +4 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +2 -0
- package/docs/execution-backends.7.md +2 -0
- package/docs/index.md +1 -0
- package/docs/launch/launch-kit.md +46 -23
- package/docs/launch/pre-launch-checklist.md +14 -14
- package/docs/multi-agent-cli-mcp-surface.7.md +4 -0
- package/docs/multi-agent-eval-replay-harness.7.md +2 -0
- package/docs/multi-agent-operator-ux.7.md +2 -0
- package/docs/multi-agent-trust-policy-audit.7.md +27 -0
- package/docs/node-snapshot-diff-replay.7.md +2 -0
- package/docs/observability-cost-accounting.7.md +2 -0
- package/docs/project-index.md +18 -5
- package/docs/real-execution-backends.7.md +2 -0
- package/docs/release-and-migration.7.md +4 -0
- package/docs/release-tooling.7.md +2 -0
- package/docs/run-registry-control-plane.7.md +54 -8
- package/docs/run-retention-reclamation.7.md +4 -0
- package/docs/state-explosion-management.7.md +2 -0
- package/docs/team-collaboration.7.md +2 -0
- package/docs/trust-model.md +267 -0
- package/docs/vendor-manifest-loadability.7.md +43 -0
- package/docs/web-desktop-workbench.7.md +2 -0
- package/manifest/plugin.manifest.json +1 -1
- package/package.json +4 -2
- package/scripts/agents/builtin-templates.json +7 -0
- package/scripts/bump-version.js +5 -11
- package/scripts/canonical-apps-list.js +64 -0
- package/scripts/canonical-apps.js +19 -4
- package/scripts/dogfood-release.js +1 -1
- package/scripts/golden-path.js +4 -4
- package/scripts/parity-check.js +5 -0
- package/scripts/release-check.js +5 -1
- package/scripts/version-sync-check.js +5 -8
- package/dist/capability-dispatcher.js +0 -86
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cool-workflow",
|
|
3
3
|
"description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
|
|
4
|
-
"version": "0.1.80",
|
|
4
|
+
"version": "0.1.81",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "COOLWHITE LLC"
|
|
7
7
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cool-workflow",
|
|
3
|
-
"version": "0.1.80",
|
|
3
|
+
"version": "0.1.81",
|
|
4
4
|
"description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "COOLWHITE LLC"
|
package/README.md
CHANGED
|
@@ -263,6 +263,40 @@ write paths, command execution, network access, and environment exposure. CW
|
|
|
263
263
|
stores and validates the policy, while the agent host enforces OS/process
|
|
264
264
|
runtime controls. See [docs/sandbox-profiles.7.md](docs/sandbox-profiles.7.md).
|
|
265
265
|
|
|
266
|
+
## Quickstart
|
|
267
|
+
|
|
268
|
+
**30-second proof, no install** — see that a recorded telemetry verdict can't be forged:
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
npx cool-workflow demo tamper
|
|
272
|
+
# builds a signed ed25519 ledger, forges it 2 ways, both caught offline
|
|
273
|
+
# -> VERDICT: tamper-evidence holds ✓
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Try a real run** — no clone needed; drive an architecture review with your own agent:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
npx cool-workflow quickstart architecture-review --repo /path/to/repo \
|
|
280
|
+
--question "Is this architecture sound?" --agent-command builtin:claude
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
CW DELEGATES worker execution to your own agent. With no `--agent-command` (or
|
|
284
|
+
`CW_AGENT_COMMAND`) the drive fails closed (status `blocked`) — it never fabricates a
|
|
285
|
+
result. `--agent-command builtin:claude` resolves to a bundled read-only `claude -p`
|
|
286
|
+
wrapper (needs `claude` on your PATH).
|
|
287
|
+
|
|
288
|
+
**Re-prove a finished run, offline** (`cw` is the installed bin; or `npx cool-workflow <cmd>`):
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
cw telemetry verify <run-id> # re-checks the hash-chained ledger
|
|
292
|
+
cw telemetry verify <run-id> --pubkey pub.pem # also re-runs ed25519 signature checks
|
|
293
|
+
cw audit verify <run-id> # re-proves the trust-audit hash chain
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
More: `cw quickstart <app> --preview` (read-only dry run), `cw run resume <run-id> --drive`
|
|
297
|
+
(continue an interrupted run), `cw run inspect-archive <archive>` (integrity-check a
|
|
298
|
+
portable run archive without importing it).
|
|
299
|
+
|
|
266
300
|
## Structure
|
|
267
301
|
|
|
268
302
|
```text
|
|
@@ -300,6 +334,10 @@ cool-workflow
|
|
|
300
334
|
|
|
301
335
|
## Commands
|
|
302
336
|
|
|
337
|
+
Installed via npm, the bin is `cw` (alias `cool-workflow`): e.g. `cw list`,
|
|
338
|
+
`cw quickstart …`. From a cloned source checkout, before `npm run build`, use the
|
|
339
|
+
equivalent `node scripts/cw.js <cmd>` form shown in the examples below.
|
|
340
|
+
|
|
303
341
|
List bundled workflows:
|
|
304
342
|
|
|
305
343
|
```bash
|
|
@@ -619,7 +657,7 @@ Replaces the linear migration chain with a BFS graph path resolver (`findMigrati
|
|
|
619
657
|
|
|
620
658
|
## Vendor-Adapter Registry (v0.1.47)
|
|
621
659
|
|
|
622
|
-
Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data.
|
|
660
|
+
Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data. Cross-vendor is proven by boot, not just by generation: `npm run manifest:load-check` (`node test/vendor-manifest-load-smoke.js`) loads every generated manifest (claude, codex, agents, gemini, opencode) and asserts each exposes the full tool surface (184 tools).
|
|
623
661
|
|
|
624
662
|
## P2 Fixes (v0.1.48)
|
|
625
663
|
|
|
@@ -651,7 +689,7 @@ The orchestration vision landed in one release, all reviewer-gated:
|
|
|
651
689
|
|
|
652
690
|
## Tamper-evidence demo (v0.1.79)
|
|
653
691
|
|
|
654
|
-
`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` is the operator-facing
|
|
692
|
+
`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` (`cw_telemetry_verify` on MCP) is the operator-facing re-proof: by default it recomputes the hash chain on disk so any later edit to a recorded verdict or usage digest is caught; add `--pubkey <pem-or-path>` to re-run each `attested` hop's ed25519 signature check against the stored raw usage too. What this does and does **not** prove — including the single-keyholder ceiling — is documented honestly in [Trust Model & Limitations](docs/trust-model.md); read it before relying on a green verdict.
|
|
655
693
|
|
|
656
694
|
## Opt-in live agent output during a drive (on main, ships next)
|
|
657
695
|
|
|
@@ -662,3 +700,5 @@ v0.1.79
|
|
|
662
700
|
## Fast Architecture Review (v0.1.80)
|
|
663
701
|
|
|
664
702
|
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
703
|
+
|
|
704
|
+
_This documentation tracks Cool Workflow v0.1.81. See [CHANGELOG](../../CHANGELOG.md) for the release notes._
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "architecture-review",
|
|
4
4
|
"title": "Architecture Review",
|
|
5
5
|
"summary": "Map a repository architecture, assess risks, verify important findings, and synthesize an evidence-backed verdict.",
|
|
6
|
-
"version": "0.1.80",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "architecture-review-fast",
|
|
4
4
|
"title": "Architecture Review Fast",
|
|
5
5
|
"summary": "Run a shorter architecture review with parallel map and assess phases for faster first results.",
|
|
6
|
-
"version": "0.1.80",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "pr-review-fix-ci",
|
|
4
4
|
"title": "PR Review Fix CI",
|
|
5
5
|
"summary": "Review a pull request or branch, inspect CI failures, diagnose actionable issues, optionally patch, verify, and summarize with evidence.",
|
|
6
|
-
"version": "0.1.80",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "release-cut",
|
|
4
4
|
"title": "Release Cut",
|
|
5
5
|
"summary": "Prepare a release with checklist discipline: version checks, changelog, tests, packaging, release notes, and final verification.",
|
|
6
|
-
"version": "0.1.80",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "research-synthesis",
|
|
4
4
|
"title": "Research Synthesis",
|
|
5
5
|
"summary": "Split a research question into claims, investigate sources, cross-check evidence, verify claims, and synthesize a concise answer.",
|
|
6
|
-
"version": "0.1.80",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
package/dist/agent-config.js
CHANGED
|
@@ -139,18 +139,30 @@ function agentConfigFromArgs(args) {
|
|
|
139
139
|
// npx/global install, where $(pwd)-relative paths don't exist) can configure a
|
|
140
140
|
// WORKING agent without knowing where the package landed on disk:
|
|
141
141
|
// --agent-command builtin:claude (or CW_AGENT_COMMAND=builtin:claude)
|
|
142
|
-
// resolves to the packaged
|
|
143
|
-
//
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
142
|
+
// resolves to the packaged wrapper invocation. Still pure config — the template
|
|
143
|
+
// is an out-of-process delegation script; CW never calls a model API.
|
|
144
|
+
//
|
|
145
|
+
// The builtin set is DATA, not a kernel TS literal (FreeBSD-audit L15): it lives
|
|
146
|
+
// in scripts/agents/builtin-templates.json (vendor name -> wrapper script name).
|
|
147
|
+
// Adding a vendor is a content/distribution step (drop a wrapper + a JSON line),
|
|
148
|
+
// not a kernel edit — keeping CW vendor-agnostic at the source level.
|
|
149
|
+
function builtinAgentTemplates() {
|
|
150
|
+
const agentsDir = node_path_1.default.join(__dirname, "..", "scripts", "agents");
|
|
151
|
+
const manifest = JSON.parse(node_fs_1.default.readFileSync(node_path_1.default.join(agentsDir, "builtin-templates.json"), "utf8"));
|
|
152
|
+
const out = {};
|
|
153
|
+
for (const [name, script] of Object.entries(manifest.templates || {})) {
|
|
154
|
+
out[name] = `node ${node_path_1.default.join(agentsDir, script)} {{input}} {{result}}`;
|
|
155
|
+
}
|
|
156
|
+
return out;
|
|
157
|
+
}
|
|
147
158
|
function expandBuiltinAgentCommand(command) {
|
|
148
159
|
if (!command || !command.startsWith("builtin:"))
|
|
149
160
|
return command;
|
|
150
161
|
const name = command.slice("builtin:".length).trim();
|
|
151
|
-
const
|
|
162
|
+
const templates = builtinAgentTemplates();
|
|
163
|
+
const template = templates[name];
|
|
152
164
|
if (!template) {
|
|
153
|
-
throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(
|
|
165
|
+
throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(templates).join(", ")}`);
|
|
154
166
|
}
|
|
155
167
|
return template;
|
|
156
168
|
}
|
|
@@ -200,6 +212,8 @@ function setAgentConfigFile(patch, env = process.env) {
|
|
|
200
212
|
endpoint: firstDefined(incoming.endpoint, current.endpoint),
|
|
201
213
|
model: firstDefined(incoming.model, current.model),
|
|
202
214
|
timeoutMs: firstDefined(incoming.timeoutMs, current.timeoutMs),
|
|
215
|
+
attestPublicKey: firstDefined(incoming.attestPublicKey, current.attestPublicKey),
|
|
216
|
+
requireAttestedTelemetry: firstDefined(incoming.requireAttestedTelemetry, current.requireAttestedTelemetry),
|
|
203
217
|
source: "file"
|
|
204
218
|
};
|
|
205
219
|
const stored = redacted(merged);
|
|
@@ -20,7 +20,14 @@ const state_1 = require("./state");
|
|
|
20
20
|
const state_node_1 = require("./state-node");
|
|
21
21
|
const trust_audit_1 = require("./trust-audit");
|
|
22
22
|
const collaboration_1 = require("./collaboration");
|
|
23
|
+
const compare_1 = require("./compare");
|
|
23
24
|
exports.CANDIDATE_SCHEMA_VERSION = 1;
|
|
25
|
+
/** Verdict thresholds on a score's normalized value [0,1], declared once so the
|
|
26
|
+
* numbers carry intent instead of being buried as literals in verdictFor(). A
|
|
27
|
+
* normalized score at-or-above PASS is "pass"; at-or-above WARN (but below
|
|
28
|
+
* PASS) is "warn"; anything lower is "fail". Same numbers as before. */
|
|
29
|
+
const VERDICT_PASS_THRESHOLD = 0.7;
|
|
30
|
+
const VERDICT_WARN_THRESHOLD = 0.4;
|
|
24
31
|
function createCandidateScoring(options = {}) {
|
|
25
32
|
return {
|
|
26
33
|
registerCandidate: (run, input) => registerCandidate(run, input, options),
|
|
@@ -39,7 +46,7 @@ function registerCandidate(run, input, options = {}) {
|
|
|
39
46
|
if (existing)
|
|
40
47
|
return existing;
|
|
41
48
|
const now = new Date().toISOString();
|
|
42
|
-
const id = input.id || createCandidateId(input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
|
|
49
|
+
const id = input.id || createCandidateId(run, input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
|
|
43
50
|
const candidate = {
|
|
44
51
|
schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
|
|
45
52
|
id,
|
|
@@ -109,7 +116,7 @@ function getCandidate(run, candidateId) {
|
|
|
109
116
|
}
|
|
110
117
|
function scoreCandidate(run, candidateId, input, options = {}) {
|
|
111
118
|
const candidate = requireCandidate(run, candidateId);
|
|
112
|
-
const scoreId = input.id || createScoreId(
|
|
119
|
+
const scoreId = input.id || createScoreId(candidate);
|
|
113
120
|
const evidence = (0, trust_audit_1.normalizeEvidence)(run, input.evidence || [], {
|
|
114
121
|
source: "operator-recorded",
|
|
115
122
|
candidateId,
|
|
@@ -279,7 +286,7 @@ function selectCandidate(run, candidateId, options = {}, scoringOptions = {}) {
|
|
|
279
286
|
const now = new Date().toISOString();
|
|
280
287
|
const selection = {
|
|
281
288
|
schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
|
|
282
|
-
id: createSelectionId(candidateId),
|
|
289
|
+
id: createSelectionId(run, candidateId),
|
|
283
290
|
runId: run.id,
|
|
284
291
|
candidateId,
|
|
285
292
|
selectedAt: now,
|
|
@@ -558,16 +565,16 @@ function inferCandidateKind(input) {
|
|
|
558
565
|
return "manual";
|
|
559
566
|
}
|
|
560
567
|
function bestScore(scores) {
|
|
561
|
-
return [...scores].sort((left, right) => right.normalized - left.normalized || left.createdAt
|
|
568
|
+
return [...scores].sort((left, right) => right.normalized - left.normalized || (0, compare_1.compareBytes)(left.createdAt, right.createdAt))[0];
|
|
562
569
|
}
|
|
563
570
|
function compareRows(left, right, policy) {
|
|
564
571
|
const byScore = right.normalized - left.normalized;
|
|
565
572
|
if (byScore !== 0)
|
|
566
573
|
return byScore;
|
|
567
574
|
if (policy.tieBreaker === "candidateId")
|
|
568
|
-
return left.candidate.id
|
|
569
|
-
const byCreated = left.candidate.createdAt
|
|
570
|
-
return byCreated || left.candidate.id
|
|
575
|
+
return (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
|
|
576
|
+
const byCreated = (0, compare_1.compareBytes)(left.candidate.createdAt, right.candidate.createdAt);
|
|
577
|
+
return byCreated || (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
|
|
571
578
|
}
|
|
572
579
|
function detectTies(candidates) {
|
|
573
580
|
const groups = new Map();
|
|
@@ -578,10 +585,15 @@ function detectTies(candidates) {
|
|
|
578
585
|
return Array.from(groups.values()).filter((group) => group.length > 1);
|
|
579
586
|
}
|
|
580
587
|
function mergePolicy(policy = {}) {
|
|
588
|
+
// NOTE: `policy.criteria` (string[]) is intentionally NOT carried here. A
|
|
589
|
+
// whole-repo grep shows it has no read points — scoring reads each score's
|
|
590
|
+
// own `input.criteria` (Record<string, number>), not this list. Emitting a
|
|
591
|
+
// default `criteria: []` advertised a guarantee the code never honored and
|
|
592
|
+
// could silently drift, so it is dropped. The field stays OPTIONAL on
|
|
593
|
+
// CandidateScoringPolicy / CandidateRanking.policy for forward-compat input.
|
|
581
594
|
return {
|
|
582
595
|
id: policy.id || "cw.candidate.default",
|
|
583
596
|
title: policy.title || "Default Candidate Scoring",
|
|
584
|
-
criteria: policy.criteria || [],
|
|
585
597
|
requireEvidence: policy.requireEvidence ?? true,
|
|
586
598
|
requireVerifierGate: policy.requireVerifierGate ?? true,
|
|
587
599
|
minNormalized: policy.minNormalized,
|
|
@@ -591,9 +603,9 @@ function mergePolicy(policy = {}) {
|
|
|
591
603
|
function verdictFor(normalized, policy) {
|
|
592
604
|
if (policy.minNormalized !== undefined && normalized < policy.minNormalized)
|
|
593
605
|
return "fail";
|
|
594
|
-
if (normalized >= 0.7)
|
|
606
|
+
if (normalized >= VERDICT_PASS_THRESHOLD)
|
|
595
607
|
return "pass";
|
|
596
|
-
if (normalized >= 0.4)
|
|
608
|
+
if (normalized >= VERDICT_WARN_THRESHOLD)
|
|
597
609
|
return "warn";
|
|
598
610
|
return "fail";
|
|
599
611
|
}
|
|
@@ -616,18 +628,26 @@ function indexPath(run) {
|
|
|
616
628
|
function rankingPath(run) {
|
|
617
629
|
return node_path_1.default.join(candidateRoot(run), "ranking.json");
|
|
618
630
|
}
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
+
// Deterministic candidate id (FreeBSD-audit L12/L13): the candidate's POSITION in
|
|
632
|
+
// the run's candidate set, qualified by kind + seed (a stable worker/task/result
|
|
633
|
+
// id) for readability. No wall-clock stamp, no PRNG suffix — re-running the same
|
|
634
|
+
// workflow mints byte-identical candidate ids, keeping fingerprints replay-stable.
|
|
635
|
+
function createCandidateId(run, kind, seed) {
|
|
636
|
+
const seq = (run.candidates || []).length + 1;
|
|
637
|
+
return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${String(seq).padStart(4, "0")}`;
|
|
638
|
+
}
|
|
639
|
+
// Deterministic score id (FreeBSD-audit L12/L13): the score's POSITION within its
|
|
640
|
+
// candidate's score list. Scores only ever append, so the sequence is unique per
|
|
641
|
+
// candidate and stable across replays.
|
|
642
|
+
function createScoreId(candidate) {
|
|
643
|
+
const seq = (candidate.scores || []).length + 1;
|
|
644
|
+
return `score-${(0, state_1.safeFileName)(candidate.id)}-${String(seq).padStart(4, "0")}`;
|
|
645
|
+
}
|
|
646
|
+
// Deterministic selection id (FreeBSD-audit L12/L13): the selection's POSITION in
|
|
647
|
+
// the run's append-only selection log. No clock, no PRNG.
|
|
648
|
+
function createSelectionId(run, candidateId) {
|
|
649
|
+
const seq = (run.candidateSelections || []).length + 1;
|
|
650
|
+
return `selection-${(0, state_1.safeFileName)(candidateId)}-${String(seq).padStart(4, "0")}`;
|
|
631
651
|
}
|
|
632
652
|
function shouldPersist(options) {
|
|
633
653
|
return options.persist !== false;
|
package/dist/capability-core.js
CHANGED
|
@@ -9,11 +9,12 @@
|
|
|
9
9
|
// expressed here and called identically by both surfaces. A composite that lives
|
|
10
10
|
// in only one surface is exactly the cross-surface drift v0.1.27 forbids.
|
|
11
11
|
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
// the
|
|
15
|
-
//
|
|
16
|
-
//
|
|
12
|
+
// Capability metadata (which entry is on which surface, tool names, jsonMode) is
|
|
13
|
+
// declared in ONE place — the BUILTIN_CAPABILITIES table in capability-registry.ts,
|
|
14
|
+
// the single source of truth both surfaces and the parity gate read. New
|
|
15
|
+
// capabilities add a row there. (A v0.1.46 "self-register at load time" mechanism
|
|
16
|
+
// was removed: the registry snapshot was taken before those registrations ran, so
|
|
17
|
+
// they were silently dead duplicates of the table — see capability-registry.ts.)
|
|
17
18
|
//
|
|
18
19
|
// See docs/cli-mcp-parity.7.md and src/capability-registry.ts.
|
|
19
20
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
@@ -37,6 +38,7 @@ exports.runArchive = runArchive;
|
|
|
37
38
|
exports.runRerun = runRerun;
|
|
38
39
|
exports.runExportArchive = runExportArchive;
|
|
39
40
|
exports.runImportArchive = runImportArchive;
|
|
41
|
+
exports.runInspectArchive = runInspectArchive;
|
|
40
42
|
exports.runVerifyImport = runVerifyImport;
|
|
41
43
|
exports.queueAdd = queueAdd;
|
|
42
44
|
exports.queueList = queueList;
|
|
@@ -65,13 +67,15 @@ exports.withoutRuntimeKeys = withoutRuntimeKeys;
|
|
|
65
67
|
exports.optionalString = optionalString;
|
|
66
68
|
exports.isRecord = isRecord;
|
|
67
69
|
exports.telemetryVerify = telemetryVerify;
|
|
70
|
+
exports.auditVerify = auditVerify;
|
|
68
71
|
exports.demoTamper = demoTamper;
|
|
69
|
-
const capability_registry_1 = require("./capability-registry");
|
|
70
72
|
const drive_1 = require("./drive");
|
|
71
73
|
const agent_config_1 = require("./agent-config");
|
|
72
74
|
const run_registry_1 = require("./run-registry");
|
|
73
75
|
const observability_1 = require("./observability");
|
|
74
76
|
const telemetry_ledger_1 = require("./telemetry-ledger");
|
|
77
|
+
const telemetry_attestation_1 = require("./telemetry-attestation");
|
|
78
|
+
const trust_audit_1 = require("./trust-audit");
|
|
75
79
|
const telemetry_demo_1 = require("./telemetry-demo");
|
|
76
80
|
const state_1 = require("./state");
|
|
77
81
|
const run_export_1 = require("./run-export");
|
|
@@ -90,8 +94,6 @@ function planSummary(runner, workflowId, options) {
|
|
|
90
94
|
pendingTasks: run.tasks.filter((task) => task.status === "pending").length
|
|
91
95
|
};
|
|
92
96
|
}
|
|
93
|
-
// Auto-register with the capability registry (v0.1.46 — no need to edit capability-registry.ts)
|
|
94
|
-
(0, capability_registry_1.registerCapability)({ capability: "plan", summary: "Plan a workflow run on a repo + app.", entry: "planSummary", surface: "both", cli: { path: ["plan"], jsonMode: "default" }, mcp: { tool: "cw_plan" } });
|
|
95
97
|
// ---- canonical app-run payload --------------------------------------------
|
|
96
98
|
// Both `cw app run` and `cw_app_run` resolve to this exact object. Structured
|
|
97
99
|
// app inputs + optional sandbox resolution, then a compact operator status.
|
|
@@ -117,7 +119,6 @@ function appRun(runner, args) {
|
|
|
117
119
|
sandboxProfile: resolvedSandbox
|
|
118
120
|
};
|
|
119
121
|
}
|
|
120
|
-
(0, capability_registry_1.registerCapability)({ capability: "app.run", summary: "Plan a run on a named app with structured inputs.", entry: "appRun", surface: "both", cli: { path: ["app", "run"], caseTokens: ["app"], jsonMode: "default" }, mcp: { tool: "cw_app_run" } });
|
|
121
122
|
// ---- canonical sandbox choice payload -------------------------------------
|
|
122
123
|
// Both `cw sandbox choose|resolve` and `cw_sandbox_choose|cw_sandbox_resolve`
|
|
123
124
|
// resolve to this exact object.
|
|
@@ -154,7 +155,6 @@ function commitEnvelope(runner, runId, args) {
|
|
|
154
155
|
commit
|
|
155
156
|
};
|
|
156
157
|
}
|
|
157
|
-
(0, capability_registry_1.registerCapability)({ capability: "commit", summary: "Create a verifier-gated state commit with evidence.", entry: "commitEnvelope", surface: "both", cli: { path: ["commit"], jsonMode: "default" }, mcp: { tool: "cw_commit" }, payloadIdentical: false, reason: "CLI renders a human summary with path hints; MCP returns the structured commit payload alone." });
|
|
158
158
|
function compactOperatorStatus(status) {
|
|
159
159
|
return {
|
|
160
160
|
runId: status.runId,
|
|
@@ -242,11 +242,19 @@ function runList(reg, args) {
|
|
|
242
242
|
function runShow(reg, runId, args) {
|
|
243
243
|
return reg.showRun(runId, { scope: scopeOf(args, "home") });
|
|
244
244
|
}
|
|
245
|
-
function runResume(reg, runId, args) {
|
|
246
|
-
|
|
245
|
+
function runResume(reg, runner, runId, args) {
|
|
246
|
+
const base = reg.resume(runId, {
|
|
247
247
|
scope: scopeOf(args, "home"),
|
|
248
248
|
limit: args.limit === undefined ? undefined : Number(args.limit)
|
|
249
249
|
});
|
|
250
|
+
// Default (no --drive/--once): read-only, byte-identical to before.
|
|
251
|
+
if (!isTrue(args.drive) && !isTrue(args.once))
|
|
252
|
+
return base;
|
|
253
|
+
// Opt-in continuation: hand the resolved run to the EXISTING agent-delegation
|
|
254
|
+
// drive loop (re-plans nothing; picks up pending/running tasks from durable
|
|
255
|
+
// state). An unconfigured agent surfaces drive.status="blocked" (fail-closed).
|
|
256
|
+
const drive = runDrive(runner, { ...args, runId: base.runId, repo: base.repo, once: isTrue(args.once) });
|
|
257
|
+
return { ...base, drive };
|
|
250
258
|
}
|
|
251
259
|
function runArchive(reg, runId, args) {
|
|
252
260
|
if (runId) {
|
|
@@ -286,6 +294,17 @@ function runImportArchive(runner, args) {
|
|
|
286
294
|
return { ...imported, registry: registryReport };
|
|
287
295
|
});
|
|
288
296
|
}
|
|
297
|
+
// Read-only: inspect a portable archive's integrity WITHOUT importing it. Routes
|
|
298
|
+
// both surfaces through one shared core entry. The runner is unused (no registry
|
|
299
|
+
// touch — inspection writes nothing) but kept for dispatch-signature symmetry.
|
|
300
|
+
function runInspectArchive(_runner, args) {
|
|
301
|
+
return withInvocationCwd(args, () => {
|
|
302
|
+
const archive = optionalString(args.archive || args.path || args.file);
|
|
303
|
+
if (!archive)
|
|
304
|
+
throw new Error("run inspect-archive requires an archive path (positional, --archive, --path, or --file)");
|
|
305
|
+
return (0, run_export_1.inspectArchive)(node_path_1.default.resolve(archive));
|
|
306
|
+
});
|
|
307
|
+
}
|
|
289
308
|
function runVerifyImport(runner, runId, args) {
|
|
290
309
|
return withInvocationCwd(args, () => (0, run_export_1.verifyImportedRun)(runner.loadRun(runId)));
|
|
291
310
|
}
|
|
@@ -422,7 +441,8 @@ const DRIVE_RUNTIME_KEYS = [
|
|
|
422
441
|
"agentModel",
|
|
423
442
|
"agent-model",
|
|
424
443
|
"agentTimeoutMs",
|
|
425
|
-
"agent-timeout-ms"
|
|
444
|
+
"agent-timeout-ms",
|
|
445
|
+
"resume"
|
|
426
446
|
];
|
|
427
447
|
function planInputsFor(args) {
|
|
428
448
|
const copy = withoutRuntimeKeys(args);
|
|
@@ -474,6 +494,12 @@ exports.QUICKSTART_DEFAULT_APP = "architecture-review";
|
|
|
474
494
|
function quickstart(runner, args) {
|
|
475
495
|
const appId = String(args.appId || args.app || args.workflowId || exports.QUICKSTART_DEFAULT_APP);
|
|
476
496
|
const agentConfigured = Boolean((0, agent_config_1.resolveAgentConfig)(args).command || (0, agent_config_1.resolveAgentConfig)(args).endpoint);
|
|
497
|
+
// `--resume`: a discoverability flag over the existing continuation. With no
|
|
498
|
+
// `--run`, advance exactly ONE step (reuse the `--once` path) and print a
|
|
499
|
+
// copy-pasteable continue line; with `--run <id>`, continue that run to
|
|
500
|
+
// completion (the default drive). It adds no new execution path.
|
|
501
|
+
const resume = isTrue(args.resume);
|
|
502
|
+
const resumeRunId = resume ? optionalString(args.runId || args.run) : undefined;
|
|
477
503
|
// `--preview`: read-only, deterministic next-step projection (no spawn, no commit).
|
|
478
504
|
// Plan a fresh run (the read-only first verb) then project the next drive step.
|
|
479
505
|
if (isTrue(args.preview)) {
|
|
@@ -498,7 +524,10 @@ function quickstart(runner, args) {
|
|
|
498
524
|
// Drive end-to-end (or one `--once` step). runDrive plans the run, delegates each
|
|
499
525
|
// worker to the agent backend, and commits — we add only the report write + a
|
|
500
526
|
// single assembled payload. No orchestration is duplicated here.
|
|
501
|
-
|
|
527
|
+
// `--resume` with no run id advances a single step so a newcomer WITNESSES the
|
|
528
|
+
// stop-then-resume; with a run id it continues to completion. Non-resume paths
|
|
529
|
+
// are untouched (byte-identical default).
|
|
530
|
+
const result = runDrive(runner, { ...args, appId, ...(resume && !resumeRunId ? { once: true } : {}) });
|
|
502
531
|
// Always (re)write the report so the one command yields a report.md on disk, even
|
|
503
532
|
// when the drive blocked/parked (a partial report is still useful triage).
|
|
504
533
|
const cwd0 = process.cwd();
|
|
@@ -530,7 +559,9 @@ function quickstart(runner, args) {
|
|
|
530
559
|
hint = `the drive is blocked — inspect: cw run drive ${result.runId}`;
|
|
531
560
|
}
|
|
532
561
|
else if (result.status === "in-progress") {
|
|
533
|
-
hint =
|
|
562
|
+
hint = resume
|
|
563
|
+
? `one step advanced — continue: cw quickstart ${appId} --run ${result.runId} --resume`
|
|
564
|
+
: `one step advanced (--once) — continue: cw quickstart ${appId} --run ${result.runId} --once`;
|
|
534
565
|
}
|
|
535
566
|
return {
|
|
536
567
|
schemaVersion: 1,
|
|
@@ -546,7 +577,11 @@ function quickstart(runner, args) {
|
|
|
546
577
|
statePath: result.statePath,
|
|
547
578
|
agentConfigured,
|
|
548
579
|
steps: result.steps,
|
|
549
|
-
hint
|
|
580
|
+
hint,
|
|
581
|
+
// Stamp resumedFrom ONLY when we continued an explicit run. Conditional spread
|
|
582
|
+
// keeps the key absent on the default/fresh path (own-property absent + omitted
|
|
583
|
+
// by JSON.stringify), so default output is byte-identical.
|
|
584
|
+
...(resumeRunId ? { resumedFrom: resumeRunId } : {})
|
|
550
585
|
};
|
|
551
586
|
}
|
|
552
587
|
/** Read-only, deterministic projection of the effective agent config (secret-stripped). */
|
|
@@ -680,15 +715,57 @@ function telemetryVerify(runner, args) {
|
|
|
680
715
|
throw new Error("telemetry verify requires a run id (cw telemetry verify <run-id>)");
|
|
681
716
|
const run = runner.loadRun(runId);
|
|
682
717
|
const v = (0, telemetry_ledger_1.verifyTelemetryLedger)(run);
|
|
718
|
+
// Opt-in independent signature re-verification. verifyTelemetryLedger re-proves
|
|
719
|
+
// the chain (so the stored attestation verdicts were not edited); supplying the
|
|
720
|
+
// trust public key (--pubkey / CW_AGENT_ATTEST_PUBKEY) additionally RE-RUNS the
|
|
721
|
+
// ed25519 check over each `attested` record's stored raw usage rather than
|
|
722
|
+
// trusting that verdict, so a forged signature can no longer ride a green chain.
|
|
723
|
+
const trustPublicKeyInput = optionalString(args.pubkey || args.pubKey || args.publicKey) || process.env.CW_AGENT_ATTEST_PUBKEY;
|
|
724
|
+
const trustPublicKey = (0, telemetry_attestation_1.resolveTrustPublicKey)(trustPublicKeyInput);
|
|
725
|
+
const keyChecks = trustPublicKeyInput && !trustPublicKey
|
|
726
|
+
? [{ name: "signature-key", pass: false, code: "telemetry-pubkey-unreadable" }]
|
|
727
|
+
: [];
|
|
728
|
+
const sig = (0, telemetry_attestation_1.verifyTelemetrySignatures)(v.records, trustPublicKey);
|
|
729
|
+
const failedChecks = [...v.checks.filter((c) => !c.pass), ...keyChecks, ...sig.checks.filter((c) => !c.pass)];
|
|
683
730
|
return {
|
|
684
731
|
schemaVersion: 1,
|
|
685
732
|
runId: run.id,
|
|
686
733
|
present: v.present,
|
|
687
|
-
|
|
734
|
+
// Chain integrity AND (when a key was supplied) every attested signature must
|
|
735
|
+
// re-verify. With no key, sig.failed is 0 → unchanged chain-only behavior.
|
|
736
|
+
verified: v.verified && keyChecks.length === 0 && sig.failed === 0,
|
|
688
737
|
records: v.records.length,
|
|
689
738
|
attested: v.attested,
|
|
690
739
|
unattested: v.unattested,
|
|
691
740
|
absent: v.absent,
|
|
741
|
+
signatureKeyProvided: sig.keyProvided,
|
|
742
|
+
signaturesChecked: sig.checked,
|
|
743
|
+
signaturesReverified: sig.reverified,
|
|
744
|
+
signaturesFailed: sig.failed,
|
|
745
|
+
failedChecks: failedChecks.map((c) => ({ name: c.name, code: c.code }))
|
|
746
|
+
};
|
|
747
|
+
}
|
|
748
|
+
// audit.verify — fail-closed re-prove of a run's trust-audit hash chain. The peer
|
|
749
|
+
// of telemetry.verify for the sandbox/policy/commit-gate decision log: recomputes
|
|
750
|
+
// every event hash from genesis, checks chain linkage, and catches the
|
|
751
|
+
// unchained-event forgery. Exposed as a verb (not just embedded in `audit summary`,
|
|
752
|
+
// which always exits 0) so `cw audit verify <run> && deploy` can gate on the exit
|
|
753
|
+
// code. POLA: a run with no audit log is present:false / verified:true / exit 0.
|
|
754
|
+
/**
 * Re-verify a run's trust audit and summarize the outcome.
 *
 * @param {object} runner - Provides `loadRun(runId)`.
 * @param {object} args - Invocation arguments; the run id is read from
 *   `runId` or `run`.
 * @returns {object} A summary with `schemaVersion`, `runId`, the counters
 *   reported by `verifyTrustAudit`, and `failedChecks` (the `name`/`code` of
 *   every check whose `pass` is falsy).
 * @throws {Error} If no run id is supplied.
 */
function auditVerify(runner, args) {
    const requestedRunId = optionalString(args.runId || args.run);
    if (!requestedRunId) {
        throw new Error("audit verify requires a run id (cw audit verify <run-id>)");
    }
    const run = runner.loadRun(requestedRunId);
    const audit = (0, trust_audit_1.verifyTrustAudit)(run);
    // Key order is kept exactly as before so serialized output does not change.
    const summary = {
        schemaVersion: 1,
        runId: run.id,
        present: audit.present,
        verified: audit.verified,
        eventCount: audit.eventCount,
        chained: audit.chained,
        unchained: audit.unchained,
        corruptLines: audit.corruptLines
    };
    // Report only the identifying fields of each failing check.
    summary.failedChecks = audit.checks
        .filter((check) => !check.pass)
        .map((check) => ({ name: check.name, code: check.code }));
    return summary;
}
|