cool-workflow 0.1.79 → 0.1.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +51 -3
- package/apps/architecture-review/app.json +1 -1
- package/apps/architecture-review-fast/app.json +64 -0
- package/apps/architecture-review-fast/workflow.js +153 -0
- package/apps/end-to-end-golden-path/app.json +1 -1
- package/apps/pr-review-fix-ci/app.json +1 -1
- package/apps/release-cut/app.json +1 -1
- package/apps/research-synthesis/app.json +1 -1
- package/dist/agent-config.js +21 -7
- package/dist/candidate-scoring.js +42 -22
- package/dist/capability-core.js +132 -17
- package/dist/capability-registry.js +138 -168
- package/dist/cli.js +97 -98
- package/dist/collaboration.js +5 -6
- package/dist/commit.js +20 -6
- package/dist/compare.js +18 -0
- package/dist/coordinator/classify.js +45 -0
- package/dist/coordinator/paths.js +42 -0
- package/dist/coordinator/util.js +129 -0
- package/dist/coordinator.js +127 -300
- package/dist/dispatch.js +35 -0
- package/dist/drive.js +79 -6
- package/dist/error-feedback.js +8 -4
- package/dist/evidence-reasoning.js +3 -3
- package/dist/execution-backend/agent.js +331 -0
- package/dist/execution-backend/probes.js +96 -0
- package/dist/execution-backend/util.js +47 -0
- package/dist/execution-backend.js +73 -421
- package/dist/mcp-server.js +79 -183
- package/dist/multi-agent/graph.js +84 -0
- package/dist/multi-agent/helpers.js +145 -0
- package/dist/multi-agent/paths.js +22 -0
- package/dist/multi-agent-eval/format.js +194 -0
- package/dist/multi-agent-eval/normalize.js +51 -0
- package/dist/multi-agent-eval.js +39 -244
- package/dist/multi-agent-host.js +0 -19
- package/dist/multi-agent.js +125 -314
- package/dist/node-snapshot.js +3 -3
- package/dist/observability/format.js +61 -0
- package/dist/observability/intake.js +98 -0
- package/dist/observability.js +14 -160
- package/dist/operator-ux/format.js +364 -0
- package/dist/operator-ux.js +22 -363
- package/dist/orchestrator/lifecycle-operations.js +2 -1
- package/dist/orchestrator/report.js +8 -0
- package/dist/orchestrator.js +26 -9
- package/dist/reclamation.js +26 -21
- package/dist/run-export.js +494 -25
- package/dist/run-registry/derive.js +172 -0
- package/dist/run-registry/format.js +124 -0
- package/dist/run-registry/gc.js +251 -0
- package/dist/run-registry/policy.js +16 -0
- package/dist/run-registry/queue.js +116 -0
- package/dist/run-registry.js +89 -597
- package/dist/run-state-schema.js +1 -0
- package/dist/sandbox-profile.js +43 -2
- package/dist/state-explosion/format.js +159 -0
- package/dist/state-explosion/helpers.js +82 -0
- package/dist/state-explosion.js +165 -304
- package/dist/state-node.js +19 -4
- package/dist/telemetry-attestation.js +55 -0
- package/dist/telemetry-demo.js +15 -3
- package/dist/telemetry-ledger.js +60 -15
- package/dist/topology.js +25 -8
- package/dist/triggers.js +33 -14
- package/dist/trust-audit.js +145 -33
- package/dist/version.js +1 -1
- package/dist/worker-isolation/helpers.js +51 -0
- package/dist/worker-isolation/paths.js +46 -0
- package/dist/worker-isolation.js +39 -115
- package/docs/agent-delegation-drive.7.md +71 -0
- package/docs/canonical-workflow-apps.7.md +37 -0
- package/docs/cli-mcp-parity.7.md +16 -0
- package/docs/contract-migration-tooling.7.md +6 -0
- package/docs/control-plane-scheduling.7.md +6 -0
- package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
- package/docs/durable-state-and-locking.7.md +8 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
- package/docs/execution-backends.7.md +6 -0
- package/docs/index.md +2 -0
- package/docs/launch/demo.tape +28 -0
- package/docs/launch/launch-kit.md +96 -17
- package/docs/launch/pre-launch-checklist.md +53 -0
- package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
- package/docs/multi-agent-eval-replay-harness.7.md +6 -0
- package/docs/multi-agent-operator-ux.7.md +6 -0
- package/docs/multi-agent-trust-policy-audit.7.md +27 -0
- package/docs/node-snapshot-diff-replay.7.md +6 -0
- package/docs/observability-cost-accounting.7.md +6 -0
- package/docs/project-index.md +27 -6
- package/docs/real-execution-backends.7.md +6 -0
- package/docs/release-and-migration.7.md +8 -0
- package/docs/release-tooling.7.md +6 -0
- package/docs/routines.md +23 -0
- package/docs/run-registry-control-plane.7.md +89 -2
- package/docs/run-retention-reclamation.7.md +8 -0
- package/docs/source-context-profiles.7.md +119 -0
- package/docs/state-explosion-management.7.md +13 -0
- package/docs/team-collaboration.7.md +6 -0
- package/docs/trust-model.md +267 -0
- package/docs/unix-principles.md +49 -1
- package/docs/vendor-manifest-loadability.7.md +43 -0
- package/docs/web-desktop-workbench.7.md +6 -0
- package/manifest/plugin.manifest.json +1 -1
- package/manifest/source-context-profiles.json +142 -0
- package/package.json +4 -1
- package/scripts/agents/builtin-templates.json +7 -0
- package/scripts/agents/claude-p-agent.js +129 -43
- package/scripts/architecture-review-fast.js +362 -0
- package/scripts/bump-version.js +5 -10
- package/scripts/canonical-apps-list.js +64 -0
- package/scripts/canonical-apps.js +36 -4
- package/scripts/coverage-gate.js +211 -0
- package/scripts/dogfood-release.js +1 -1
- package/scripts/golden-path.js +4 -4
- package/scripts/parity-check.js +5 -0
- package/scripts/release-check.js +5 -1
- package/scripts/source-context.js +291 -0
- package/scripts/version-sync-check.js +5 -7
- package/skills/ci-triage/SKILL.md +50 -0
- package/skills/ci-triage/agents/openai.yaml +4 -0
- package/skills/cool-workflow/SKILL.md +4 -1
- package/skills/deploy-check/SKILL.md +55 -0
- package/skills/deploy-check/agents/openai.yaml +4 -0
- package/skills/design-qa/SKILL.md +49 -0
- package/skills/design-qa/agents/openai.yaml +4 -0
- package/skills/pr-review/SKILL.md +45 -0
- package/skills/pr-review/agents/openai.yaml +4 -0
- package/dist/capability-dispatcher.js +0 -86
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cool-workflow",
|
|
3
3
|
"description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
|
|
4
|
-
"version": "0.1.79",
|
|
4
|
+
"version": "0.1.81",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "COOLWHITE LLC"
|
|
7
7
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cool-workflow",
|
|
3
|
-
"version": "0.1.79",
|
|
3
|
+
"version": "0.1.81",
|
|
4
4
|
"description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "COOLWHITE LLC"
|
package/README.md
CHANGED
|
@@ -263,6 +263,40 @@ write paths, command execution, network access, and environment exposure. CW
|
|
|
263
263
|
stores and validates the policy, while the agent host enforces OS/process
|
|
264
264
|
runtime controls. See [docs/sandbox-profiles.7.md](docs/sandbox-profiles.7.md).
|
|
265
265
|
|
|
266
|
+
## Quickstart
|
|
267
|
+
|
|
268
|
+
**30-second proof, no install** — see that a recorded telemetry verdict can't be forged:
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
npx cool-workflow demo tamper
|
|
272
|
+
# builds a signed ed25519 ledger, forges it 2 ways, both caught offline
|
|
273
|
+
# -> VERDICT: tamper-evidence holds ✓
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Try a real run** — no clone needed; drive an architecture review with your own agent:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
npx cool-workflow quickstart architecture-review --repo /path/to/repo \
|
|
280
|
+
--question "Is this architecture sound?" --agent-command builtin:claude
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
CW DELEGATES worker execution to your own agent. With no `--agent-command` (or
|
|
284
|
+
`CW_AGENT_COMMAND`) the drive fails closed (status `blocked`) — it never fabricates a
|
|
285
|
+
result. `--agent-command builtin:claude` resolves to a bundled read-only `claude -p`
|
|
286
|
+
wrapper (needs `claude` on your PATH).
|
|
287
|
+
|
|
288
|
+
**Re-prove a finished run, offline** (`cw` is the installed bin; or `npx cool-workflow <cmd>`):
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
cw telemetry verify <run-id> # re-checks the hash-chained ledger
|
|
292
|
+
cw telemetry verify <run-id> --pubkey pub.pem # also re-runs ed25519 signature checks
|
|
293
|
+
cw audit verify <run-id> # re-proves the trust-audit hash chain
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
More: `cw quickstart <app> --preview` (read-only dry run), `cw run resume <run-id> --drive`
|
|
297
|
+
(continue an interrupted run), `cw run inspect-archive <archive>` (integrity-check a
|
|
298
|
+
portable run archive without importing it).
|
|
299
|
+
|
|
266
300
|
## Structure
|
|
267
301
|
|
|
268
302
|
```text
|
|
@@ -300,6 +334,10 @@ cool-workflow
|
|
|
300
334
|
|
|
301
335
|
## Commands
|
|
302
336
|
|
|
337
|
+
Installed via npm, the bin is `cw` (alias `cool-workflow`): e.g. `cw list`,
|
|
338
|
+
`cw quickstart …`. From a cloned source checkout, before `npm run build`, use the
|
|
339
|
+
equivalent `node scripts/cw.js <cmd>` form shown in the examples below.
|
|
340
|
+
|
|
303
341
|
List bundled workflows:
|
|
304
342
|
|
|
305
343
|
```bash
|
|
@@ -619,7 +657,7 @@ Replaces the linear migration chain with a BFS graph path resolver (`findMigrati
|
|
|
619
657
|
|
|
620
658
|
## Vendor-Adapter Registry (v0.1.47)
|
|
621
659
|
|
|
622
|
-
Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data.
|
|
660
|
+
Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data. Cross-vendor is proven by boot, not just by generation: `npm run manifest:load-check` (`node test/vendor-manifest-load-smoke.js`) loads every generated manifest (claude, codex, agents, gemini, opencode) and asserts each exposes the full tool surface (184 tools).
|
|
623
661
|
|
|
624
662
|
## P2 Fixes (v0.1.48)
|
|
625
663
|
|
|
@@ -649,8 +687,18 @@ The orchestration vision landed in one release, all reviewer-gated:
|
|
|
649
687
|
|
|
650
688
|
`--agent-command builtin:claude` resolves to a bundled read-only claude wrapper that completes workers with a real agent; the cross-directory quickstart crash is fixed; missing optional inputs no longer leak `{{name}}` into prompts. Published to npm (`cool-workflow`, bins `cw`/`cool-workflow`) with LICENSE and metadata. Live dogfood proof committed under `docs/dogfood/`.
|
|
651
689
|
|
|
652
|
-
## Tamper-evidence demo (
|
|
690
|
+
## Tamper-evidence demo (v0.1.79)
|
|
653
691
|
|
|
654
|
-
`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` is the operator-facing
|
|
692
|
+
`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` (`cw_telemetry_verify` on MCP) is the operator-facing re-proof: by default it recomputes the hash chain on disk so any later edit to a recorded verdict or usage digest is caught; add `--pubkey <pem-or-path>` to re-run each `attested` hop's ed25519 signature check against the stored raw usage too. What this does and does **not** prove — including the single-keyholder ceiling — is documented honestly in [Trust Model & Limitations](docs/trust-model.md); read it before relying on a green verdict.
|
|
693
|
+
|
|
694
|
+
## Opt-in live agent output during a drive (on main, ships next)
|
|
695
|
+
|
|
696
|
+
Set `CW_AGENT_STREAM=1` to see each worker's live agent trace. The bundled claude wrapper (`builtin:claude` / `scripts/agents/claude-p-agent.js`) keeps the legacy `--output-format json` path by default; only the opt-in path runs claude in `--output-format stream-json` and renders a concise human trace (tool uses, assistant text, per-turn summaries) to **stderr**. CW core forwards that stderr to the operator's terminal only when `CW_AGENT_STREAM=1`, CW's own stderr is a TTY, and `CW_NO_STREAM` is not set; piped/CI runs stay silent (Rule of Silence). Core only forwards the stream, never parses it — vendor-specific rendering is the wrapper's concern (policy), not the kernel's (mechanism).
|
|
655
697
|
|
|
656
698
|
v0.1.79
|
|
699
|
+
|
|
700
|
+
## Fast Architecture Review (v0.1.80)
|
|
701
|
+
|
|
702
|
+
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
703
|
+
|
|
704
|
+
_This documentation tracks Cool Workflow v0.1.81. See [CHANGELOG](../../CHANGELOG.md) for the release notes._
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "architecture-review",
|
|
4
4
|
"title": "Architecture Review",
|
|
5
5
|
"summary": "Map a repository architecture, assess risks, verify important findings, and synthesize an evidence-backed verdict.",
|
|
6
|
-
"version": "0.1.79",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"id": "architecture-review-fast",
|
|
4
|
+
"title": "Architecture Review Fast",
|
|
5
|
+
"summary": "Run a shorter architecture review with parallel map and assess phases for faster first results.",
|
|
6
|
+
"version": "0.1.81",
|
|
7
|
+
"author": "COOLWHITE LLC",
|
|
8
|
+
"inputs": [
|
|
9
|
+
{
|
|
10
|
+
"name": "repo",
|
|
11
|
+
"type": "path",
|
|
12
|
+
"required": true,
|
|
13
|
+
"description": "Repository path to inspect."
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"name": "question",
|
|
17
|
+
"type": "string",
|
|
18
|
+
"required": true,
|
|
19
|
+
"description": "Architecture question or decision to review."
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"name": "invariant",
|
|
23
|
+
"type": "string",
|
|
24
|
+
"repeated": true,
|
|
25
|
+
"description": "Invariant that must remain true."
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "focus",
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Optional subsystem, risk area, or file path to emphasize.",
|
|
31
|
+
"default": "the highest-risk runtime and operator paths"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"name": "sourceContext",
|
|
35
|
+
"type": "path",
|
|
36
|
+
"description": "Optional JSONL source context file generated by scripts/source-context.js export.",
|
|
37
|
+
"default": ""
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"name": "sourceContextDigest",
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "Optional digest or cache key for the supplied source context.",
|
|
43
|
+
"default": ""
|
|
44
|
+
}
|
|
45
|
+
],
|
|
46
|
+
"sandboxProfiles": [
|
|
47
|
+
"readonly"
|
|
48
|
+
],
|
|
49
|
+
"compatibility": {
|
|
50
|
+
"minVersion": "0.1.79",
|
|
51
|
+
"workflowSchemaVersion": 1,
|
|
52
|
+
"notes": "Opt-in fast architecture review app; the full architecture-review app remains unchanged."
|
|
53
|
+
},
|
|
54
|
+
"metadata": {
|
|
55
|
+
"canonical": true,
|
|
56
|
+
"domain": "software-architecture",
|
|
57
|
+
"mode": "fast",
|
|
58
|
+
"fullReviewApp": "architecture-review",
|
|
59
|
+
"maintainedAs": "official-userland"
|
|
60
|
+
},
|
|
61
|
+
"workflow": {
|
|
62
|
+
"entrypoint": "workflow.js"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
const FAST_MODEL = modelHint("CW_ARCHITECTURE_REVIEW_FAST_MODEL");
|
|
2
|
+
const STRONG_MODEL = modelHint("CW_ARCHITECTURE_REVIEW_STRONG_MODEL");
|
|
3
|
+
|
|
4
|
+
module.exports = ({ workflow, phase, parallel, agent, artifact, input }) => {
|
|
5
|
+
const inputs = [
|
|
6
|
+
input("repo", {
|
|
7
|
+
type: "path",
|
|
8
|
+
required: true,
|
|
9
|
+
description: "Repository path to inspect."
|
|
10
|
+
}),
|
|
11
|
+
input("question", {
|
|
12
|
+
type: "string",
|
|
13
|
+
required: true,
|
|
14
|
+
description: "Architecture question or decision to review."
|
|
15
|
+
}),
|
|
16
|
+
input("invariant", {
|
|
17
|
+
type: "string",
|
|
18
|
+
repeated: true,
|
|
19
|
+
description: "Invariant that must remain true."
|
|
20
|
+
}),
|
|
21
|
+
input("focus", {
|
|
22
|
+
type: "string",
|
|
23
|
+
description: "Optional subsystem, risk area, or file path to emphasize.",
|
|
24
|
+
default: "the highest-risk runtime and operator paths"
|
|
25
|
+
}),
|
|
26
|
+
input("sourceContext", {
|
|
27
|
+
type: "path",
|
|
28
|
+
description: "Optional JSONL source context file generated by scripts/source-context.js export.",
|
|
29
|
+
default: ""
|
|
30
|
+
}),
|
|
31
|
+
input("sourceContextDigest", {
|
|
32
|
+
type: "string",
|
|
33
|
+
description: "Optional digest or cache key for the supplied source context.",
|
|
34
|
+
default: ""
|
|
35
|
+
})
|
|
36
|
+
];
|
|
37
|
+
|
|
38
|
+
return workflow({
|
|
39
|
+
id: "architecture-review-fast",
|
|
40
|
+
title: "Architecture Review Fast",
|
|
41
|
+
summary: "Run a shorter architecture review with parallel map and assess phases for faster first results.",
|
|
42
|
+
limits: {
|
|
43
|
+
maxAgents: 12,
|
|
44
|
+
maxConcurrentAgents: 4
|
|
45
|
+
},
|
|
46
|
+
inputs,
|
|
47
|
+
sandboxProfiles: ["readonly"],
|
|
48
|
+
metadata: {
|
|
49
|
+
mode: "fast",
|
|
50
|
+
fullReviewApp: "architecture-review"
|
|
51
|
+
},
|
|
52
|
+
phases: [
|
|
53
|
+
parallel("Map", [
|
|
54
|
+
agent(
|
|
55
|
+
"map:runtime-surface",
|
|
56
|
+
[
|
|
57
|
+
"Fast-map the runtime architecture in {{repo}} for {{question}}.",
|
|
58
|
+
contextInstruction(),
|
|
59
|
+
"Focus: {{focus}}. Invariants: {{invariant}}.",
|
|
60
|
+
"Return the primary entrypoints, state stores, execution paths, and the exact files or commands inspected."
|
|
61
|
+
].join(" "),
|
|
62
|
+
fastOptions("Runtime surface mapper", { resultCache: sourceContextResultCache() })
|
|
63
|
+
),
|
|
64
|
+
agent(
|
|
65
|
+
"map:operator-surface",
|
|
66
|
+
[
|
|
67
|
+
"Fast-map operator, CI, deployment, test, release, and background-job surfaces in {{repo}} for {{question}}.",
|
|
68
|
+
contextInstruction(),
|
|
69
|
+
"Focus: {{focus}}. Return concrete files, scripts, configs, missing areas, and candidate runtime bottlenecks."
|
|
70
|
+
].join(" "),
|
|
71
|
+
fastOptions("Operator surface mapper", { resultCache: sourceContextResultCache() })
|
|
72
|
+
)
|
|
73
|
+
]),
|
|
74
|
+
parallel("Assess", [
|
|
75
|
+
agent(
|
|
76
|
+
"assess:risks",
|
|
77
|
+
[
|
|
78
|
+
"Assess the fast map for real P0/P1/P2 architecture and correctness risks.",
|
|
79
|
+
"Separate confirmed risks, conditional risks, non-issues, and unknowns.",
|
|
80
|
+
"Tie every important claim to inspected evidence and the invariants {{invariant}}."
|
|
81
|
+
].join(" "),
|
|
82
|
+
fastOptions("Risk assessor", { resultCache: sourceContextResultCache({ includeCompletedResults: "previous-phases" }) })
|
|
83
|
+
),
|
|
84
|
+
agent(
|
|
85
|
+
"assess:runtime-speed",
|
|
86
|
+
[
|
|
87
|
+
"Assess runtime speed and user-wait risk for {{question}}.",
|
|
88
|
+
"Look for serial agent work, repeated repository scanning, missing cache keys, oversized prompts, and long foreground jobs.",
|
|
89
|
+
"Recommend mechanisms that preserve POLA, stdout/stderr discipline, and zero runtime dependencies."
|
|
90
|
+
].join(" "),
|
|
91
|
+
fastOptions("Runtime speed assessor", { resultCache: sourceContextResultCache({ includeCompletedResults: "previous-phases" }) })
|
|
92
|
+
)
|
|
93
|
+
]),
|
|
94
|
+
phase("Verify", [
|
|
95
|
+
agent(
|
|
96
|
+
"verify:p0-p2-risks",
|
|
97
|
+
[
|
|
98
|
+
"Re-open evidence for every candidate P0/P1/P2 risk from the fast assessment.",
|
|
99
|
+
"Confirm real risks, downgrade unsupported claims, and list exact file paths, commands, logs, or unknowns.",
|
|
100
|
+
"The cw:result evidence array must cite durable locators."
|
|
101
|
+
].join(" "),
|
|
102
|
+
strongOptions("Evidence verifier", { requiresEvidence: true })
|
|
103
|
+
)
|
|
104
|
+
]),
|
|
105
|
+
phase("Verdict", [
|
|
106
|
+
artifact(
|
|
107
|
+
"verdict:fast-synthesis",
|
|
108
|
+
[
|
|
109
|
+
"Synthesize a fast architecture verdict for {{question}}.",
|
|
110
|
+
"Include a short answer, compact architecture map, ranked risks, speed recommendations, non-issues, and evidence links.",
|
|
111
|
+
"State when the full architecture-review app should be scheduled as a background routine.",
|
|
112
|
+
"The cw:result evidence array must support the final verdict."
|
|
113
|
+
].join(" "),
|
|
114
|
+
strongOptions("Fast verdict synthesizer", { requiresEvidence: true })
|
|
115
|
+
)
|
|
116
|
+
])
|
|
117
|
+
]
|
|
118
|
+
});
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
function fastOptions(label, extra) {
|
|
122
|
+
return taskOptions(label, FAST_MODEL, extra);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
function strongOptions(label, extra) {
|
|
126
|
+
return taskOptions(label, STRONG_MODEL, extra);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
function taskOptions(label, model, extra) {
|
|
130
|
+
return {
|
|
131
|
+
label,
|
|
132
|
+
sandboxProfileId: "readonly",
|
|
133
|
+
...(model ? { model } : {}),
|
|
134
|
+
...(extra || {})
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
function modelHint(name) {
|
|
139
|
+
const value = String(process.env[name] || "").trim();
|
|
140
|
+
return value || undefined;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
function contextInstruction() {
|
|
144
|
+
return [
|
|
145
|
+
"If {{sourceContext}} is non-empty, read that JSONL source context first and treat {{sourceContextDigest}} as its cache/digest hint.",
|
|
146
|
+
"If the supplied context is missing, unreadable, or obviously stale, say so explicitly instead of guessing.",
|
|
147
|
+
"If no source context is supplied, inspect {{repo}} directly."
|
|
148
|
+
].join(" ");
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
function sourceContextResultCache(extra) {
|
|
152
|
+
return { mode: "read-write", keyInput: "sourceContextDigest", ...(extra || {}) };
|
|
153
|
+
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "pr-review-fix-ci",
|
|
4
4
|
"title": "PR Review Fix CI",
|
|
5
5
|
"summary": "Review a pull request or branch, inspect CI failures, diagnose actionable issues, optionally patch, verify, and summarize with evidence.",
|
|
6
|
-
"version": "0.1.79",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "release-cut",
|
|
4
4
|
"title": "Release Cut",
|
|
5
5
|
"summary": "Prepare a release with checklist discipline: version checks, changelog, tests, packaging, release notes, and final verification.",
|
|
6
|
-
"version": "0.1.79",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"id": "research-synthesis",
|
|
4
4
|
"title": "Research Synthesis",
|
|
5
5
|
"summary": "Split a research question into claims, investigate sources, cross-check evidence, verify claims, and synthesize a concise answer.",
|
|
6
|
-
"version": "0.1.79",
|
|
6
|
+
"version": "0.1.81",
|
|
7
7
|
"author": "COOLWHITE LLC",
|
|
8
8
|
"inputs": [
|
|
9
9
|
{
|
package/dist/agent-config.js
CHANGED
|
@@ -139,18 +139,30 @@ function agentConfigFromArgs(args) {
|
|
|
139
139
|
// npx/global install, where $(pwd)-relative paths don't exist) can configure a
|
|
140
140
|
// WORKING agent without knowing where the package landed on disk:
|
|
141
141
|
// --agent-command builtin:claude (or CW_AGENT_COMMAND=builtin:claude)
|
|
142
|
-
// resolves to the packaged
|
|
143
|
-
//
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
142
|
+
// resolves to the packaged wrapper invocation. Still pure config — the template
|
|
143
|
+
// is an out-of-process delegation script; CW never calls a model API.
|
|
144
|
+
//
|
|
145
|
+
// The builtin set is DATA, not a kernel TS literal (FreeBSD-audit L15): it lives
|
|
146
|
+
// in scripts/agents/builtin-templates.json (vendor name -> wrapper script name).
|
|
147
|
+
// Adding a vendor is a content/distribution step (drop a wrapper + a JSON line),
|
|
148
|
+
// not a kernel edit — keeping CW vendor-agnostic at the source level.
|
|
149
|
+
function builtinAgentTemplates() {
|
|
150
|
+
const agentsDir = node_path_1.default.join(__dirname, "..", "scripts", "agents");
|
|
151
|
+
const manifest = JSON.parse(node_fs_1.default.readFileSync(node_path_1.default.join(agentsDir, "builtin-templates.json"), "utf8"));
|
|
152
|
+
const out = {};
|
|
153
|
+
for (const [name, script] of Object.entries(manifest.templates || {})) {
|
|
154
|
+
out[name] = `node ${node_path_1.default.join(agentsDir, script)} {{input}} {{result}}`;
|
|
155
|
+
}
|
|
156
|
+
return out;
|
|
157
|
+
}
|
|
147
158
|
function expandBuiltinAgentCommand(command) {
|
|
148
159
|
if (!command || !command.startsWith("builtin:"))
|
|
149
160
|
return command;
|
|
150
161
|
const name = command.slice("builtin:".length).trim();
|
|
151
|
-
const
|
|
162
|
+
const templates = builtinAgentTemplates();
|
|
163
|
+
const template = templates[name];
|
|
152
164
|
if (!template) {
|
|
153
|
-
throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(
|
|
165
|
+
throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(templates).join(", ")}`);
|
|
154
166
|
}
|
|
155
167
|
return template;
|
|
156
168
|
}
|
|
@@ -200,6 +212,8 @@ function setAgentConfigFile(patch, env = process.env) {
|
|
|
200
212
|
endpoint: firstDefined(incoming.endpoint, current.endpoint),
|
|
201
213
|
model: firstDefined(incoming.model, current.model),
|
|
202
214
|
timeoutMs: firstDefined(incoming.timeoutMs, current.timeoutMs),
|
|
215
|
+
attestPublicKey: firstDefined(incoming.attestPublicKey, current.attestPublicKey),
|
|
216
|
+
requireAttestedTelemetry: firstDefined(incoming.requireAttestedTelemetry, current.requireAttestedTelemetry),
|
|
203
217
|
source: "file"
|
|
204
218
|
};
|
|
205
219
|
const stored = redacted(merged);
|
|
@@ -20,7 +20,14 @@ const state_1 = require("./state");
|
|
|
20
20
|
const state_node_1 = require("./state-node");
|
|
21
21
|
const trust_audit_1 = require("./trust-audit");
|
|
22
22
|
const collaboration_1 = require("./collaboration");
|
|
23
|
+
const compare_1 = require("./compare");
|
|
23
24
|
exports.CANDIDATE_SCHEMA_VERSION = 1;
|
|
25
|
+
/** Verdict thresholds on a score's normalized value [0,1], declared once so the
|
|
26
|
+
* numbers carry intent instead of being buried as literals in verdictFor(). A
|
|
27
|
+
* normalized score at-or-above PASS is "pass"; at-or-above WARN (but below
|
|
28
|
+
* PASS) is "warn"; anything lower is "fail". Same numbers as before. */
|
|
29
|
+
const VERDICT_PASS_THRESHOLD = 0.7;
|
|
30
|
+
const VERDICT_WARN_THRESHOLD = 0.4;
|
|
24
31
|
function createCandidateScoring(options = {}) {
|
|
25
32
|
return {
|
|
26
33
|
registerCandidate: (run, input) => registerCandidate(run, input, options),
|
|
@@ -39,7 +46,7 @@ function registerCandidate(run, input, options = {}) {
|
|
|
39
46
|
if (existing)
|
|
40
47
|
return existing;
|
|
41
48
|
const now = new Date().toISOString();
|
|
42
|
-
const id = input.id || createCandidateId(input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
|
|
49
|
+
const id = input.id || createCandidateId(run, input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
|
|
43
50
|
const candidate = {
|
|
44
51
|
schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
|
|
45
52
|
id,
|
|
@@ -109,7 +116,7 @@ function getCandidate(run, candidateId) {
|
|
|
109
116
|
}
|
|
110
117
|
function scoreCandidate(run, candidateId, input, options = {}) {
|
|
111
118
|
const candidate = requireCandidate(run, candidateId);
|
|
112
|
-
const scoreId = input.id || createScoreId(
|
|
119
|
+
const scoreId = input.id || createScoreId(candidate);
|
|
113
120
|
const evidence = (0, trust_audit_1.normalizeEvidence)(run, input.evidence || [], {
|
|
114
121
|
source: "operator-recorded",
|
|
115
122
|
candidateId,
|
|
@@ -279,7 +286,7 @@ function selectCandidate(run, candidateId, options = {}, scoringOptions = {}) {
|
|
|
279
286
|
const now = new Date().toISOString();
|
|
280
287
|
const selection = {
|
|
281
288
|
schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
|
|
282
|
-
id: createSelectionId(candidateId),
|
|
289
|
+
id: createSelectionId(run, candidateId),
|
|
283
290
|
runId: run.id,
|
|
284
291
|
candidateId,
|
|
285
292
|
selectedAt: now,
|
|
@@ -558,16 +565,16 @@ function inferCandidateKind(input) {
|
|
|
558
565
|
return "manual";
|
|
559
566
|
}
|
|
560
567
|
function bestScore(scores) {
|
|
561
|
-
return [...scores].sort((left, right) => right.normalized - left.normalized || left.createdAt
|
|
568
|
+
return [...scores].sort((left, right) => right.normalized - left.normalized || (0, compare_1.compareBytes)(left.createdAt, right.createdAt))[0];
|
|
562
569
|
}
|
|
563
570
|
function compareRows(left, right, policy) {
|
|
564
571
|
const byScore = right.normalized - left.normalized;
|
|
565
572
|
if (byScore !== 0)
|
|
566
573
|
return byScore;
|
|
567
574
|
if (policy.tieBreaker === "candidateId")
|
|
568
|
-
return left.candidate.id
|
|
569
|
-
const byCreated = left.candidate.createdAt
|
|
570
|
-
return byCreated || left.candidate.id
|
|
575
|
+
return (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
|
|
576
|
+
const byCreated = (0, compare_1.compareBytes)(left.candidate.createdAt, right.candidate.createdAt);
|
|
577
|
+
return byCreated || (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
|
|
571
578
|
}
|
|
572
579
|
function detectTies(candidates) {
|
|
573
580
|
const groups = new Map();
|
|
@@ -578,10 +585,15 @@ function detectTies(candidates) {
|
|
|
578
585
|
return Array.from(groups.values()).filter((group) => group.length > 1);
|
|
579
586
|
}
|
|
580
587
|
function mergePolicy(policy = {}) {
|
|
588
|
+
// NOTE: `policy.criteria` (string[]) is intentionally NOT carried here. A
|
|
589
|
+
// whole-repo grep shows it has no read points — scoring reads each score's
|
|
590
|
+
// own `input.criteria` (Record<string, number>), not this list. Emitting a
|
|
591
|
+
// default `criteria: []` advertised a guarantee the code never honored and
|
|
592
|
+
// could silently drift, so it is dropped. The field stays OPTIONAL on
|
|
593
|
+
// CandidateScoringPolicy / CandidateRanking.policy for forward-compat input.
|
|
581
594
|
return {
|
|
582
595
|
id: policy.id || "cw.candidate.default",
|
|
583
596
|
title: policy.title || "Default Candidate Scoring",
|
|
584
|
-
criteria: policy.criteria || [],
|
|
585
597
|
requireEvidence: policy.requireEvidence ?? true,
|
|
586
598
|
requireVerifierGate: policy.requireVerifierGate ?? true,
|
|
587
599
|
minNormalized: policy.minNormalized,
|
|
@@ -591,9 +603,9 @@ function mergePolicy(policy = {}) {
|
|
|
591
603
|
function verdictFor(normalized, policy) {
|
|
592
604
|
if (policy.minNormalized !== undefined && normalized < policy.minNormalized)
|
|
593
605
|
return "fail";
|
|
594
|
-
if (normalized >= 0.7)
|
|
606
|
+
if (normalized >= VERDICT_PASS_THRESHOLD)
|
|
595
607
|
return "pass";
|
|
596
|
-
if (normalized >= 0.4)
|
|
608
|
+
if (normalized >= VERDICT_WARN_THRESHOLD)
|
|
597
609
|
return "warn";
|
|
598
610
|
return "fail";
|
|
599
611
|
}
|
|
@@ -616,18 +628,26 @@ function indexPath(run) {
|
|
|
616
628
|
function rankingPath(run) {
|
|
617
629
|
return node_path_1.default.join(candidateRoot(run), "ranking.json");
|
|
618
630
|
}
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
+
// Deterministic candidate id (FreeBSD-audit L12/L13): the candidate's POSITION in
|
|
632
|
+
// the run's candidate set, qualified by kind + seed (a stable worker/task/result
|
|
633
|
+
// id) for readability. No wall-clock stamp, no PRNG suffix — re-running the same
|
|
634
|
+
// workflow mints byte-identical candidate ids, keeping fingerprints replay-stable.
|
|
635
|
+
function createCandidateId(run, kind, seed) {
|
|
636
|
+
const seq = (run.candidates || []).length + 1;
|
|
637
|
+
return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${String(seq).padStart(4, "0")}`;
|
|
638
|
+
}
|
|
639
|
+
// Deterministic score id (FreeBSD-audit L12/L13): the score's POSITION within its
|
|
640
|
+
// candidate's score list. Scores only ever append, so the sequence is unique per
|
|
641
|
+
// candidate and stable across replays.
|
|
642
|
+
function createScoreId(candidate) {
|
|
643
|
+
const seq = (candidate.scores || []).length + 1;
|
|
644
|
+
return `score-${(0, state_1.safeFileName)(candidate.id)}-${String(seq).padStart(4, "0")}`;
|
|
645
|
+
}
|
|
646
|
+
// Deterministic selection id (FreeBSD-audit L12/L13): the selection's POSITION in
|
|
647
|
+
// the run's append-only selection log. No clock, no PRNG.
|
|
648
|
+
function createSelectionId(run, candidateId) {
|
|
649
|
+
const seq = (run.candidateSelections || []).length + 1;
|
|
650
|
+
return `selection-${(0, state_1.safeFileName)(candidateId)}-${String(seq).padStart(4, "0")}`;
|
|
631
651
|
}
|
|
632
652
|
function shouldPersist(options) {
|
|
633
653
|
return options.persist !== false;
|