npm - cool-workflow - Versions diffs - 0.1.79 → 0.1.81 - Mend

cool-workflow 0.1.79 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/README.md +51 -3
package/apps/architecture-review/app.json +1 -1
package/apps/architecture-review-fast/app.json +64 -0
package/apps/architecture-review-fast/workflow.js +153 -0
package/apps/end-to-end-golden-path/app.json +1 -1
package/apps/pr-review-fix-ci/app.json +1 -1
package/apps/release-cut/app.json +1 -1
package/apps/research-synthesis/app.json +1 -1
package/dist/agent-config.js +21 -7
package/dist/candidate-scoring.js +42 -22
package/dist/capability-core.js +132 -17
package/dist/capability-registry.js +138 -168
package/dist/cli.js +97 -98
package/dist/collaboration.js +5 -6
package/dist/commit.js +20 -6
package/dist/compare.js +18 -0
package/dist/coordinator/classify.js +45 -0
package/dist/coordinator/paths.js +42 -0
package/dist/coordinator/util.js +129 -0
package/dist/coordinator.js +127 -300
package/dist/dispatch.js +35 -0
package/dist/drive.js +79 -6
package/dist/error-feedback.js +8 -4
package/dist/evidence-reasoning.js +3 -3
package/dist/execution-backend/agent.js +331 -0
package/dist/execution-backend/probes.js +96 -0
package/dist/execution-backend/util.js +47 -0
package/dist/execution-backend.js +73 -421
package/dist/mcp-server.js +79 -183
package/dist/multi-agent/graph.js +84 -0
package/dist/multi-agent/helpers.js +145 -0
package/dist/multi-agent/paths.js +22 -0
package/dist/multi-agent-eval/format.js +194 -0
package/dist/multi-agent-eval/normalize.js +51 -0
package/dist/multi-agent-eval.js +39 -244
package/dist/multi-agent-host.js +0 -19
package/dist/multi-agent.js +125 -314
package/dist/node-snapshot.js +3 -3
package/dist/observability/format.js +61 -0
package/dist/observability/intake.js +98 -0
package/dist/observability.js +14 -160
package/dist/operator-ux/format.js +364 -0
package/dist/operator-ux.js +22 -363
package/dist/orchestrator/lifecycle-operations.js +2 -1
package/dist/orchestrator/report.js +8 -0
package/dist/orchestrator.js +26 -9
package/dist/reclamation.js +26 -21
package/dist/run-export.js +494 -25
package/dist/run-registry/derive.js +172 -0
package/dist/run-registry/format.js +124 -0
package/dist/run-registry/gc.js +251 -0
package/dist/run-registry/policy.js +16 -0
package/dist/run-registry/queue.js +116 -0
package/dist/run-registry.js +89 -597
package/dist/run-state-schema.js +1 -0
package/dist/sandbox-profile.js +43 -2
package/dist/state-explosion/format.js +159 -0
package/dist/state-explosion/helpers.js +82 -0
package/dist/state-explosion.js +165 -304
package/dist/state-node.js +19 -4
package/dist/telemetry-attestation.js +55 -0
package/dist/telemetry-demo.js +15 -3
package/dist/telemetry-ledger.js +60 -15
package/dist/topology.js +25 -8
package/dist/triggers.js +33 -14
package/dist/trust-audit.js +145 -33
package/dist/version.js +1 -1
package/dist/worker-isolation/helpers.js +51 -0
package/dist/worker-isolation/paths.js +46 -0
package/dist/worker-isolation.js +39 -115
package/docs/agent-delegation-drive.7.md +71 -0
package/docs/canonical-workflow-apps.7.md +37 -0
package/docs/cli-mcp-parity.7.md +16 -0
package/docs/contract-migration-tooling.7.md +6 -0
package/docs/control-plane-scheduling.7.md +6 -0
package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
package/docs/durable-state-and-locking.7.md +8 -0
package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
package/docs/execution-backends.7.md +6 -0
package/docs/index.md +2 -0
package/docs/launch/demo.tape +28 -0
package/docs/launch/launch-kit.md +96 -17
package/docs/launch/pre-launch-checklist.md +53 -0
package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
package/docs/multi-agent-eval-replay-harness.7.md +6 -0
package/docs/multi-agent-operator-ux.7.md +6 -0
package/docs/multi-agent-trust-policy-audit.7.md +27 -0
package/docs/node-snapshot-diff-replay.7.md +6 -0
package/docs/observability-cost-accounting.7.md +6 -0
package/docs/project-index.md +27 -6
package/docs/real-execution-backends.7.md +6 -0
package/docs/release-and-migration.7.md +8 -0
package/docs/release-tooling.7.md +6 -0
package/docs/routines.md +23 -0
package/docs/run-registry-control-plane.7.md +89 -2
package/docs/run-retention-reclamation.7.md +8 -0
package/docs/source-context-profiles.7.md +119 -0
package/docs/state-explosion-management.7.md +13 -0
package/docs/team-collaboration.7.md +6 -0
package/docs/trust-model.md +267 -0
package/docs/unix-principles.md +49 -1
package/docs/vendor-manifest-loadability.7.md +43 -0
package/docs/web-desktop-workbench.7.md +6 -0
package/manifest/plugin.manifest.json +1 -1
package/manifest/source-context-profiles.json +142 -0
package/package.json +4 -1
package/scripts/agents/builtin-templates.json +7 -0
package/scripts/agents/claude-p-agent.js +129 -43
package/scripts/architecture-review-fast.js +362 -0
package/scripts/bump-version.js +5 -10
package/scripts/canonical-apps-list.js +64 -0
package/scripts/canonical-apps.js +36 -4
package/scripts/coverage-gate.js +211 -0
package/scripts/dogfood-release.js +1 -1
package/scripts/golden-path.js +4 -4
package/scripts/parity-check.js +5 -0
package/scripts/release-check.js +5 -1
package/scripts/source-context.js +291 -0
package/scripts/version-sync-check.js +5 -7
package/skills/ci-triage/SKILL.md +50 -0
package/skills/ci-triage/agents/openai.yaml +4 -0
package/skills/cool-workflow/SKILL.md +4 -1
package/skills/deploy-check/SKILL.md +55 -0
package/skills/deploy-check/agents/openai.yaml +4 -0
package/skills/design-qa/SKILL.md +49 -0
package/skills/design-qa/agents/openai.yaml +4 -0
package/skills/pr-review/SKILL.md +45 -0
package/skills/pr-review/agents/openai.yaml +4 -0
package/dist/capability-dispatcher.js +0 -86

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "cool-workflow",
   "description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": {
     "name": "COOLWHITE LLC"
   },

package/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cool-workflow",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
   "author": {
     "name": "COOLWHITE LLC"

package/README.md CHANGED Viewed

@@ -263,6 +263,40 @@ write paths, command execution, network access, and environment exposure. CW
 stores and validates the policy, while the agent host enforces OS/process
 runtime controls. See [docs/sandbox-profiles.7.md](docs/sandbox-profiles.7.md).
+## Quickstart
+**30-second proof, no install** — see that a recorded telemetry verdict can't be forged:
+```bash
+npx cool-workflow demo tamper
+# builds a signed ed25519 ledger, forges it 2 ways, both caught offline
+# -> VERDICT: tamper-evidence holds ✓
+```
+**Try a real run** — no clone needed; drive an architecture review with your own agent:
+```bash
+npx cool-workflow quickstart architecture-review --repo /path/to/repo \
+  --question "Is this architecture sound?" --agent-command builtin:claude
+```
+CW DELEGATES worker execution to your own agent. With no `--agent-command` (or
+`CW_AGENT_COMMAND`) the drive fails closed (status `blocked`) — it never fabricates a
+result. `--agent-command builtin:claude` resolves to a bundled read-only `claude -p`
+wrapper (needs `claude` on your PATH).
+**Re-prove a finished run, offline** (`cw` is the installed bin; or `npx cool-workflow <cmd>`):
+```bash
+cw telemetry verify <run-id>                  # re-checks the hash-chained ledger
+cw telemetry verify <run-id> --pubkey pub.pem # also re-runs ed25519 signature checks
+cw audit verify <run-id>                      # re-proves the trust-audit hash chain
+```
+More: `cw quickstart <app> --preview` (read-only dry run), `cw run resume <run-id> --drive`
+(continue an interrupted run), `cw run inspect-archive <archive>` (integrity-check a
+portable run archive without importing it).
 ## Structure
 ```text
@@ -300,6 +334,10 @@ cool-workflow
 ## Commands
+Installed via npm, the bin is `cw` (alias `cool-workflow`): e.g. `cw list`,
+`cw quickstart …`. From a cloned source checkout, before `npm run build`, use the
+equivalent `node scripts/cw.js <cmd>` form shown in the examples below.
 List bundled workflows:
 ```bash
@@ -619,7 +657,7 @@ Replaces the linear migration chain with a BFS graph path resolver (`findMigrati
 ## Vendor-Adapter Registry (v0.1.47)
-Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data.
+Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data. Cross-vendor is proven by boot, not just by generation: `npm run manifest:load-check` (`node test/vendor-manifest-load-smoke.js`) loads every generated manifest (claude, codex, agents, gemini, opencode) and asserts each exposes the full tool surface (184 tools).
 ## P2 Fixes (v0.1.48)
@@ -649,8 +687,18 @@ The orchestration vision landed in one release, all reviewer-gated:
 `--agent-command builtin:claude` resolves to a bundled read-only claude wrapper that completes workers with a real agent; the cross-directory quickstart crash is fixed; missing optional inputs no longer leak `{{name}}` into prompts. Published to npm (`cool-workflow`, bins `cw`/`cool-workflow`) with LICENSE and metadata. Live dogfood proof committed under `docs/dogfood/`.
-## Tamper-evidence demo (on main, ships next)
+## Tamper-evidence demo (v0.1.79)
-`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` is the operator-facing half (`cw_telemetry_verify` on MCP).
+`cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` (`cw_telemetry_verify` on MCP) is the operator-facing re-proof: by default it recomputes the hash chain on disk so any later edit to a recorded verdict or usage digest is caught; add `--pubkey <pem-or-path>` to re-run each `attested` hop's ed25519 signature check against the stored raw usage too. What this does and does **not** prove — including the single-keyholder ceiling — is documented honestly in [Trust Model & Limitations](docs/trust-model.md); read it before relying on a green verdict.
+## Opt-in live agent output during a drive (on main, ships next)
+Set `CW_AGENT_STREAM=1` to see each worker's live agent trace. The bundled claude wrapper (`builtin:claude` / `scripts/agents/claude-p-agent.js`) keeps the legacy `--output-format json` path by default; only the opt-in path runs claude in `--output-format stream-json` and renders a concise human trace (tool uses, assistant text, per-turn summaries) to **stderr**. CW core forwards that stderr to the operator's terminal only when `CW_AGENT_STREAM=1`, CW's own stderr is a TTY, and `CW_NO_STREAM` is not set; piped/CI runs stay silent (Rule of Silence). Core only forwards the stream, never parses it — vendor-specific rendering is the wrapper's concern (policy), not the kernel's (mechanism).
 v0.1.79
+## Fast Architecture Review (v0.1.80)
+Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
+_This documentation tracks Cool Workflow v0.1.81. See [CHANGELOG](../../CHANGELOG.md) for the release notes._

package/apps/architecture-review/app.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "id": "architecture-review",
   "title": "Architecture Review",
   "summary": "Map a repository architecture, assess risks, verify important findings, and synthesize an evidence-backed verdict.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": "COOLWHITE LLC",
   "inputs": [
     {

package/apps/architecture-review-fast/app.json ADDED Viewed

@@ -0,0 +1,64 @@
+{
+  "schemaVersion": 1,
+  "id": "architecture-review-fast",
+  "title": "Architecture Review Fast",
+  "summary": "Run a shorter architecture review with parallel map and assess phases for faster first results.",
+  "version": "0.1.81",
+  "author": "COOLWHITE LLC",
+  "inputs": [
+    {
+      "name": "repo",
+      "type": "path",
+      "required": true,
+      "description": "Repository path to inspect."
+    },
+    {
+      "name": "question",
+      "type": "string",
+      "required": true,
+      "description": "Architecture question or decision to review."
+    },
+    {
+      "name": "invariant",
+      "type": "string",
+      "repeated": true,
+      "description": "Invariant that must remain true."
+    },
+    {
+      "name": "focus",
+      "type": "string",
+      "description": "Optional subsystem, risk area, or file path to emphasize.",
+      "default": "the highest-risk runtime and operator paths"
+    },
+    {
+      "name": "sourceContext",
+      "type": "path",
+      "description": "Optional JSONL source context file generated by scripts/source-context.js export.",
+      "default": ""
+    },
+    {
+      "name": "sourceContextDigest",
+      "type": "string",
+      "description": "Optional digest or cache key for the supplied source context.",
+      "default": ""
+    }
+  ],
+  "sandboxProfiles": [
+    "readonly"
+  ],
+  "compatibility": {
+    "minVersion": "0.1.79",
+    "workflowSchemaVersion": 1,
+    "notes": "Opt-in fast architecture review app; the full architecture-review app remains unchanged."
+  },
+  "metadata": {
+    "canonical": true,
+    "domain": "software-architecture",
+    "mode": "fast",
+    "fullReviewApp": "architecture-review",
+    "maintainedAs": "official-userland"
+  },
+  "workflow": {
+    "entrypoint": "workflow.js"
+  }
+}

package/apps/architecture-review-fast/workflow.js ADDED Viewed

@@ -0,0 +1,153 @@
+const FAST_MODEL = modelHint("CW_ARCHITECTURE_REVIEW_FAST_MODEL");
+const STRONG_MODEL = modelHint("CW_ARCHITECTURE_REVIEW_STRONG_MODEL");
+module.exports = ({ workflow, phase, parallel, agent, artifact, input }) => {
+  const inputs = [
+    input("repo", {
+      type: "path",
+      required: true,
+      description: "Repository path to inspect."
+    }),
+    input("question", {
+      type: "string",
+      required: true,
+      description: "Architecture question or decision to review."
+    }),
+    input("invariant", {
+      type: "string",
+      repeated: true,
+      description: "Invariant that must remain true."
+    }),
+    input("focus", {
+      type: "string",
+      description: "Optional subsystem, risk area, or file path to emphasize.",
+      default: "the highest-risk runtime and operator paths"
+    }),
+    input("sourceContext", {
+      type: "path",
+      description: "Optional JSONL source context file generated by scripts/source-context.js export.",
+      default: ""
+    }),
+    input("sourceContextDigest", {
+      type: "string",
+      description: "Optional digest or cache key for the supplied source context.",
+      default: ""
+    })
+  ];
+  return workflow({
+    id: "architecture-review-fast",
+    title: "Architecture Review Fast",
+    summary: "Run a shorter architecture review with parallel map and assess phases for faster first results.",
+    limits: {
+      maxAgents: 12,
+      maxConcurrentAgents: 4
+    },
+    inputs,
+    sandboxProfiles: ["readonly"],
+    metadata: {
+      mode: "fast",
+      fullReviewApp: "architecture-review"
+    },
+    phases: [
+      parallel("Map", [
+        agent(
+          "map:runtime-surface",
+          [
+            "Fast-map the runtime architecture in {{repo}} for {{question}}.",
+            contextInstruction(),
+            "Focus: {{focus}}. Invariants: {{invariant}}.",
+            "Return the primary entrypoints, state stores, execution paths, and the exact files or commands inspected."
+          ].join(" "),
+          fastOptions("Runtime surface mapper", { resultCache: sourceContextResultCache() })
+        ),
+        agent(
+          "map:operator-surface",
+          [
+            "Fast-map operator, CI, deployment, test, release, and background-job surfaces in {{repo}} for {{question}}.",
+            contextInstruction(),
+            "Focus: {{focus}}. Return concrete files, scripts, configs, missing areas, and candidate runtime bottlenecks."
+          ].join(" "),
+          fastOptions("Operator surface mapper", { resultCache: sourceContextResultCache() })
+        )
+      ]),
+      parallel("Assess", [
+        agent(
+          "assess:risks",
+          [
+            "Assess the fast map for real P0/P1/P2 architecture and correctness risks.",
+            "Separate confirmed risks, conditional risks, non-issues, and unknowns.",
+            "Tie every important claim to inspected evidence and the invariants {{invariant}}."
+          ].join(" "),
+          fastOptions("Risk assessor", { resultCache: sourceContextResultCache({ includeCompletedResults: "previous-phases" }) })
+        ),
+        agent(
+          "assess:runtime-speed",
+          [
+            "Assess runtime speed and user-wait risk for {{question}}.",
+            "Look for serial agent work, repeated repository scanning, missing cache keys, oversized prompts, and long foreground jobs.",
+            "Recommend mechanisms that preserve POLA, stdout/stderr discipline, and zero runtime dependencies."
+          ].join(" "),
+          fastOptions("Runtime speed assessor", { resultCache: sourceContextResultCache({ includeCompletedResults: "previous-phases" }) })
+        )
+      ]),
+      phase("Verify", [
+        agent(
+          "verify:p0-p2-risks",
+          [
+            "Re-open evidence for every candidate P0/P1/P2 risk from the fast assessment.",
+            "Confirm real risks, downgrade unsupported claims, and list exact file paths, commands, logs, or unknowns.",
+            "The cw:result evidence array must cite durable locators."
+          ].join(" "),
+          strongOptions("Evidence verifier", { requiresEvidence: true })
+        )
+      ]),
+      phase("Verdict", [
+        artifact(
+          "verdict:fast-synthesis",
+          [
+            "Synthesize a fast architecture verdict for {{question}}.",
+            "Include a short answer, compact architecture map, ranked risks, speed recommendations, non-issues, and evidence links.",
+            "State when the full architecture-review app should be scheduled as a background routine.",
+            "The cw:result evidence array must support the final verdict."
+          ].join(" "),
+          strongOptions("Fast verdict synthesizer", { requiresEvidence: true })
+        )
+      ])
+    ]
+  });
+};
+function fastOptions(label, extra) {
+  return taskOptions(label, FAST_MODEL, extra);
+}
+function strongOptions(label, extra) {
+  return taskOptions(label, STRONG_MODEL, extra);
+}
+function taskOptions(label, model, extra) {
+  return {
+    label,
+    sandboxProfileId: "readonly",
+    ...(model ? { model } : {}),
+    ...(extra || {})
+  };
+}
+function modelHint(name) {
+  const value = String(process.env[name] || "").trim();
+  return value || undefined;
+}
+function contextInstruction() {
+  return [
+    "If {{sourceContext}} is non-empty, read that JSONL source context first and treat {{sourceContextDigest}} as its cache/digest hint.",
+    "If the supplied context is missing, unreadable, or obviously stale, say so explicitly instead of guessing.",
+    "If no source context is supplied, inspect {{repo}} directly."
+  ].join(" ");
+}
+function sourceContextResultCache(extra) {
+  return { mode: "read-write", keyInput: "sourceContextDigest", ...(extra || {}) };
+}

package/apps/end-to-end-golden-path/app.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "id": "end-to-end-golden-path",
   "title": "End-to-End Golden Path",
   "summary": "Deterministic one-worker workflow app for proving the CW integration chain.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": "COOLWHITE LLC",
   "inputs": [
     {

package/apps/pr-review-fix-ci/app.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "id": "pr-review-fix-ci",
   "title": "PR Review Fix CI",
   "summary": "Review a pull request or branch, inspect CI failures, diagnose actionable issues, optionally patch, verify, and summarize with evidence.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": "COOLWHITE LLC",
   "inputs": [
     {

package/apps/release-cut/app.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "id": "release-cut",
   "title": "Release Cut",
   "summary": "Prepare a release with checklist discipline: version checks, changelog, tests, packaging, release notes, and final verification.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": "COOLWHITE LLC",
   "inputs": [
     {

package/apps/research-synthesis/app.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "id": "research-synthesis",
   "title": "Research Synthesis",
   "summary": "Split a research question into claims, investigate sources, cross-check evidence, verify claims, and synthesize a concise answer.",
-  "version": "0.1.79",
+  "version": "0.1.81",
   "author": "COOLWHITE LLC",
   "inputs": [
     {

package/dist/agent-config.js CHANGED Viewed

@@ -139,18 +139,30 @@ function agentConfigFromArgs(args) {
 // npx/global install, where $(pwd)-relative paths don't exist) can configure a
 // WORKING agent without knowing where the package landed on disk:
 //   --agent-command builtin:claude   (or CW_AGENT_COMMAND=builtin:claude)
-// resolves to the packaged claude wrapper invocation. Still pure config — the
-// template is an out-of-process delegation script; CW never calls a model API.
-const BUILTIN_AGENT_TEMPLATES = {
-    claude: `node ${node_path_1.default.join(__dirname, "..", "scripts", "agents", "claude-p-agent.js")} {{input}} {{result}}`
-};
+// resolves to the packaged wrapper invocation. Still pure config — the template
+// is an out-of-process delegation script; CW never calls a model API.
+//
+// The builtin set is DATA, not a kernel TS literal (FreeBSD-audit L15): it lives
+// in scripts/agents/builtin-templates.json (vendor name -> wrapper script name).
+// Adding a vendor is a content/distribution step (drop a wrapper + a JSON line),
+// not a kernel edit — keeping CW vendor-agnostic at the source level.
+function builtinAgentTemplates() {
+    const agentsDir = node_path_1.default.join(__dirname, "..", "scripts", "agents");
+    const manifest = JSON.parse(node_fs_1.default.readFileSync(node_path_1.default.join(agentsDir, "builtin-templates.json"), "utf8"));
+    const out = {};
+    for (const [name, script] of Object.entries(manifest.templates || {})) {
+        out[name] = `node ${node_path_1.default.join(agentsDir, script)} {{input}} {{result}}`;
+    }
+    return out;
+}
 function expandBuiltinAgentCommand(command) {
     if (!command || !command.startsWith("builtin:"))
         return command;
     const name = command.slice("builtin:".length).trim();
-    const template = BUILTIN_AGENT_TEMPLATES[name];
+    const templates = builtinAgentTemplates();
+    const template = templates[name];
     if (!template) {
-        throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(BUILTIN_AGENT_TEMPLATES).join(", ")}`);
+        throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(templates).join(", ")}`);
     }
     return template;
 }
@@ -200,6 +212,8 @@ function setAgentConfigFile(patch, env = process.env) {
         endpoint: firstDefined(incoming.endpoint, current.endpoint),
         model: firstDefined(incoming.model, current.model),
         timeoutMs: firstDefined(incoming.timeoutMs, current.timeoutMs),
+        attestPublicKey: firstDefined(incoming.attestPublicKey, current.attestPublicKey),
+        requireAttestedTelemetry: firstDefined(incoming.requireAttestedTelemetry, current.requireAttestedTelemetry),
         source: "file"
     };
     const stored = redacted(merged);

package/dist/candidate-scoring.js CHANGED Viewed

@@ -20,7 +20,14 @@ const state_1 = require("./state");
 const state_node_1 = require("./state-node");
 const trust_audit_1 = require("./trust-audit");
 const collaboration_1 = require("./collaboration");
+const compare_1 = require("./compare");
 exports.CANDIDATE_SCHEMA_VERSION = 1;
+/** Verdict thresholds on a score's normalized value [0,1], declared once so the
+ *  numbers carry intent instead of being buried as literals in verdictFor(). A
+ *  normalized score at-or-above PASS is "pass"; at-or-above WARN (but below
+ *  PASS) is "warn"; anything lower is "fail". Same numbers as before. */
+const VERDICT_PASS_THRESHOLD = 0.7;
+const VERDICT_WARN_THRESHOLD = 0.4;
 function createCandidateScoring(options = {}) {
     return {
         registerCandidate: (run, input) => registerCandidate(run, input, options),
@@ -39,7 +46,7 @@ function registerCandidate(run, input, options = {}) {
     if (existing)
         return existing;
     const now = new Date().toISOString();
-    const id = input.id || createCandidateId(input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
+    const id = input.id || createCandidateId(run, input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
     const candidate = {
         schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
         id,
@@ -109,7 +116,7 @@ function getCandidate(run, candidateId) {
 }
 function scoreCandidate(run, candidateId, input, options = {}) {
     const candidate = requireCandidate(run, candidateId);
-    const scoreId = input.id || createScoreId(candidateId);
+    const scoreId = input.id || createScoreId(candidate);
     const evidence = (0, trust_audit_1.normalizeEvidence)(run, input.evidence || [], {
         source: "operator-recorded",
         candidateId,
@@ -279,7 +286,7 @@ function selectCandidate(run, candidateId, options = {}, scoringOptions = {}) {
     const now = new Date().toISOString();
     const selection = {
         schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
-        id: createSelectionId(candidateId),
+        id: createSelectionId(run, candidateId),
         runId: run.id,
         candidateId,
         selectedAt: now,
@@ -558,16 +565,16 @@ function inferCandidateKind(input) {
     return "manual";
 }
 function bestScore(scores) {
-    return [...scores].sort((left, right) => right.normalized - left.normalized || left.createdAt.localeCompare(right.createdAt))[0];
+    return [...scores].sort((left, right) => right.normalized - left.normalized || (0, compare_1.compareBytes)(left.createdAt, right.createdAt))[0];
 }
 function compareRows(left, right, policy) {
     const byScore = right.normalized - left.normalized;
     if (byScore !== 0)
         return byScore;
     if (policy.tieBreaker === "candidateId")
-        return left.candidate.id.localeCompare(right.candidate.id);
-    const byCreated = left.candidate.createdAt.localeCompare(right.candidate.createdAt);
-    return byCreated || left.candidate.id.localeCompare(right.candidate.id);
+        return (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
+    const byCreated = (0, compare_1.compareBytes)(left.candidate.createdAt, right.candidate.createdAt);
+    return byCreated || (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
 }
 function detectTies(candidates) {
     const groups = new Map();
@@ -578,10 +585,15 @@ function detectTies(candidates) {
     return Array.from(groups.values()).filter((group) => group.length > 1);
 }
 function mergePolicy(policy = {}) {
+    // NOTE: `policy.criteria` (string[]) is intentionally NOT carried here. A
+    // whole-repo grep shows it has no read points — scoring reads each score's
+    // own `input.criteria` (Record<string, number>), not this list. Emitting a
+    // default `criteria: []` advertised a guarantee the code never honored and
+    // could silently drift, so it is dropped. The field stays OPTIONAL on
+    // CandidateScoringPolicy / CandidateRanking.policy for forward-compat input.
     return {
         id: policy.id || "cw.candidate.default",
         title: policy.title || "Default Candidate Scoring",
-        criteria: policy.criteria || [],
         requireEvidence: policy.requireEvidence ?? true,
         requireVerifierGate: policy.requireVerifierGate ?? true,
         minNormalized: policy.minNormalized,
@@ -591,9 +603,9 @@ function mergePolicy(policy = {}) {
 function verdictFor(normalized, policy) {
     if (policy.minNormalized !== undefined && normalized < policy.minNormalized)
         return "fail";
-    if (normalized >= 0.7)
+    if (normalized >= VERDICT_PASS_THRESHOLD)
         return "pass";
-    if (normalized >= 0.4)
+    if (normalized >= VERDICT_WARN_THRESHOLD)
         return "warn";
     return "fail";
 }
@@ -616,18 +628,26 @@ function indexPath(run) {
 function rankingPath(run) {
     return node_path_1.default.join(candidateRoot(run), "ranking.json");
 }
-function createCandidateId(kind, seed) {
-    const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
-    const suffix = Math.random().toString(36).slice(2, 8);
-    return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${stamp}-${suffix}`;
-}
-function createScoreId(candidateId) {
-    const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
-    return `score-${(0, state_1.safeFileName)(candidateId)}-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
-}
-function createSelectionId(candidateId) {
-    const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
-    return `selection-${(0, state_1.safeFileName)(candidateId)}-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
+// Deterministic candidate id (FreeBSD-audit L12/L13): the candidate's POSITION in
+// the run's candidate set, qualified by kind + seed (a stable worker/task/result
+// id) for readability. No wall-clock stamp, no PRNG suffix — re-running the same
+// workflow mints byte-identical candidate ids, keeping fingerprints replay-stable.
+function createCandidateId(run, kind, seed) {
+    const seq = (run.candidates || []).length + 1;
+    return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${String(seq).padStart(4, "0")}`;
+}
+// Deterministic score id (FreeBSD-audit L12/L13): the score's POSITION within its
+// candidate's score list. Scores only ever append, so the sequence is unique per
+// candidate and stable across replays.
+function createScoreId(candidate) {
+    const seq = (candidate.scores || []).length + 1;
+    return `score-${(0, state_1.safeFileName)(candidate.id)}-${String(seq).padStart(4, "0")}`;
+}
+// Deterministic selection id (FreeBSD-audit L12/L13): the selection's POSITION in
+// the run's append-only selection log. No clock, no PRNG.
+function createSelectionId(run, candidateId) {
+    const seq = (run.candidateSelections || []).length + 1;
+    return `selection-${(0, state_1.safeFileName)(candidateId)}-${String(seq).padStart(4, "0")}`;
 }
 function shouldPersist(options) {
     return options.persist !== false;