cool-workflow 0.1.79 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +51 -3
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +64 -0
  6. package/apps/architecture-review-fast/workflow.js +153 -0
  7. package/apps/end-to-end-golden-path/app.json +1 -1
  8. package/apps/pr-review-fix-ci/app.json +1 -1
  9. package/apps/release-cut/app.json +1 -1
  10. package/apps/research-synthesis/app.json +1 -1
  11. package/dist/agent-config.js +21 -7
  12. package/dist/candidate-scoring.js +42 -22
  13. package/dist/capability-core.js +132 -17
  14. package/dist/capability-registry.js +138 -168
  15. package/dist/cli.js +97 -98
  16. package/dist/collaboration.js +5 -6
  17. package/dist/commit.js +20 -6
  18. package/dist/compare.js +18 -0
  19. package/dist/coordinator/classify.js +45 -0
  20. package/dist/coordinator/paths.js +42 -0
  21. package/dist/coordinator/util.js +129 -0
  22. package/dist/coordinator.js +127 -300
  23. package/dist/dispatch.js +35 -0
  24. package/dist/drive.js +79 -6
  25. package/dist/error-feedback.js +8 -4
  26. package/dist/evidence-reasoning.js +3 -3
  27. package/dist/execution-backend/agent.js +331 -0
  28. package/dist/execution-backend/probes.js +96 -0
  29. package/dist/execution-backend/util.js +47 -0
  30. package/dist/execution-backend.js +73 -421
  31. package/dist/mcp-server.js +79 -183
  32. package/dist/multi-agent/graph.js +84 -0
  33. package/dist/multi-agent/helpers.js +145 -0
  34. package/dist/multi-agent/paths.js +22 -0
  35. package/dist/multi-agent-eval/format.js +194 -0
  36. package/dist/multi-agent-eval/normalize.js +51 -0
  37. package/dist/multi-agent-eval.js +39 -244
  38. package/dist/multi-agent-host.js +0 -19
  39. package/dist/multi-agent.js +125 -314
  40. package/dist/node-snapshot.js +3 -3
  41. package/dist/observability/format.js +61 -0
  42. package/dist/observability/intake.js +98 -0
  43. package/dist/observability.js +14 -160
  44. package/dist/operator-ux/format.js +364 -0
  45. package/dist/operator-ux.js +22 -363
  46. package/dist/orchestrator/lifecycle-operations.js +2 -1
  47. package/dist/orchestrator/report.js +8 -0
  48. package/dist/orchestrator.js +26 -9
  49. package/dist/reclamation.js +26 -21
  50. package/dist/run-export.js +494 -25
  51. package/dist/run-registry/derive.js +172 -0
  52. package/dist/run-registry/format.js +124 -0
  53. package/dist/run-registry/gc.js +251 -0
  54. package/dist/run-registry/policy.js +16 -0
  55. package/dist/run-registry/queue.js +116 -0
  56. package/dist/run-registry.js +89 -597
  57. package/dist/run-state-schema.js +1 -0
  58. package/dist/sandbox-profile.js +43 -2
  59. package/dist/state-explosion/format.js +159 -0
  60. package/dist/state-explosion/helpers.js +82 -0
  61. package/dist/state-explosion.js +165 -304
  62. package/dist/state-node.js +19 -4
  63. package/dist/telemetry-attestation.js +55 -0
  64. package/dist/telemetry-demo.js +15 -3
  65. package/dist/telemetry-ledger.js +60 -15
  66. package/dist/topology.js +25 -8
  67. package/dist/triggers.js +33 -14
  68. package/dist/trust-audit.js +145 -33
  69. package/dist/version.js +1 -1
  70. package/dist/worker-isolation/helpers.js +51 -0
  71. package/dist/worker-isolation/paths.js +46 -0
  72. package/dist/worker-isolation.js +39 -115
  73. package/docs/agent-delegation-drive.7.md +71 -0
  74. package/docs/canonical-workflow-apps.7.md +37 -0
  75. package/docs/cli-mcp-parity.7.md +16 -0
  76. package/docs/contract-migration-tooling.7.md +6 -0
  77. package/docs/control-plane-scheduling.7.md +6 -0
  78. package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
  79. package/docs/durable-state-and-locking.7.md +8 -0
  80. package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
  81. package/docs/execution-backends.7.md +6 -0
  82. package/docs/index.md +2 -0
  83. package/docs/launch/demo.tape +28 -0
  84. package/docs/launch/launch-kit.md +96 -17
  85. package/docs/launch/pre-launch-checklist.md +53 -0
  86. package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
  87. package/docs/multi-agent-eval-replay-harness.7.md +6 -0
  88. package/docs/multi-agent-operator-ux.7.md +6 -0
  89. package/docs/multi-agent-trust-policy-audit.7.md +27 -0
  90. package/docs/node-snapshot-diff-replay.7.md +6 -0
  91. package/docs/observability-cost-accounting.7.md +6 -0
  92. package/docs/project-index.md +27 -6
  93. package/docs/real-execution-backends.7.md +6 -0
  94. package/docs/release-and-migration.7.md +8 -0
  95. package/docs/release-tooling.7.md +6 -0
  96. package/docs/routines.md +23 -0
  97. package/docs/run-registry-control-plane.7.md +89 -2
  98. package/docs/run-retention-reclamation.7.md +8 -0
  99. package/docs/source-context-profiles.7.md +119 -0
  100. package/docs/state-explosion-management.7.md +13 -0
  101. package/docs/team-collaboration.7.md +6 -0
  102. package/docs/trust-model.md +267 -0
  103. package/docs/unix-principles.md +49 -1
  104. package/docs/vendor-manifest-loadability.7.md +43 -0
  105. package/docs/web-desktop-workbench.7.md +6 -0
  106. package/manifest/plugin.manifest.json +1 -1
  107. package/manifest/source-context-profiles.json +142 -0
  108. package/package.json +4 -1
  109. package/scripts/agents/builtin-templates.json +7 -0
  110. package/scripts/agents/claude-p-agent.js +129 -43
  111. package/scripts/architecture-review-fast.js +362 -0
  112. package/scripts/bump-version.js +5 -10
  113. package/scripts/canonical-apps-list.js +64 -0
  114. package/scripts/canonical-apps.js +36 -4
  115. package/scripts/coverage-gate.js +211 -0
  116. package/scripts/dogfood-release.js +1 -1
  117. package/scripts/golden-path.js +4 -4
  118. package/scripts/parity-check.js +5 -0
  119. package/scripts/release-check.js +5 -1
  120. package/scripts/source-context.js +291 -0
  121. package/scripts/version-sync-check.js +5 -7
  122. package/skills/ci-triage/SKILL.md +50 -0
  123. package/skills/ci-triage/agents/openai.yaml +4 -0
  124. package/skills/cool-workflow/SKILL.md +4 -1
  125. package/skills/deploy-check/SKILL.md +55 -0
  126. package/skills/deploy-check/agents/openai.yaml +4 -0
  127. package/skills/design-qa/SKILL.md +49 -0
  128. package/skills/design-qa/agents/openai.yaml +4 -0
  129. package/skills/pr-review/SKILL.md +45 -0
  130. package/skills/pr-review/agents/openai.yaml +4 -0
  131. package/dist/capability-dispatcher.js +0 -86
package/dist/dispatch.js CHANGED
@@ -32,6 +32,12 @@ function createDispatchManifest(run, limit, options = {}) {
32
32
  const requestedSandboxProfileId = options.sandboxProfileId || options.sandbox;
33
33
  const sandboxProfileId = String(requestedSandboxProfileId || sandbox_profile_1.DEFAULT_SANDBOX_PROFILE_ID);
34
34
  (0, sandbox_profile_1.resolveSandboxProfileById)(sandboxProfileId, (0, sandbox_profile_1.sandboxContextForValidation)(run.cwd));
35
+ // H7: if the requested profile is a CUSTOM profile loaded from a FILE (non-bundled,
36
+ // existing file), persist its DEFINITION on run.customSandboxProfiles keyed by the
37
+ // definition's logical id. This makes the custom profile durable with run state so a
38
+ // worker boundary can re-resolve it by logical id after a scope snapshot is lost
39
+ // (re-resolving against the worker context, not the dispatch-time file path).
40
+ persistCustomSandboxProfile(run, sandboxProfileId);
35
41
  // Resolve the execution backend once (mechanism vs policy): the kernel records
36
42
  // WHICH backend was selected; it never branches on which one. Defaults to node
37
43
  // (behavior-preserving) when no `--backend` flag / CW_BACKEND env is set.
@@ -199,3 +205,32 @@ function createDispatchId() {
199
205
  const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
200
206
  return `dispatch-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
201
207
  }
208
+ // H7: persist a CUSTOM sandbox profile DEFINITION (loaded from a FILE at dispatch)
209
+ // onto run.customSandboxProfiles, keyed by the definition's logical id. Only fires
210
+ // for a non-bundled id that resolves to a readable, valid profile file. The
211
+ // resolveSandboxProfileById call above has already validated the file (it throws on
212
+ // invalid), so this re-parses only to recover the raw DEFINITION — we store the
213
+ // definition (not a resolved policy) so worker-specific path tokens re-bind to the
214
+ // correct worker context on every later re-resolve. Bundled ids and unknown ids are
215
+ // left untouched, so this never shadows a bundled profile or masks a fail-closed.
216
/**
 * Persist a custom (file-backed) sandbox profile DEFINITION on the run.
 *
 * Stores the raw JSON definition under `run.customSandboxProfiles[definition.id]`.
 * The function is best-effort: every condition that prevents persisting makes it
 * return without touching `run`.
 *
 * Nothing is stored when:
 *  - `requested` is empty or names a bundled profile id,
 *  - `requested` does not resolve to a readable regular file,
 *  - `validateSandboxProfileFile` reports the file invalid or yields no profile,
 *  - the file is not JSON, or its `id` is not a non-empty string,
 *  - the `id` is `__proto__` (assigning that key would replace the map's
 *    prototype instead of adding an entry).
 *
 * @param {object} run - run state; `run.cwd` feeds the validation context and
 *   `run.customSandboxProfiles` is created on first use.
 * @param {string} requested - the requested sandbox profile id or file path.
 * @returns {void}
 */
function persistCustomSandboxProfile(run, requested) {
    if (!requested || (0, sandbox_profile_1.isBundledSandboxProfileId)(requested))
        return;
    const absolute = node_path_1.default.resolve(requested);
    // A single stat inside try/catch replaces existsSync + statSync: a path that
    // disappears (or becomes unreadable) between two separate checks must not throw
    // out of this best-effort helper.
    let stat;
    try {
        stat = node_fs_1.default.statSync(absolute);
    }
    catch {
        return;
    }
    if (!stat.isFile())
        return;
    const validation = (0, sandbox_profile_1.validateSandboxProfileFile)(requested, (0, sandbox_profile_1.sandboxContextForValidation)(run.cwd));
    if (!validation.valid || !validation.profile)
        return;
    // Re-read to recover the raw definition (not the validated/resolved form).
    let definition;
    try {
        definition = JSON.parse(node_fs_1.default.readFileSync(absolute, "utf8"));
    }
    catch {
        return;
    }
    if (!definition || typeof definition !== "object" || typeof definition.id !== "string" || !definition.id)
        return;
    // `obj["__proto__"] = value` sets the prototype rather than an own entry, so the
    // definition would never be found again by id; skip it instead.
    if (definition.id === "__proto__")
        return;
    run.customSandboxProfiles = run.customSandboxProfiles || {};
    run.customSandboxProfiles[definition.id] = definition;
}
package/dist/drive.js CHANGED
@@ -31,12 +31,15 @@ exports.driveConcurrentRound = driveConcurrentRound;
31
31
  exports.drive = drive;
32
32
  exports.drivePreview = drivePreview;
33
33
  const node_fs_1 = __importDefault(require("node:fs"));
34
+ const node_path_1 = __importDefault(require("node:path"));
34
35
  const dispatch_1 = require("./dispatch");
35
36
  const execution_backend_1 = require("./execution-backend");
36
37
  const worker_isolation_1 = require("./worker-isolation");
37
38
  const agent_config_1 = require("./agent-config");
38
39
  const scheduling_1 = require("./scheduling");
39
40
  const observability_1 = require("./observability");
41
+ const state_1 = require("./state");
42
+ const compare_1 = require("./compare");
40
43
  exports.DRIVE_SCHEMA_VERSION = 1;
41
44
  /** The task the next drive step would advance: a RUNNING (already-dispatched,
42
45
  * awaiting fulfillment / retry) task first, else the next PENDING task in the
@@ -198,21 +201,39 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
198
201
  // Progress BEFORE the (possibly multi-minute) agent spawn, so a live drive shows
199
202
  // immediate activity instead of a long silence on the first worker. task.label
200
203
  // is the human-facing display name; the id stays the stable reference.
201
- emitProgress(`→ ${selected.label || selected.id} (${selected.phase}) — ${dispatched ? "dispatched, " : ""}spawning agent, may take minutes…`);
202
204
  const promptDigest = node_fs_1.default.existsSync(manifest.inputPath) ? (0, execution_backend_1.sha256)(node_fs_1.default.readFileSync(manifest.inputPath, "utf8")) : (0, execution_backend_1.sha256)(manifest.prompt || "");
205
+ const cachePath = resultCachePath(run, selected, (0, execution_backend_1.sha256)(selected.prompt));
206
+ if (cachePath && node_fs_1.default.existsSync(cachePath)) {
207
+ emitProgress(`↺ ${selected.label || selected.id} (${selected.phase}) — accepting cached result`);
208
+ try {
209
+ node_fs_1.default.writeFileSync(manifest.resultPath, node_fs_1.default.readFileSync(cachePath, "utf8"), "utf8");
210
+ runner.recordWorkerOutput(runId, workerId, manifest.resultPath, {});
211
+ }
212
+ catch (error) {
213
+ return handleHop(ctx, selected, workerId, `result cache rejected: ${error instanceof Error ? error.message : String(error)}`);
214
+ }
215
+ return step("accept", "ok", {
216
+ runId,
217
+ taskId: selected.id,
218
+ phase: selected.phase,
219
+ handleKind: "result-cache",
220
+ reason: "result cache hit"
221
+ });
222
+ }
223
+ emitProgress(`→ ${selected.label || selected.id} (${selected.phase}) — ${dispatched ? "dispatched, " : ""}spawning agent, may take minutes…`);
203
224
  const envelope = (0, execution_backend_1.runBackend)(buildAgentRequest(ctx, run, selected, manifest, preparedOutcome));
204
225
  const handle = envelope.provenance.handle;
205
226
  const reportedModel = handle?.metadata?.reportedModel || "unreported";
206
227
  const reportedUsage = handle?.metadata?.reportedUsage;
207
228
  const usageSignature = handle?.metadata?.usageSignature;
208
229
  if (envelope.status !== "completed") {
209
- return handleHop(ctx, selected, workerId, `agent hop ${envelope.status}: ${envelope.result.summary}`, dispatched);
230
+ return handleHop(ctx, selected, workerId, `agent hop ${envelope.status}: ${envelope.result.summary}`);
210
231
  }
211
232
  // 3. ACCEPT — the SEPARATE recordWorkerOutput layer validates + records result.md.
212
233
  // A missing result.md is a failed hop (pre-checked so no terminal side effect);
213
234
  // an invalid result.md throws at validation BEFORE any state mutation.
214
235
  if (!manifest.resultPath || !node_fs_1.default.existsSync(manifest.resultPath)) {
215
- return handleHop(ctx, selected, workerId, "agent produced no result.md", dispatched);
236
+ return handleHop(ctx, selected, workerId, "agent produced no result.md");
216
237
  }
217
238
  try {
218
239
  runner.recordWorkerOutput(runId, workerId, manifest.resultPath, {
@@ -234,7 +255,10 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
234
255
  });
235
256
  }
236
257
  catch (error) {
237
- return handleHop(ctx, selected, workerId, `result.md rejected: ${error instanceof Error ? error.message : String(error)}`, dispatched);
258
+ return handleHop(ctx, selected, workerId, `result.md rejected: ${error instanceof Error ? error.message : String(error)}`);
259
+ }
260
+ if (cachePath && manifest.resultPath && node_fs_1.default.existsSync(manifest.resultPath)) {
261
+ writeResultCache(cachePath, node_fs_1.default.readFileSync(manifest.resultPath, "utf8"));
238
262
  }
239
263
  return step("accept", "ok", {
240
264
  runId,
@@ -245,6 +269,53 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
245
269
  reportedModel
246
270
  });
247
271
  }
272
/**
 * Compute the on-disk cache file for a task's worker result, or `undefined`
 * when caching does not apply.
 *
 * Caching applies only when `task.resultCache.mode` is "read-write", the policy
 * names a `keyInput`, the run has a non-blank value for that input, and the
 * completed-results digest is available.
 *
 * @param {object} run - run state (`inputs`, `workflow.id`, `cwd` are read).
 * @param {object} task - task record (`resultCache`, `id` are read).
 * @param {string} promptDigest - digest of the task prompt, folded into the key.
 * @returns {string|undefined} cache file path under `<cwd>/.cw/cache/worker-results`.
 */
function resultCachePath(run, task, promptDigest) {
    const cachePolicy = task.resultCache;
    const cachingEnabled = Boolean(cachePolicy) && cachePolicy.mode === "read-write";
    if (!cachingEnabled)
        return undefined;
    const keyInput = cachePolicy.keyInput;
    if (!keyInput)
        return undefined;
    const keyValue = String(run.inputs[keyInput] || "").trim();
    if (!keyValue)
        return undefined;
    const completedResultsDigest = completedResultsCacheDigest(run, task);
    if (completedResultsDigest === undefined)
        return undefined;
    // Field order is part of the key: it determines the serialized bytes that get hashed.
    const keyMaterial = JSON.stringify({
        schemaVersion: 1,
        workflowId: run.workflow.id,
        taskId: task.id,
        keyInput,
        keyValue,
        promptDigest,
        completedResultsDigest
    });
    const hexDigest = (0, execution_backend_1.sha256)(keyMaterial).replace(/^sha256:/, "");
    const workflowDir = (0, state_1.safeFileName)(run.workflow.id);
    const fileName = `${(0, state_1.safeFileName)(task.id)}-${hexDigest.slice(0, 32)}.md`;
    return node_path_1.default.join(run.cwd, ".cw", "cache", "worker-results", workflowDir, fileName);
}
294
/**
 * Digest the results of every task in the phases BEFORE this task's phase.
 *
 * @param {object} run - run state (`phases`, `tasks` are read).
 * @param {object} task - task record (`resultCache`, `phase` are read).
 * @returns {string|undefined} "" when the policy does not ask for
 *   "previous-phases"; `undefined` when the task's phase is not found or any
 *   earlier task is not completed with an existing result file; otherwise the
 *   sha256 of the `[taskId, resultDigest]` pairs, sorted by task id.
 */
function completedResultsCacheDigest(run, task) {
    if (task.resultCache?.includeCompletedResults !== "previous-phases")
        return "";
    const ownPhaseIndex = run.phases.findIndex((phase) => phase.name === task.phase || phase.id === task.phase);
    if (ownPhaseIndex < 0)
        return undefined;
    const earlierTaskIds = new Set(run.phases.slice(0, ownPhaseIndex).flatMap((phase) => phase.taskIds));
    const earlierTasks = run.tasks
        .filter((candidate) => earlierTaskIds.has(candidate.id))
        .sort((a, b) => (0, compare_1.compareBytes)(a.id, b.id));
    const records = [];
    let everyResultPresent = true;
    for (const earlier of earlierTasks) {
        const usable = earlier.status === "completed" && earlier.resultPath && node_fs_1.default.existsSync(earlier.resultPath);
        if (!usable) {
            // Keep walking: remaining usable results are still read, as before.
            everyResultPresent = false;
            continue;
        }
        records.push([earlier.id, (0, execution_backend_1.sha256)(node_fs_1.default.readFileSync(earlier.resultPath, "utf8"))]);
    }
    if (!everyResultPresent)
        return undefined;
    return (0, execution_backend_1.sha256)(JSON.stringify(records));
}
313
/**
 * Write a cache entry via a temp file + rename, so `file` is only ever replaced
 * by a fully written file.
 *
 * Creates the parent directory if needed. If the write or the rename fails, the
 * temp file is removed before the error is rethrown, so a failed attempt does
 * not leave `<file>.<pid>.tmp` litter in the cache directory.
 *
 * @param {string} file - destination cache file path.
 * @param {string} content - UTF-8 text to store.
 * @returns {void}
 * @throws the underlying fs error when the directory, write, or rename fails.
 */
function writeResultCache(file, content) {
    node_fs_1.default.mkdirSync(node_path_1.default.dirname(file), { recursive: true });
    const tmp = `${file}.${process.pid}.tmp`;
    try {
        node_fs_1.default.writeFileSync(tmp, content, "utf8");
        node_fs_1.default.renameSync(tmp, file);
    }
    catch (error) {
        try {
            node_fs_1.default.rmSync(tmp, { force: true });
        }
        catch {
            // Cleanup is best-effort; the original failure is the one worth reporting.
        }
        throw error;
    }
}
248
319
  /** Advance ONE concurrent ROUND: fulfill up to `limit` ready tasks in the first
249
320
  * runnable phase as a single batch, recording results in DETERMINISTIC task
250
321
  * order (the existing phase/dispatch order) regardless of completion order — so
@@ -322,6 +393,9 @@ function prepareConcurrentOutcomes(ctx, batch) {
322
393
  continue;
323
394
  }
324
395
  const manifest = runner.showWorkerManifest(runId, workerId);
396
+ const cachePath = resultCachePath(run, task, (0, execution_backend_1.sha256)(task.prompt));
397
+ if (cachePath && node_fs_1.default.existsSync(cachePath))
398
+ continue;
325
399
  const job = (0, execution_backend_1.prepareAgentSpawn)(buildAgentRequest(ctx, run, task, manifest));
326
400
  if (job) {
327
401
  jobs.push(job);
@@ -338,7 +412,7 @@ function prepareConcurrentOutcomes(ctx, batch) {
338
412
  }
339
413
  /** A failed agent hop: charge one attempt and (reuse v0.1.37 retryOrPark) either
340
414
  * retry on the SAME worker scope next step, or PARK past the retry budget. */
341
- function handleHop(ctx, task, workerId, reason, dispatched) {
415
+ function handleHop(ctx, task, workerId, reason) {
342
416
  const persisted = ctx.runner.showWorker(ctx.runId, workerId).retryCount || 0;
343
417
  const prior = Math.max(ctx.attempts.get(task.id) || 0, persisted);
344
418
  const entry = {
@@ -371,7 +445,6 @@ function handleHop(ctx, task, workerId, reason, dispatched) {
371
445
  });
372
446
  }
373
447
  // Retryable: leave the task running (scope reused) for the next step.
374
- void dispatched;
375
448
  (0, worker_isolation_1.recordWorkerRetryAttempt)(ctx.runner.loadRun(ctx.runId), workerId, decided.attempts || prior + 1, reason);
376
449
  return step("fulfill", "failed", {
377
450
  runId: ctx.runId,
@@ -97,7 +97,7 @@ function recordFeedback(run, input, options = {}) {
97
97
  const now = new Date().toISOString();
98
98
  const record = {
99
99
  schemaVersion: exports.ERROR_FEEDBACK_SCHEMA_VERSION,
100
- id: createFeedbackId(classification),
100
+ id: createFeedbackId(run, classification),
101
101
  runId: run.id,
102
102
  createdAt: now,
103
103
  updatedAt: now,
@@ -357,9 +357,13 @@ function formatEvidence(evidence) {
357
357
  return ["No evidence recorded."];
358
358
  return evidence.map((entry) => `- ${entry.id}: ${entry.locator || entry.path || entry.summary || entry.source || ""}`);
359
359
  }
360
- function createFeedbackId(classification) {
361
- const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
362
- return `feedback-${classification}-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
360
+ // Deterministic feedback id (FreeBSD-audit L12/L13): the feedback record's
361
+ // POSITION in the run's append-only feedback log, qualified by classification for
362
+ // readability. recordFeedback dedups identical errors before minting, so the
363
+ // sequence is stable and collision-free across replays — no clock, no PRNG.
364
/**
 * Mint a feedback id from the record's position in the run's feedback log.
 *
 * @param {object} run - run state; `run.feedback` (possibly absent) is the log.
 * @param {string} classification - label embedded in the id.
 * @returns {string} `feedback-<classification>-<NNNN>`, where NNNN is the
 *   1-based next position, zero-padded to at least four digits.
 */
function createFeedbackId(run, classification) {
    const existing = run.feedback || [];
    const position = String(existing.length + 1).padStart(4, "0");
    return `feedback-${classification}-${position}`;
}
364
368
  function feedbackKey(value) {
365
369
  return [
@@ -343,7 +343,7 @@ function deriveCounterfactuals(run, scores) {
343
343
  forSelectionGate.push({
344
344
  ref: candidate.id,
345
345
  kind: "candidate",
346
- status: candidate.status === "failed" ? "rejected" : "rejected",
346
+ status: "rejected",
347
347
  reason: candidate.feedbackIds[0] ? `see feedback ${candidate.feedbackIds[0]}` : `candidate ${candidate.id} ${candidate.status}`
348
348
  });
349
349
  for (const scoreId of candidate.scores || []) {
@@ -389,11 +389,11 @@ function deriveCounterfactuals(run, scores) {
389
389
  // node. This returns the operator-graph node ids backing every decision-bearing
390
390
  // reasoning step of an adopted chain, so state-explosion can protect them.
391
391
  // ---------------------------------------------------------------------------
392
- function reasoningCriticalNodeIds(run) {
392
+ function reasoningCriticalNodeIds(run, operator = (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run)) {
393
393
  const ids = new Set();
394
394
  const faninIds = new Set((run.multiAgent?.fanins || []).map((entry) => entry.id));
395
395
  const commitById = new Map((run.commits || []).map((commit) => [commit.id, commit]));
396
- for (const evidence of (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run).evidence) {
396
+ for (const evidence of operator.evidence) {
397
397
  if (evidence.status !== "adopted")
398
398
  continue;
399
399
  for (const id of evidence.candidateIds)
@@ -0,0 +1,331 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.resolveAgentInvocation = resolveAgentInvocation;
7
+ exports.stripSecretArgs = stripSecretArgs;
8
+ exports.parseAgentReport = parseAgentReport;
9
+ exports.agentSubstitutions = agentSubstitutions;
10
+ exports.substituteAgentArg = substituteAgentArg;
11
+ exports.recordedAgentHandle = recordedAgentHandle;
12
+ exports.extractEndpointResult = extractEndpointResult;
13
+ exports.agentHandle = agentHandle;
14
+ exports.prepareAgentSpawn = prepareAgentSpawn;
15
+ exports.runAgentBatchOutcomes = runAgentBatchOutcomes;
16
+ // Agent-delegation pure helpers + concurrent batch fulfillment for the
17
+ // execution-backend driver layer. Carved out of execution-backend.ts
18
+ // (FreeBSD-audit god-module carve) so the driver layer no longer bundles the
19
+ // agent sub-domain's data-transform helpers; the stateful runners
20
+ // (runAgentProcess / runAgentEndpoint) that build refusal/delegated envelopes
21
+ // stay in the parent and import these. The parent re-exports the public surface
22
+ // (stripSecretArgs, AgentSpawnJob, prepareAgentSpawn, runAgentBatchOutcomes) so
23
+ // every importer is byte-unchanged.
24
+ //
25
+ // BEHAVIOR-PRESERVING — pure code movement, zero logic change. Every function
26
+ // here is a pure function of its inputs (request/env/argv → resolved data); none
27
+ // reaches back into the parent's envelope builders, so there is no runtime cycle.
28
+ // Matches the existing router pattern (orchestrator/*-operations.ts,
29
+ // run-registry/derive.ts).
30
+ //
31
+ // agent — the v0.1.38 delegating driver. Spawns an EXTERNAL agent process per
32
+ // worker (claude -p / codex exec / …) argv-style (shell:false), or POSTs the
33
+ // manifest to a configured HTTP agent endpoint. The agent reads the worker
34
+ // input/manifest and writes the worker's result.md out-of-process; CW captures
35
+ // the agent CHILD's command + exit + stdout digest as the canonical evidence
36
+ // triple (NEVER the result.md — that is the separate recordWorkerOutput layer)
37
+ // and records the kind:process handle + agent-reported model in provenance.
38
+ //
39
+ // THE RED LINE: CW spawns the agent and records its attested output. It NEVER
40
+ // imports a model SDK, holds an API key, or constructs a model API request. Any
41
+ // API key flows from the agent's OWN inherited env; CW never reads or records it.
42
+ // The operator-chosen CW_AGENT_MODEL is interpolated into `{{model}}` as policy
43
+ // and recorded ONLY in secret-stripped args — it is NEVER the attested model id.
44
+ const node_path_1 = __importDefault(require("node:path"));
45
+ const node_child_process_1 = require("node:child_process");
46
+ const util_1 = require("./util");
47
+ /** Resolve the agent invocation from the request delegation > env. Vendor-neutral;
48
+ * the durable file config is folded in by the drive layer before this point. */
49
/**
 * Resolve how the agent is invoked, preferring the request's delegation over
 * the top-level request fields, and those over the CW_AGENT_* environment.
 *
 * A command given as a single whitespace-separated string (from the env, or as
 * `command` when no `delegation.args` is supplied) is split into a binary plus
 * an argv template; it is never handed to a shell here.
 *
 * @param {object} request - reads `delegation`, `command`, `args`, `timeoutMs`.
 * @returns {{binary: (string|undefined), rawArgs: string[], endpoint: (string|undefined), model: (string|undefined), timeoutMs: *}}
 */
function resolveAgentInvocation(request) {
    const envValue = (name) => (process.env[name] || "").trim();
    const splitArgv = (text) => text.split(/\s+/).filter(Boolean);
    const delegation = request.delegation || {};
    const envCommand = envValue("CW_AGENT_COMMAND");
    const endpoint = delegation.endpoint || envValue("CW_AGENT_ENDPOINT") || undefined;
    const model = delegation.model || envValue("CW_AGENT_MODEL") || undefined;
    const hasDelegationArgs = Boolean(delegation.args);
    let binary = delegation.command || request.command || undefined;
    let rawArgs = [];
    if (hasDelegationArgs) {
        rawArgs = [...delegation.args];
    }
    else if (request.args) {
        rawArgs = [...request.args];
    }
    if (!binary) {
        if (envCommand) {
            // Env template: first token is the binary; the rest is argv unless the
            // delegation supplied its own args.
            const [head, ...tail] = splitArgv(envCommand);
            binary = head;
            if (!hasDelegationArgs)
                rawArgs = tail;
        }
    }
    else if (!hasDelegationArgs && /\s/.test(binary)) {
        // "binary arg arg" given as one string: split it into binary + argv.
        const [head, ...tail] = splitArgv(binary);
        binary = head;
        rawArgs = tail;
    }
    return { binary, rawArgs, endpoint, model, timeoutMs: request.timeoutMs };
}
72
// Flags whose FOLLOWING argument is a secret (compared case-insensitively).
const AGENT_SECRET_FLAGS = new Set(["--api-key", "--apikey", "--token", "--key", "--secret", "--password", "--auth", "--bearer"]);
/**
 * Redact secrets from agent args before they are recorded.
 *
 * Three shapes are redacted:
 *  1. the value FOLLOWING a flag listed in AGENT_SECRET_FLAGS;
 *  2. the value of an inline `--name=value` whose name contains a secret word
 *     (key/token/secret/password/auth/bearer) — including when the name STARTS
 *     with that word, e.g. `--token=…`, `--key=…`, `--password=…`;
 *  3. a bare token that looks like a credential (known provider prefix, or 32+
 *     characters of `[A-Za-z0-9_-]` with no path separators).
 *
 * Over-redaction is acceptable; leaking a key is not.
 *
 * @param {Array<*>} args - argv entries; each is stringified before inspection.
 * @returns {string[]} a new array with secret values replaced by `<redacted>`.
 */
function stripSecretArgs(args) {
    const out = [];
    for (let i = 0; i < args.length; i++) {
        const arg = String(args[i]);
        if (AGENT_SECRET_FLAGS.has(arg.toLowerCase())) {
            out.push(arg);
            if (i + 1 < args.length) {
                out.push("<redacted>");
                i++;
            }
            continue;
        }
        // The leading "letter + name chars" run is OPTIONAL so that a flag whose name
        // begins with the secret word (`--token=…`) is caught. Requiring a letter
        // before the keyword let exactly the flags listed above leak in `=` form.
        const inline = arg.match(/^(--?(?:[A-Za-z][\w-]*)?(?:key|token|secret|password|auth|bearer)[\w-]*)=.*/i);
        if (inline) {
            out.push(`${inline[1]}=<redacted>`);
            continue;
        }
        // Bare credential-looking token: a known provider prefix, or a long run with
        // NO path separators (so file paths / {{...}} substitutions survive as
        // useful provenance).
        if (/^(sk-|ghp_|gho_|github_pat_|xox[abpr]-|Bearer\s)/.test(arg) || (arg.length >= 32 && /^[A-Za-z0-9_\-]{32,}$/.test(arg))) {
            out.push("<redacted>");
            continue;
        }
        out.push(arg);
    }
    return out;
}
105
+ /** Best-effort parse of the AGENT-reported model id from its stdout. SOLELY the
106
+ * agent's own report — `unreported` when absent. Never CW_AGENT_MODEL. */
107
/**
 * Best-effort extraction of the agent's self-reported model, usage and usage
 * signature from its stdout.
 *
 * The whole (trimmed) stdout is tried as a JSON object first; failing that, the
 * LAST line that starts with `{` and ends with `}` is tried. Arrays and
 * non-objects do not count as a report.
 *
 * @param {*} stdout - raw agent stdout (stringified; falsy means empty).
 * @returns {{model?: string, usage?: object, usageSignature?: string}} `{}` when
 *   no JSON object report is found; otherwise all three keys, each possibly
 *   `undefined`.
 */
function parseAgentReport(stdout) {
    const raw = String(stdout || "").trim();
    if (raw === "")
        return {};
    // Parse `candidate` and keep it only if it is a non-array object.
    const parseObject = (candidate) => {
        let value;
        try {
            value = JSON.parse(candidate);
        }
        catch {
            return undefined;
        }
        const isPlainObject = value && typeof value === "object" && !Array.isArray(value);
        return isPlainObject ? value : undefined;
    };
    let report = parseObject(raw);
    if (!report) {
        const lines = raw.split(/\r?\n/);
        for (let index = lines.length - 1; index >= 0; index--) {
            const candidate = lines[index].trim();
            if (candidate.startsWith("{") && candidate.endsWith("}")) {
                // Only the last JSON-looking line is tried; earlier lines are not a fallback.
                report = parseObject(candidate);
                break;
            }
        }
    }
    if (!report)
        return {};
    const usage = report.usage && typeof report.usage === "object" ? report.usage : undefined;
    let model;
    if (typeof report.model === "string") {
        model = report.model;
    }
    else if (usage && typeof usage.model === "string") {
        model = usage.model;
    }
    else if (typeof report.modelId === "string") {
        model = report.modelId;
    }
    // Some agents report model ids only as KEYS of a `modelUsage` object; pick the
    // one with the most input tokens (the first such key on ties).
    const perModel = report.modelUsage;
    if (!model && perModel && typeof perModel === "object" && !Array.isArray(perModel)) {
        const inputTokensOf = (value) => {
            const record = value && typeof value === "object" ? value : {};
            const input = Number(record.inputTokens ?? record.input_tokens ?? 0);
            return Number.isFinite(input) ? input : 0;
        };
        let primaryName;
        let primaryTokens = 0;
        let found = false;
        for (const [name, stats] of Object.entries(perModel)) {
            const tokens = inputTokensOf(stats);
            if (!found || tokens > primaryTokens) {
                primaryName = name;
                primaryTokens = tokens;
                found = true;
            }
        }
        if (found)
            model = primaryName;
    }
    // The agent's own detached signature over its usage report, under either spelling.
    let usageSignature;
    if (typeof report.usageSignature === "string") {
        usageSignature = report.usageSignature;
    }
    else if (typeof report.usage_signature === "string") {
        usageSignature = report.usage_signature;
    }
    return { model, usage, usageSignature };
}
163
/**
 * Build the `{{name}}` → value table used to fill an agent argv template.
 *
 * @param {object} request - reads `manifest` (optional) and `cwd`.
 * @param {string} [model] - operator-chosen model; empty string when absent.
 * @returns {{manifest: string, input: string, result: string, workerDir: string, model: string, prompt: string}}
 *   every value falls back to "" when the source field is missing; `manifest`
 *   falls back to `<workerDir>/manifest.json` when a worker dir is known.
 */
function agentSubstitutions(request, model) {
    const workerManifest = request.manifest;
    const workerDir = workerManifest?.workerDir || request.cwd || "";
    let manifestPath = workerManifest?.manifestPath;
    if (!manifestPath) {
        manifestPath = workerDir ? node_path_1.default.join(workerDir, "manifest.json") : "";
    }
    const table = {
        manifest: manifestPath,
        input: workerManifest?.inputPath || "",
        result: workerManifest?.resultPath || "",
        workerDir,
        model: model || "",
        prompt: workerManifest?.prompt || ""
    };
    return table;
}
175
/**
 * Replace every `{{name}}` placeholder in `arg` with `subst[name]`.
 *
 * Only the table's OWN keys count as substitutions. A placeholder naming an
 * unknown key — including names that exist only on Object.prototype, such as
 * `{{toString}}` or `{{constructor}}` — is left in place verbatim instead of
 * being replaced by a stringified built-in function.
 *
 * @param {string} arg - one argv template entry.
 * @param {Object<string, string>} subst - placeholder name → replacement text.
 * @returns {string} the argument with known placeholders filled in.
 */
function substituteAgentArg(arg, subst) {
    const hasOwn = Object.prototype.hasOwnProperty;
    return arg.replace(/\{\{(\w+)\}\}/g, (_, key) => (hasOwn.call(subst, key) ? subst[key] : `{{${key}}}`));
}
178
+ /** Build the recorded process handle for the envelope — secret-stripped + the
179
+ * agent-reported model. Same SHAPE that lands in provenance, never in evidence. */
180
/**
 * Assemble the `kind: "process"` handle recorded for an agent invocation.
 *
 * @param {string|undefined} binary - agent executable; when set the handle is in
 *   "command" mode and `ref` is the binary joined with the recorded args.
 * @param {string|undefined} endpoint - agent endpoint; used as `ref` when there
 *   is no binary.
 * @param {string[]} recordedArgs - args as they should be recorded (the caller
 *   is responsible for having stripped secrets).
 * @param {string|undefined} model - operator-chosen model.
 * @param {string|undefined} reportedModel - model id the agent itself reported.
 * @param {object} [reportedUsage] - agent-reported usage; key omitted when falsy.
 * @param {string} [usageSignature] - agent's usage signature; key omitted when falsy.
 * @returns {{kind: string, ref: string, endpoint: (string|undefined), metadata: object}}
 */
function recordedAgentHandle(binary, endpoint, recordedArgs, model, reportedModel, reportedUsage, usageSignature) {
    let ref;
    if (binary) {
        ref = [binary, ...recordedArgs].join(" ");
    }
    else {
        ref = endpoint || "";
    }
    const metadata = {
        mode: binary ? "command" : "endpoint",
        command: binary,
        args: recordedArgs,
        model,
        reportedModel
    };
    // Optional fields are added only when present, so an absent report leaves no key.
    if (reportedUsage) {
        metadata.reportedUsage = reportedUsage;
    }
    if (usageSignature) {
        metadata.usageSignature = usageSignature;
    }
    return { kind: "process", ref, endpoint, metadata };
}
203
/**
 * Pull the result body out of an endpoint response.
 *
 * @param {*} stdout - raw response text (stringified; falsy means empty).
 * @returns {string|undefined}
 *   - `undefined` for an empty response;
 *   - the trimmed raw text when it is not valid JSON;
 *   - for valid JSON, the string `result` field, else the string
 *     `resultMarkdown` field, else `undefined`.
 */
function extractEndpointResult(stdout) {
    const body = String(stdout || "").trim();
    if (body === "")
        return undefined;
    let payload;
    try {
        payload = JSON.parse(body);
    }
    catch {
        // Not JSON — the raw text itself is the result body.
        return body;
    }
    if (!payload || typeof payload !== "object")
        return undefined;
    for (const field of ["result", "resultMarkdown"]) {
        if (typeof payload[field] === "string")
            return payload[field];
    }
    return undefined;
}
222
/**
 * Describe the configured agent invocation as a process handle, or return
 * `undefined` when neither a binary nor an endpoint is configured.
 *
 * The agent invocation is POLICY-as-DATA, resolved flags(delegation) > env.
 * Only secret-stripped arguments are placed on the handle; per the original
 * note, the raw template is re-resolved inside runAgentProcess for
 * substitution + spawning so no secret lands in a recorded handle/evidence
 * entry.
 *
 * @param {object} request - Delegation request passed to resolveAgentInvocation.
 * @returns {{kind: string, ref: string, endpoint: (string|undefined), metadata: object}|undefined}
 */
function agentHandle(request) {
    const { binary, endpoint, rawArgs, model } = resolveAgentInvocation(request);
    if (!(binary || endpoint)) {
        return undefined;
    }
    const args = stripSecretArgs(rawArgs);
    let ref = endpoint || "";
    if (binary) {
        ref = [binary, ...args].join(" ");
    }
    const metadata = {
        mode: binary ? "command" : "endpoint",
        command: binary,
        args,
        model
    };
    return { kind: "process", ref, endpoint, metadata };
}
244
/**
 * Turn a request into a spawn-style batch job.
 *
 * Returns `undefined` when no agent binary is configured (endpoint-configured
 * or unconfigured agents); per the original note, those settle through the
 * serial path instead.
 *
 * @param {object} request - Delegation request; `request.cwd` becomes the job cwd.
 * @returns {{binary: string, args: string[], cwd: (string|undefined), timeoutMs: number}|undefined}
 *   `args` is the raw argument template with placeholders expanded;
 *   `timeoutMs` falls back to 600000 when the resolved value is falsy.
 */
function prepareAgentSpawn(request) {
    const { binary, rawArgs, model, timeoutMs } = resolveAgentInvocation(request);
    if (!binary) {
        return undefined;
    }
    const values = agentSubstitutions(request, model);
    const args = rawArgs.map((template) => substituteAgentArg(template, values));
    return {
        binary,
        args,
        cwd: request.cwd,
        timeoutMs: timeoutMs || 600000
    };
}
258
// Source of the delegate script run via `node -e`. It reads jobs JSON on stdin,
// spawns ALL concurrently (shell:false, inherited env — the agent's own
// credentials resolve; CW never reads them), per-job SIGTERM at timeoutMs +
// SIGKILL at +5s, stops appending to a job's captured stdout once it reaches
// 32M characters, and prints the outcome array when every job has settled.
// stderr is drained (a full pipe must never wedge a child). A kill yields
// exitCode null — the no-exit-code refusal.
//
// child.stdout is switched to utf8 decoding BEFORE the data handler is attached:
// without it every chunk arrives as a Buffer and `stdout += d` decodes each
// chunk on its own, so a multi-byte character that straddles a chunk boundary
// is corrupted into U+FFFD replacement characters.
const BATCH_DELEGATE_CHILD = `
const { spawn } = require("node:child_process");
let raw = "";
process.stdin.setEncoding("utf8");
process.stdin.on("data", (d) => (raw += d));
process.stdin.on("end", () => {
  const jobs = JSON.parse(raw);
  if (!jobs.length) { process.stdout.write("[]"); return; }
  const out = new Array(jobs.length);
  let pending = jobs.length;
  const CAP = 32 * 1024 * 1024;
  jobs.forEach((job, i) => {
    let stdout = "";
    let settled = false;
    const settle = (o) => {
      if (settled) return;
      settled = true;
      out[i] = o;
      if (--pending === 0) process.stdout.write(JSON.stringify(out));
    };
    let child;
    try {
      child = spawn(job.binary, job.args, { cwd: job.cwd, env: process.env, shell: false });
    } catch (error) {
      settle({ spawnError: String((error && error.message) || error), exitCode: null, stdout: "" });
      return;
    }
    const term = setTimeout(() => { try { child.kill("SIGTERM"); } catch {} }, job.timeoutMs);
    const kill = setTimeout(() => { try { child.kill("SIGKILL"); } catch {} }, job.timeoutMs + 5000);
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (d) => { if (stdout.length < CAP) stdout += d; });
    child.stderr.on("data", () => {});
    child.on("error", (error) => {
      clearTimeout(term); clearTimeout(kill);
      settle({ spawnError: String((error && error.message) || error), exitCode: null, stdout });
    });
    child.on("close", (code) => {
      clearTimeout(term); clearTimeout(kill);
      settle({ exitCode: typeof code === "number" ? code : null, stdout });
    });
  });
});
`;
305
/**
 * Run a batch of agent spawns concurrently; outcomes index-align with jobs.
 *
 * The jobs are handed as JSON on stdin to a synchronous `node -e` delegate
 * (BATCH_DELEGATE_CHILD). The parent backstop timeout (max job timeout + 30s)
 * means even a wedged delegate child cannot deadlock the drive: on any
 * batch-level failure EVERY job settles as a fail-closed spawn refusal — never
 * a fabricated completion, never a hang.
 *
 * The refusal reason distinguishes three batch-level failures: a spawn error,
 * an abnormal exit (non-zero status, or no status when timed out/killed), and
 * a clean exit whose stdout is not a JSON array of the expected length.
 *
 * @param {Array<{binary: string, args: string[], cwd: (string|undefined), timeoutMs: number}>} jobs
 * @returns {Array<object>} One outcome per job, in job order; `[]` for no jobs.
 */
function runAgentBatchOutcomes(jobs) {
    if (!jobs.length)
        return [];
    const maxTimeout = Math.max(...jobs.map((job) => job.timeoutMs));
    const child = (0, node_child_process_1.spawnSync)(process.execPath, ["-e", BATCH_DELEGATE_CHILD], {
        input: JSON.stringify(jobs),
        encoding: "utf8",
        // Headroom above the delegate's 32MB per-job stdout cap.
        maxBuffer: 33 * 1024 * 1024 * jobs.length,
        timeout: maxTimeout + 30000
    });
    const exitedCleanly = !child.error && child.status === 0;
    if (exitedCleanly) {
        try {
            const parsed = JSON.parse(String(child.stdout || ""));
            if (Array.isArray(parsed) && parsed.length === jobs.length)
                return parsed;
        }
        catch {
            // fall through to the fail-closed mapping below
        }
    }
    let reason;
    if (child.error) {
        reason = (0, util_1.messageOf)(child.error);
    }
    else if (exitedCleanly) {
        // Exit 0 alone would read like success; say what actually went wrong.
        reason = "batch delegate exited with 0 but printed no usable outcome array";
    }
    else if (child.status === null) {
        reason = "batch delegate exited without an exit code (timed out or killed)";
    }
    else {
        reason = `batch delegate exited with ${child.status}`;
    }
    return jobs.map(() => ({ spawnError: `batch delegate failed: ${reason}`, exitCode: null, stdout: "" }));
}