cool-workflow 0.1.79 → 0.1.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +51 -3
- package/apps/architecture-review/app.json +1 -1
- package/apps/architecture-review-fast/app.json +64 -0
- package/apps/architecture-review-fast/workflow.js +153 -0
- package/apps/end-to-end-golden-path/app.json +1 -1
- package/apps/pr-review-fix-ci/app.json +1 -1
- package/apps/release-cut/app.json +1 -1
- package/apps/research-synthesis/app.json +1 -1
- package/dist/agent-config.js +21 -7
- package/dist/candidate-scoring.js +42 -22
- package/dist/capability-core.js +132 -17
- package/dist/capability-registry.js +138 -168
- package/dist/cli.js +97 -98
- package/dist/collaboration.js +5 -6
- package/dist/commit.js +20 -6
- package/dist/compare.js +18 -0
- package/dist/coordinator/classify.js +45 -0
- package/dist/coordinator/paths.js +42 -0
- package/dist/coordinator/util.js +129 -0
- package/dist/coordinator.js +127 -300
- package/dist/dispatch.js +35 -0
- package/dist/drive.js +79 -6
- package/dist/error-feedback.js +8 -4
- package/dist/evidence-reasoning.js +3 -3
- package/dist/execution-backend/agent.js +331 -0
- package/dist/execution-backend/probes.js +96 -0
- package/dist/execution-backend/util.js +47 -0
- package/dist/execution-backend.js +73 -421
- package/dist/mcp-server.js +79 -183
- package/dist/multi-agent/graph.js +84 -0
- package/dist/multi-agent/helpers.js +145 -0
- package/dist/multi-agent/paths.js +22 -0
- package/dist/multi-agent-eval/format.js +194 -0
- package/dist/multi-agent-eval/normalize.js +51 -0
- package/dist/multi-agent-eval.js +39 -244
- package/dist/multi-agent-host.js +0 -19
- package/dist/multi-agent.js +125 -314
- package/dist/node-snapshot.js +3 -3
- package/dist/observability/format.js +61 -0
- package/dist/observability/intake.js +98 -0
- package/dist/observability.js +14 -160
- package/dist/operator-ux/format.js +364 -0
- package/dist/operator-ux.js +22 -363
- package/dist/orchestrator/lifecycle-operations.js +2 -1
- package/dist/orchestrator/report.js +8 -0
- package/dist/orchestrator.js +26 -9
- package/dist/reclamation.js +26 -21
- package/dist/run-export.js +494 -25
- package/dist/run-registry/derive.js +172 -0
- package/dist/run-registry/format.js +124 -0
- package/dist/run-registry/gc.js +251 -0
- package/dist/run-registry/policy.js +16 -0
- package/dist/run-registry/queue.js +116 -0
- package/dist/run-registry.js +89 -597
- package/dist/run-state-schema.js +1 -0
- package/dist/sandbox-profile.js +43 -2
- package/dist/state-explosion/format.js +159 -0
- package/dist/state-explosion/helpers.js +82 -0
- package/dist/state-explosion.js +165 -304
- package/dist/state-node.js +19 -4
- package/dist/telemetry-attestation.js +55 -0
- package/dist/telemetry-demo.js +15 -3
- package/dist/telemetry-ledger.js +60 -15
- package/dist/topology.js +25 -8
- package/dist/triggers.js +33 -14
- package/dist/trust-audit.js +145 -33
- package/dist/version.js +1 -1
- package/dist/worker-isolation/helpers.js +51 -0
- package/dist/worker-isolation/paths.js +46 -0
- package/dist/worker-isolation.js +39 -115
- package/docs/agent-delegation-drive.7.md +71 -0
- package/docs/canonical-workflow-apps.7.md +37 -0
- package/docs/cli-mcp-parity.7.md +16 -0
- package/docs/contract-migration-tooling.7.md +6 -0
- package/docs/control-plane-scheduling.7.md +6 -0
- package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
- package/docs/durable-state-and-locking.7.md +8 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
- package/docs/execution-backends.7.md +6 -0
- package/docs/index.md +2 -0
- package/docs/launch/demo.tape +28 -0
- package/docs/launch/launch-kit.md +96 -17
- package/docs/launch/pre-launch-checklist.md +53 -0
- package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
- package/docs/multi-agent-eval-replay-harness.7.md +6 -0
- package/docs/multi-agent-operator-ux.7.md +6 -0
- package/docs/multi-agent-trust-policy-audit.7.md +27 -0
- package/docs/node-snapshot-diff-replay.7.md +6 -0
- package/docs/observability-cost-accounting.7.md +6 -0
- package/docs/project-index.md +27 -6
- package/docs/real-execution-backends.7.md +6 -0
- package/docs/release-and-migration.7.md +8 -0
- package/docs/release-tooling.7.md +6 -0
- package/docs/routines.md +23 -0
- package/docs/run-registry-control-plane.7.md +89 -2
- package/docs/run-retention-reclamation.7.md +8 -0
- package/docs/source-context-profiles.7.md +119 -0
- package/docs/state-explosion-management.7.md +13 -0
- package/docs/team-collaboration.7.md +6 -0
- package/docs/trust-model.md +267 -0
- package/docs/unix-principles.md +49 -1
- package/docs/vendor-manifest-loadability.7.md +43 -0
- package/docs/web-desktop-workbench.7.md +6 -0
- package/manifest/plugin.manifest.json +1 -1
- package/manifest/source-context-profiles.json +142 -0
- package/package.json +4 -1
- package/scripts/agents/builtin-templates.json +7 -0
- package/scripts/agents/claude-p-agent.js +129 -43
- package/scripts/architecture-review-fast.js +362 -0
- package/scripts/bump-version.js +5 -10
- package/scripts/canonical-apps-list.js +64 -0
- package/scripts/canonical-apps.js +36 -4
- package/scripts/coverage-gate.js +211 -0
- package/scripts/dogfood-release.js +1 -1
- package/scripts/golden-path.js +4 -4
- package/scripts/parity-check.js +5 -0
- package/scripts/release-check.js +5 -1
- package/scripts/source-context.js +291 -0
- package/scripts/version-sync-check.js +5 -7
- package/skills/ci-triage/SKILL.md +50 -0
- package/skills/ci-triage/agents/openai.yaml +4 -0
- package/skills/cool-workflow/SKILL.md +4 -1
- package/skills/deploy-check/SKILL.md +55 -0
- package/skills/deploy-check/agents/openai.yaml +4 -0
- package/skills/design-qa/SKILL.md +49 -0
- package/skills/design-qa/agents/openai.yaml +4 -0
- package/skills/pr-review/SKILL.md +45 -0
- package/skills/pr-review/agents/openai.yaml +4 -0
- package/dist/capability-dispatcher.js +0 -86
package/dist/dispatch.js
CHANGED
|
@@ -32,6 +32,12 @@ function createDispatchManifest(run, limit, options = {}) {
|
|
|
32
32
|
const requestedSandboxProfileId = options.sandboxProfileId || options.sandbox;
|
|
33
33
|
const sandboxProfileId = String(requestedSandboxProfileId || sandbox_profile_1.DEFAULT_SANDBOX_PROFILE_ID);
|
|
34
34
|
(0, sandbox_profile_1.resolveSandboxProfileById)(sandboxProfileId, (0, sandbox_profile_1.sandboxContextForValidation)(run.cwd));
|
|
35
|
+
// H7: if the requested profile is a CUSTOM profile loaded from a FILE (non-bundled,
|
|
36
|
+
// existing file), persist its DEFINITION on run.customSandboxProfiles keyed by the
|
|
37
|
+
// definition's logical id. This makes the custom profile durable with run state so a
|
|
38
|
+
// worker boundary can re-resolve it by logical id after a scope snapshot is lost
|
|
39
|
+
// (re-resolving against the worker context, not the dispatch-time file path).
|
|
40
|
+
persistCustomSandboxProfile(run, sandboxProfileId);
|
|
35
41
|
// Resolve the execution backend once (mechanism vs policy): the kernel records
|
|
36
42
|
// WHICH backend was selected; it never branches on which one. Defaults to node
|
|
37
43
|
// (behavior-preserving) when no `--backend` flag / CW_BACKEND env is set.
|
|
@@ -199,3 +205,32 @@ function createDispatchId() {
|
|
|
199
205
|
const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
|
|
200
206
|
return `dispatch-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
|
|
201
207
|
}
|
|
208
|
+
// H7: persist a CUSTOM sandbox profile DEFINITION (loaded from a FILE at dispatch)
// onto run.customSandboxProfiles, keyed by the definition's logical id. Only fires
// for a non-bundled id that resolves to a readable, valid profile file. The file is
// re-parsed here only to recover the raw DEFINITION — we store the definition (not a
// resolved policy) so worker-specific path tokens re-bind to the correct worker
// context on every later re-resolve. Bundled ids and unknown ids are left untouched.
//
// Parameters:
//   run       - the run record; `run.cwd` feeds the validation context and
//               `run.customSandboxProfiles` is created/mutated in place.
//   requested - the requested sandbox profile id (or file path) as a string.
// Returns: nothing. Every "not applicable" case is a silent no-op.
function persistCustomSandboxProfile(run, requested) {
    if (!requested || (0, sandbox_profile_1.isBundledSandboxProfileId)(requested))
        return;
    const absolute = node_path_1.default.resolve(requested);
    // One stat call instead of existsSync + statSync: a file that vanishes between
    // the two calls must be treated as "not a file", not surface as a thrown ENOENT.
    let stat;
    try {
        stat = node_fs_1.default.statSync(absolute);
    }
    catch {
        return;
    }
    if (!stat.isFile())
        return;
    const validation = (0, sandbox_profile_1.validateSandboxProfileFile)(requested, (0, sandbox_profile_1.sandboxContextForValidation)(run.cwd));
    if (!validation.valid || !validation.profile)
        return;
    let definition;
    try {
        definition = JSON.parse(node_fs_1.default.readFileSync(absolute, "utf8"));
    }
    catch {
        // Unreadable or unparsable: nothing durable to store.
        return;
    }
    if (!definition || typeof definition !== "object" || typeof definition.id !== "string" || !definition.id)
        return;
    // Assigning under the key "__proto__" on a plain object replaces the object's
    // prototype instead of adding an entry; refuse such an id rather than corrupt
    // the profile map with a file-supplied value.
    if (definition.id === "__proto__")
        return;
    run.customSandboxProfiles = run.customSandboxProfiles || {};
    run.customSandboxProfiles[definition.id] = definition;
}
|
package/dist/drive.js
CHANGED
|
@@ -31,12 +31,15 @@ exports.driveConcurrentRound = driveConcurrentRound;
|
|
|
31
31
|
exports.drive = drive;
|
|
32
32
|
exports.drivePreview = drivePreview;
|
|
33
33
|
const node_fs_1 = __importDefault(require("node:fs"));
|
|
34
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
34
35
|
const dispatch_1 = require("./dispatch");
|
|
35
36
|
const execution_backend_1 = require("./execution-backend");
|
|
36
37
|
const worker_isolation_1 = require("./worker-isolation");
|
|
37
38
|
const agent_config_1 = require("./agent-config");
|
|
38
39
|
const scheduling_1 = require("./scheduling");
|
|
39
40
|
const observability_1 = require("./observability");
|
|
41
|
+
const state_1 = require("./state");
|
|
42
|
+
const compare_1 = require("./compare");
|
|
40
43
|
exports.DRIVE_SCHEMA_VERSION = 1;
|
|
41
44
|
/** The task the next drive step would advance: a RUNNING (already-dispatched,
|
|
42
45
|
* awaiting fulfillment / retry) task first, else the next PENDING task in the
|
|
@@ -198,21 +201,39 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
|
|
|
198
201
|
// Progress BEFORE the (possibly multi-minute) agent spawn, so a live drive shows
|
|
199
202
|
// immediate activity instead of a long silence on the first worker. task.label
|
|
200
203
|
// is the human-facing display name; the id stays the stable reference.
|
|
201
|
-
emitProgress(`→ ${selected.label || selected.id} (${selected.phase}) — ${dispatched ? "dispatched, " : ""}spawning agent, may take minutes…`);
|
|
202
204
|
const promptDigest = node_fs_1.default.existsSync(manifest.inputPath) ? (0, execution_backend_1.sha256)(node_fs_1.default.readFileSync(manifest.inputPath, "utf8")) : (0, execution_backend_1.sha256)(manifest.prompt || "");
|
|
205
|
+
const cachePath = resultCachePath(run, selected, (0, execution_backend_1.sha256)(selected.prompt));
|
|
206
|
+
if (cachePath && node_fs_1.default.existsSync(cachePath)) {
|
|
207
|
+
emitProgress(`↺ ${selected.label || selected.id} (${selected.phase}) — accepting cached result`);
|
|
208
|
+
try {
|
|
209
|
+
node_fs_1.default.writeFileSync(manifest.resultPath, node_fs_1.default.readFileSync(cachePath, "utf8"), "utf8");
|
|
210
|
+
runner.recordWorkerOutput(runId, workerId, manifest.resultPath, {});
|
|
211
|
+
}
|
|
212
|
+
catch (error) {
|
|
213
|
+
return handleHop(ctx, selected, workerId, `result cache rejected: ${error instanceof Error ? error.message : String(error)}`);
|
|
214
|
+
}
|
|
215
|
+
return step("accept", "ok", {
|
|
216
|
+
runId,
|
|
217
|
+
taskId: selected.id,
|
|
218
|
+
phase: selected.phase,
|
|
219
|
+
handleKind: "result-cache",
|
|
220
|
+
reason: "result cache hit"
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
emitProgress(`→ ${selected.label || selected.id} (${selected.phase}) — ${dispatched ? "dispatched, " : ""}spawning agent, may take minutes…`);
|
|
203
224
|
const envelope = (0, execution_backend_1.runBackend)(buildAgentRequest(ctx, run, selected, manifest, preparedOutcome));
|
|
204
225
|
const handle = envelope.provenance.handle;
|
|
205
226
|
const reportedModel = handle?.metadata?.reportedModel || "unreported";
|
|
206
227
|
const reportedUsage = handle?.metadata?.reportedUsage;
|
|
207
228
|
const usageSignature = handle?.metadata?.usageSignature;
|
|
208
229
|
if (envelope.status !== "completed") {
|
|
209
|
-
return handleHop(ctx, selected, workerId, `agent hop ${envelope.status}: ${envelope.result.summary}
|
|
230
|
+
return handleHop(ctx, selected, workerId, `agent hop ${envelope.status}: ${envelope.result.summary}`);
|
|
210
231
|
}
|
|
211
232
|
// 3. ACCEPT — the SEPARATE recordWorkerOutput layer validates + records result.md.
|
|
212
233
|
// A missing result.md is a failed hop (pre-checked so no terminal side effect);
|
|
213
234
|
// an invalid result.md throws at validation BEFORE any state mutation.
|
|
214
235
|
if (!manifest.resultPath || !node_fs_1.default.existsSync(manifest.resultPath)) {
|
|
215
|
-
return handleHop(ctx, selected, workerId, "agent produced no result.md"
|
|
236
|
+
return handleHop(ctx, selected, workerId, "agent produced no result.md");
|
|
216
237
|
}
|
|
217
238
|
try {
|
|
218
239
|
runner.recordWorkerOutput(runId, workerId, manifest.resultPath, {
|
|
@@ -234,7 +255,10 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
|
|
|
234
255
|
});
|
|
235
256
|
}
|
|
236
257
|
catch (error) {
|
|
237
|
-
return handleHop(ctx, selected, workerId, `result.md rejected: ${error instanceof Error ? error.message : String(error)}
|
|
258
|
+
return handleHop(ctx, selected, workerId, `result.md rejected: ${error instanceof Error ? error.message : String(error)}`);
|
|
259
|
+
}
|
|
260
|
+
if (cachePath && manifest.resultPath && node_fs_1.default.existsSync(manifest.resultPath)) {
|
|
261
|
+
writeResultCache(cachePath, node_fs_1.default.readFileSync(manifest.resultPath, "utf8"));
|
|
238
262
|
}
|
|
239
263
|
return step("accept", "ok", {
|
|
240
264
|
runId,
|
|
@@ -245,6 +269,53 @@ function processSelectedTask(ctx, selected, preparedOutcome) {
|
|
|
245
269
|
reportedModel
|
|
246
270
|
});
|
|
247
271
|
}
|
|
272
|
+
/**
 * Compute the on-disk cache file for a task's worker result, or `undefined`
 * when the task is not cacheable.
 *
 * A path is produced only when the task's `resultCache` policy has mode
 * "read-write", names a `keyInput`, the run supplies a non-blank value for that
 * input, and the completed-results digest is available. The file name embeds
 * the first 32 characters of a digest over the workflow id, task id, key input
 * name/value, prompt digest and completed-results digest.
 *
 * @param run          run record (reads `inputs`, `workflow.id`, `cwd`).
 * @param task         task record (reads `resultCache`, `id`).
 * @param promptDigest digest of the task prompt, folded into the cache key.
 * @returns absolute-or-relative cache file path under `<run.cwd>/.cw/cache/worker-results`, or `undefined`.
 */
function resultCachePath(run, task, promptDigest) {
    const cachePolicy = task.resultCache;
    if (!cachePolicy || cachePolicy.mode !== "read-write") {
        return undefined;
    }
    const keyInput = cachePolicy.keyInput;
    if (!keyInput) {
        return undefined;
    }
    const keyValue = String(run.inputs[keyInput] || "").trim();
    if (!keyValue) {
        return undefined;
    }
    const completedResultsDigest = completedResultsCacheDigest(run, task);
    if (completedResultsDigest === undefined) {
        return undefined;
    }
    // Field order is part of the key: JSON.stringify serializes in insertion order.
    const keyMaterial = JSON.stringify({
        schemaVersion: 1,
        workflowId: run.workflow.id,
        taskId: task.id,
        keyInput,
        keyValue,
        promptDigest,
        completedResultsDigest
    });
    const hex = (0, execution_backend_1.sha256)(keyMaterial).replace(/^sha256:/, "");
    const workflowDir = (0, state_1.safeFileName)(run.workflow.id);
    const fileName = `${(0, state_1.safeFileName)(task.id)}-${hex.slice(0, 32)}.md`;
    return node_path_1.default.join(run.cwd, ".cw", "cache", "worker-results", workflowDir, fileName);
}
|
|
294
|
+
/**
 * Digest of the results produced by every task in the phases that precede the
 * given task's phase, used as one component of the result-cache key.
 *
 * @param run  run record (reads `phases[].name|id|taskIds` and `tasks[]`).
 * @param task task record (reads `resultCache.includeCompletedResults`, `phase`).
 * @returns `""` when the policy does not ask for "previous-phases";
 *          `undefined` when the task's phase is unknown or any earlier task is
 *          not completed / has no existing result file;
 *          otherwise the digest of the id-sorted `[taskId, resultDigest]` pairs.
 */
function completedResultsCacheDigest(run, task) {
    if (task.resultCache?.includeCompletedResults !== "previous-phases") {
        return "";
    }
    const ownPhaseIndex = run.phases.findIndex((phase) => phase.name === task.phase || phase.id === task.phase);
    if (ownPhaseIndex < 0) {
        return undefined;
    }
    const earlierPhases = run.phases.slice(0, ownPhaseIndex);
    const earlierTaskIds = new Set(earlierPhases.flatMap((phase) => phase.taskIds));
    // filter() yields a fresh array, so sorting it never reorders run.tasks.
    const earlierTasks = run.tasks.filter((candidate) => earlierTaskIds.has(candidate.id));
    earlierTasks.sort((left, right) => (0, compare_1.compareBytes)(left.id, right.id));
    const records = [];
    let everyResultPresent = true;
    for (const candidate of earlierTasks) {
        const usable = candidate.status === "completed"
            && candidate.resultPath
            && node_fs_1.default.existsSync(candidate.resultPath);
        if (!usable) {
            // Keep scanning (as the mapped form does) but remember the gap.
            everyResultPresent = false;
            continue;
        }
        const resultDigest = (0, execution_backend_1.sha256)(node_fs_1.default.readFileSync(candidate.resultPath, "utf8"));
        records.push([candidate.id, resultDigest]);
    }
    if (!everyResultPresent) {
        return undefined;
    }
    return (0, execution_backend_1.sha256)(JSON.stringify(records));
}
|
|
313
|
+
/**
 * Write `content` to the cache file `file` via a temp file + rename, creating
 * the parent directory first.
 *
 * The content is first written to `<file>.<pid>.tmp` and then renamed over
 * `file`, so a reader never observes a partially written cache entry. If the
 * write or the rename fails, the temp file is removed before the error is
 * re-thrown, so failed attempts do not accumulate stray `*.tmp` files in the
 * cache directory.
 *
 * @param file    destination cache file path.
 * @param content UTF-8 text to store.
 * @throws whatever the underlying fs call threw (mkdir, write or rename).
 */
function writeResultCache(file, content) {
    node_fs_1.default.mkdirSync(node_path_1.default.dirname(file), { recursive: true });
    const tmp = `${file}.${process.pid}.tmp`;
    try {
        node_fs_1.default.writeFileSync(tmp, content, "utf8");
        node_fs_1.default.renameSync(tmp, file);
    }
    catch (error) {
        try {
            node_fs_1.default.rmSync(tmp, { force: true });
        }
        catch {
            // Best-effort cleanup only: the original failure below is the one to report.
        }
        throw error;
    }
}
|
|
248
319
|
/** Advance ONE concurrent ROUND: fulfill up to `limit` ready tasks in the first
|
|
249
320
|
* runnable phase as a single batch, recording results in DETERMINISTIC task
|
|
250
321
|
* order (the existing phase/dispatch order) regardless of completion order — so
|
|
@@ -322,6 +393,9 @@ function prepareConcurrentOutcomes(ctx, batch) {
|
|
|
322
393
|
continue;
|
|
323
394
|
}
|
|
324
395
|
const manifest = runner.showWorkerManifest(runId, workerId);
|
|
396
|
+
const cachePath = resultCachePath(run, task, (0, execution_backend_1.sha256)(task.prompt));
|
|
397
|
+
if (cachePath && node_fs_1.default.existsSync(cachePath))
|
|
398
|
+
continue;
|
|
325
399
|
const job = (0, execution_backend_1.prepareAgentSpawn)(buildAgentRequest(ctx, run, task, manifest));
|
|
326
400
|
if (job) {
|
|
327
401
|
jobs.push(job);
|
|
@@ -338,7 +412,7 @@ function prepareConcurrentOutcomes(ctx, batch) {
|
|
|
338
412
|
}
|
|
339
413
|
/** A failed agent hop: charge one attempt and (reuse v0.1.37 retryOrPark) either
|
|
340
414
|
* retry on the SAME worker scope next step, or PARK past the retry budget. */
|
|
341
|
-
function handleHop(ctx, task, workerId, reason
|
|
415
|
+
function handleHop(ctx, task, workerId, reason) {
|
|
342
416
|
const persisted = ctx.runner.showWorker(ctx.runId, workerId).retryCount || 0;
|
|
343
417
|
const prior = Math.max(ctx.attempts.get(task.id) || 0, persisted);
|
|
344
418
|
const entry = {
|
|
@@ -371,7 +445,6 @@ function handleHop(ctx, task, workerId, reason, dispatched) {
|
|
|
371
445
|
});
|
|
372
446
|
}
|
|
373
447
|
// Retryable: leave the task running (scope reused) for the next step.
|
|
374
|
-
void dispatched;
|
|
375
448
|
(0, worker_isolation_1.recordWorkerRetryAttempt)(ctx.runner.loadRun(ctx.runId), workerId, decided.attempts || prior + 1, reason);
|
|
376
449
|
return step("fulfill", "failed", {
|
|
377
450
|
runId: ctx.runId,
|
package/dist/error-feedback.js
CHANGED
|
@@ -97,7 +97,7 @@ function recordFeedback(run, input, options = {}) {
|
|
|
97
97
|
const now = new Date().toISOString();
|
|
98
98
|
const record = {
|
|
99
99
|
schemaVersion: exports.ERROR_FEEDBACK_SCHEMA_VERSION,
|
|
100
|
-
id: createFeedbackId(classification),
|
|
100
|
+
id: createFeedbackId(run, classification),
|
|
101
101
|
runId: run.id,
|
|
102
102
|
createdAt: now,
|
|
103
103
|
updatedAt: now,
|
|
@@ -357,9 +357,13 @@ function formatEvidence(evidence) {
|
|
|
357
357
|
return ["No evidence recorded."];
|
|
358
358
|
return evidence.map((entry) => `- ${entry.id}: ${entry.locator || entry.path || entry.summary || entry.source || ""}`);
|
|
359
359
|
}
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
360
|
+
// Deterministic feedback id (FreeBSD-audit L12/L13). The id is derived from the
// number of feedback records already on the run plus one, zero-padded to four
// digits and prefixed with the classification for readability. Nothing here
// reads a clock or a random source, so the same run state yields the same id.
function createFeedbackId(run, classification) {
    const alreadyRecorded = (run.feedback || []).length;
    const ordinal = String(alreadyRecorded + 1).padStart(4, "0");
    return `feedback-${classification}-${ordinal}`;
}
|
|
364
368
|
function feedbackKey(value) {
|
|
365
369
|
return [
|
|
@@ -343,7 +343,7 @@ function deriveCounterfactuals(run, scores) {
|
|
|
343
343
|
forSelectionGate.push({
|
|
344
344
|
ref: candidate.id,
|
|
345
345
|
kind: "candidate",
|
|
346
|
-
status:
|
|
346
|
+
status: "rejected",
|
|
347
347
|
reason: candidate.feedbackIds[0] ? `see feedback ${candidate.feedbackIds[0]}` : `candidate ${candidate.id} ${candidate.status}`
|
|
348
348
|
});
|
|
349
349
|
for (const scoreId of candidate.scores || []) {
|
|
@@ -389,11 +389,11 @@ function deriveCounterfactuals(run, scores) {
|
|
|
389
389
|
// node. This returns the operator-graph node ids backing every decision-bearing
|
|
390
390
|
// reasoning step of an adopted chain, so state-explosion can protect them.
|
|
391
391
|
// ---------------------------------------------------------------------------
|
|
392
|
-
function reasoningCriticalNodeIds(run) {
|
|
392
|
+
function reasoningCriticalNodeIds(run, operator = (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run)) {
|
|
393
393
|
const ids = new Set();
|
|
394
394
|
const faninIds = new Set((run.multiAgent?.fanins || []).map((entry) => entry.id));
|
|
395
395
|
const commitById = new Map((run.commits || []).map((commit) => [commit.id, commit]));
|
|
396
|
-
for (const evidence of
|
|
396
|
+
for (const evidence of operator.evidence) {
|
|
397
397
|
if (evidence.status !== "adopted")
|
|
398
398
|
continue;
|
|
399
399
|
for (const id of evidence.candidateIds)
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.resolveAgentInvocation = resolveAgentInvocation;
|
|
7
|
+
exports.stripSecretArgs = stripSecretArgs;
|
|
8
|
+
exports.parseAgentReport = parseAgentReport;
|
|
9
|
+
exports.agentSubstitutions = agentSubstitutions;
|
|
10
|
+
exports.substituteAgentArg = substituteAgentArg;
|
|
11
|
+
exports.recordedAgentHandle = recordedAgentHandle;
|
|
12
|
+
exports.extractEndpointResult = extractEndpointResult;
|
|
13
|
+
exports.agentHandle = agentHandle;
|
|
14
|
+
exports.prepareAgentSpawn = prepareAgentSpawn;
|
|
15
|
+
exports.runAgentBatchOutcomes = runAgentBatchOutcomes;
|
|
16
|
+
// Agent-delegation pure helpers + concurrent batch fulfillment for the
|
|
17
|
+
// execution-backend driver layer. Carved out of execution-backend.ts
|
|
18
|
+
// (FreeBSD-audit god-module carve) so the driver layer no longer bundles the
|
|
19
|
+
// agent sub-domain's data-transform helpers; the stateful runners
|
|
20
|
+
// (runAgentProcess / runAgentEndpoint) that build refusal/delegated envelopes
|
|
21
|
+
// stay in the parent and import these. The parent re-exports the public surface
|
|
22
|
+
// (stripSecretArgs, AgentSpawnJob, prepareAgentSpawn, runAgentBatchOutcomes) so
|
|
23
|
+
// every importer is byte-unchanged.
|
|
24
|
+
//
|
|
25
|
+
// BEHAVIOR-PRESERVING — pure code movement, zero logic change. Every function
|
|
26
|
+
// here is a pure function of its inputs (request/env/argv → resolved data); none
|
|
27
|
+
// reaches back into the parent's envelope builders, so there is no runtime cycle.
|
|
28
|
+
// Matches the existing router pattern (orchestrator/*-operations.ts,
|
|
29
|
+
// run-registry/derive.ts).
|
|
30
|
+
//
|
|
31
|
+
// agent — the v0.1.38 delegating driver. Spawns an EXTERNAL agent process per
|
|
32
|
+
// worker (claude -p / codex exec / …) argv-style (shell:false), or POSTs the
|
|
33
|
+
// manifest to a configured HTTP agent endpoint. The agent reads the worker
|
|
34
|
+
// input/manifest and writes the worker's result.md out-of-process; CW captures
|
|
35
|
+
// the agent CHILD's command + exit + stdout digest as the canonical evidence
|
|
36
|
+
// triple (NEVER the result.md — that is the separate recordWorkerOutput layer)
|
|
37
|
+
// and records the kind:process handle + agent-reported model in provenance.
|
|
38
|
+
//
|
|
39
|
+
// THE RED LINE: CW spawns the agent and records its attested output. It NEVER
|
|
40
|
+
// imports a model SDK, holds an API key, or constructs a model API request. Any
|
|
41
|
+
// API key flows from the agent's OWN inherited env; CW never reads or records it.
|
|
42
|
+
// The operator-chosen CW_AGENT_MODEL is interpolated into `{{model}}` as policy
|
|
43
|
+
// and recorded ONLY in secret-stripped args — it is NEVER the attested model id.
|
|
44
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
45
|
+
const node_child_process_1 = require("node:child_process");
|
|
46
|
+
const util_1 = require("./util");
|
|
47
|
+
/**
 * Resolve how the agent is to be invoked, preferring the request's delegation
 * over the top-level request fields over the CW_AGENT_* environment variables.
 *
 * - `endpoint` / `model`: delegation value, else the trimmed CW_AGENT_ENDPOINT /
 *   CW_AGENT_MODEL, else `undefined`.
 * - `binary`: `delegation.command`, else `request.command`; when neither is set
 *   the first whitespace-separated token of CW_AGENT_COMMAND.
 * - `rawArgs`: a copy of `delegation.args`, else a copy of `request.args`, else
 *   `[]`. When the command came from the env string, or the command itself
 *   contains whitespace, it is split on whitespace and the remaining tokens
 *   become the args — unless `delegation.args` was given, which always wins.
 *
 * Command strings are only ever split on whitespace into discrete argv tokens.
 *
 * @param request agent request (reads `delegation`, `command`, `args`, `timeoutMs`).
 * @returns `{ binary, rawArgs, endpoint, model, timeoutMs }`.
 */
function resolveAgentInvocation(request) {
    const readEnv = (name) => (process.env[name] || "").trim();
    const tokenize = (commandLine) => commandLine.split(/\s+/).filter(Boolean);
    const delegation = request.delegation || {};
    const hasDelegatedArgs = Boolean(delegation.args);
    const envCommand = readEnv("CW_AGENT_COMMAND");
    const endpoint = delegation.endpoint || readEnv("CW_AGENT_ENDPOINT") || undefined;
    const model = delegation.model || readEnv("CW_AGENT_MODEL") || undefined;
    let binary = delegation.command || request.command || undefined;
    let rawArgs = [];
    if (hasDelegatedArgs) {
        rawArgs = [...delegation.args];
    }
    else if (request.args) {
        rawArgs = [...request.args];
    }
    if (!binary) {
        // No explicit command: fall back to the env-string template, if any.
        if (envCommand) {
            const [head, ...rest] = tokenize(envCommand);
            binary = head;
            if (!hasDelegatedArgs) {
                rawArgs = rest;
            }
        }
    }
    else if (!hasDelegatedArgs && /\s/.test(binary)) {
        // An explicit command that carries its own arguments inline.
        const [head, ...rest] = tokenize(binary);
        binary = head;
        rawArgs = rest;
    }
    return { binary, rawArgs, endpoint, model, timeoutMs: request.timeoutMs };
}
|
|
72
|
+
// Flags whose FOLLOWING argument is a secret value (compared lower-cased).
const AGENT_SECRET_FLAGS = new Set(["--api-key", "--apikey", "--token", "--key", "--secret", "--password", "--auth", "--bearer"]);
// Inline `flag=value` form. The keyword may start right after the dashes
// (`--token=…`, `--key=…`) or be embedded in a longer flag name (`--x-api-key=…`).
const AGENT_SECRET_INLINE = /^(--?[\w-]*(?:key|token|secret|password|auth|bearer)[\w-]*)=.*/i;
/**
 * Redact secrets from recorded agent args so a raw secret is never recorded in
 * provenance/evidence. Exported so the durable config surface strips the SAME
 * way before persisting/showing a command template.
 *
 * Three shapes are redacted:
 *  1. the value FOLLOWING a known secret flag (`--token abc` → `--token <redacted>`);
 *  2. an inline `flag=value` whose flag name contains a secret keyword
 *     (`--token=abc`, `--x-api-key=abc` → `<flag>=<redacted>`);
 *  3. a bare token that LOOKS like a credential: a known provider prefix, or a
 *     run of 32+ characters from `[A-Za-z0-9_-]` (no path separators, so file
 *     paths and `{{...}}` substitutions survive as useful provenance).
 *
 * Over-redaction is safe; leaking a key is not.
 *
 * @param args argv-style array; each element is stringified before inspection.
 * @returns a new array of the same length with secret values replaced by `<redacted>`.
 */
function stripSecretArgs(args) {
    const out = [];
    for (let i = 0; i < args.length; i++) {
        const arg = String(args[i]);
        if (AGENT_SECRET_FLAGS.has(arg.toLowerCase())) {
            out.push(arg);
            // Consume the flag's value too, when there is one.
            if (i + 1 < args.length) {
                out.push("<redacted>");
                i++;
            }
            continue;
        }
        const inline = arg.match(AGENT_SECRET_INLINE);
        if (inline) {
            out.push(`${inline[1]}=<redacted>`);
            continue;
        }
        if (/^(sk-|ghp_|gho_|github_pat_|xox[abpr]-|Bearer\s)/.test(arg) || (arg.length >= 32 && /^[A-Za-z0-9_\-]{32,}$/.test(arg))) {
            out.push("<redacted>");
            continue;
        }
        out.push(arg);
    }
    return out;
}
|
|
105
|
+
/**
 * Best-effort parse of the AGENT's own report (model id, usage, usage
 * signature) from its stdout. SOLELY the agent's report — never CW_AGENT_MODEL.
 *
 * The whole (trimmed) stdout is tried as one JSON object first. Failing that,
 * the lines are scanned from the end and the LAST brace-delimited line that
 * parses to a JSON object is used; brace-delimited lines that are not valid
 * JSON objects are skipped rather than ending the search.
 *
 * Model resolution order: `model`, `usage.model`, `modelId`; when none is a
 * non-empty string and a `modelUsage` object is present, the key whose entry
 * reports the most input tokens (`inputTokens` / `input_tokens`) is used.
 *
 * @param stdout raw agent stdout (any value; stringified).
 * @returns `{}` when nothing parseable is found, otherwise
 *          `{ model, usage, usageSignature }` with absent fields `undefined`.
 */
function parseAgentReport(stdout) {
    const text = String(stdout || "").trim();
    if (!text)
        return {};
    // Parse to a plain (non-array) object, or undefined on any failure.
    const tryObj = (value) => {
        try {
            const parsed = JSON.parse(value);
            return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : undefined;
        }
        catch {
            return undefined;
        }
    };
    let obj = tryObj(text);
    if (!obj) {
        const lines = text.split(/\r?\n/);
        for (let i = lines.length - 1; i >= 0 && !obj; i--) {
            const line = lines[i].trim();
            if (line.startsWith("{") && line.endsWith("}"))
                obj = tryObj(line);
        }
    }
    if (!obj)
        return {};
    const usage = obj.usage && typeof obj.usage === "object" ? obj.usage : undefined;
    let model = typeof obj.model === "string"
        ? obj.model
        : usage && typeof usage.model === "string"
            ? usage.model
            : typeof obj.modelId === "string"
                ? obj.modelId
                : undefined;
    // Some agents (e.g. `claude -p --output-format json`) report no top-level model;
    // the model id(s) appear as KEYS of a `modelUsage` object. Pick the primary model
    // (the one with the most input tokens).
    if (!model && obj.modelUsage && typeof obj.modelUsage === "object" && !Array.isArray(obj.modelUsage)) {
        const entries = Object.entries(obj.modelUsage);
        if (entries.length) {
            const tokensOf = (value) => {
                const record = value && typeof value === "object" ? value : {};
                const input = Number(record.inputTokens ?? record.input_tokens ?? 0);
                return Number.isFinite(input) ? input : 0;
            };
            entries.sort((left, right) => tokensOf(right[1]) - tokensOf(left[1]));
            model = entries[0][0];
        }
    }
    // Track 1: the executor's detached signature over its usage report, if it signs.
    const usageSignature = typeof obj.usageSignature === "string"
        ? obj.usageSignature
        : typeof obj.usage_signature === "string"
            ? obj.usage_signature
            : undefined;
    return { model, usage, usageSignature };
}
|
|
163
|
+
/**
 * Build the `{{placeholder}}` substitution table for an agent argv template.
 *
 * Every value falls back to the empty string when its source is missing.
 * `workerDir` comes from the manifest, else `request.cwd`; `manifest` is the
 * manifest's own path, else `<workerDir>/manifest.json` when a worker
 * directory is known.
 *
 * @param request agent request (reads `manifest` and `cwd`).
 * @param model   operator-chosen model string for `{{model}}` (may be absent).
 * @returns object with keys `manifest`, `input`, `result`, `workerDir`, `model`, `prompt`.
 */
function agentSubstitutions(request, model) {
    const manifest = request.manifest;
    const workerDir = manifest?.workerDir || request.cwd || "";
    let manifestPath = manifest?.manifestPath;
    if (!manifestPath) {
        manifestPath = workerDir ? node_path_1.default.join(workerDir, "manifest.json") : "";
    }
    const table = { manifest: manifestPath };
    table.input = manifest?.inputPath || "";
    table.result = manifest?.resultPath || "";
    table.workerDir = workerDir;
    table.model = model || "";
    table.prompt = manifest?.prompt || "";
    return table;
}
|
|
175
|
+
/**
 * Expand `{{name}}` placeholders in one agent argument.
 *
 * Only OWN keys of `subst` are substituted. Unknown placeholders are left
 * verbatim so a typo stays visible in the spawned command line. Checking own
 * properties (rather than `in`) keeps inherited names such as `constructor`
 * or `toString` from being expanded into function source text.
 *
 * @param {string} arg - raw argument template.
 * @param {object} subst - placeholder table (name -> replacement).
 * @returns {string} the argument with every known placeholder replaced.
 */
function substituteAgentArg(arg, subst) {
    // A replacer function is used so `$&`-style sequences in values stay literal.
    return arg.replace(/\{\{(\w+)\}\}/g, (placeholder, key) => Object.prototype.hasOwnProperty.call(subst, key) ? subst[key] : placeholder);
}
|
|
178
|
+
/**
 * Assemble the process handle recorded on the envelope: secret-stripped args
 * plus what the agent itself reported. Same SHAPE that lands in provenance,
 * never in evidence.
 *
 * @param {string} [binary] - agent command; when truthy the handle is in "command" mode.
 * @param {string} [endpoint] - agent endpoint, used as `ref` when there is no binary.
 * @param {string[]} recordedArgs - already secret-stripped arguments.
 * @param {string} [model] - configured model id.
 * @param {string} [reportedModel] - model id the agent reported.
 * @param {object} [reportedUsage] - agent's self-reported token usage; key omitted when falsy.
 * @param {string} [usageSignature] - detached signature over the usage report; key omitted when falsy.
 * @returns {object} `{ kind: "process", ref, endpoint, metadata }`.
 */
function recordedAgentHandle(binary, endpoint, recordedArgs, model, reportedModel, reportedUsage, usageSignature) {
    let ref;
    if (binary) {
        ref = [binary, ...recordedArgs].join(" ");
    }
    else {
        ref = endpoint || "";
    }
    const metadata = {
        mode: binary ? "command" : "endpoint",
        command: binary,
        args: recordedArgs,
        model,
        reportedModel
    };
    // Telemetry thread-back: the agent's OWN self-reported token usage. ATTESTED,
    // never measured by CW — same posture as reportedModel. The key is absent
    // (not undefined) when the agent reported no usage.
    if (reportedUsage) {
        metadata.reportedUsage = reportedUsage;
    }
    // Track 1: the executor's detached signature over its usage report; verified
    // later against the operator trust key. Absent when the agent does not sign.
    if (usageSignature) {
        metadata.usageSignature = usageSignature;
    }
    return { kind: "process", ref, endpoint, metadata };
}
|
|
203
|
+
/**
 * Pull the result body out of an endpoint agent's stdout.
 *
 * - Blank output yields undefined.
 * - Output that is not JSON is returned as-is (trimmed).
 * - A JSON object yields its string `result`, else its string `resultMarkdown`.
 * - Any other valid JSON yields undefined.
 *
 * @param {*} stdout - captured stdout; coerced to a string.
 * @returns {string|undefined} the result body, if one could be extracted.
 */
function extractEndpointResult(stdout) {
    const body = String(stdout || "").trim();
    if (body === "") {
        return undefined;
    }
    let payload;
    try {
        payload = JSON.parse(body);
    }
    catch {
        /* not JSON — treat the raw text as the result body */
        return body;
    }
    if (!payload || typeof payload !== "object") {
        return undefined;
    }
    // `result` takes precedence over `resultMarkdown`.
    for (const field of ["result", "resultMarkdown"]) {
        if (typeof payload[field] === "string") {
            return payload[field];
        }
    }
    return undefined;
}
|
|
222
|
+
/**
 * Describe the configured agent as a process handle, before it has run.
 *
 * @param {object} request - delegation request passed to resolveAgentInvocation.
 * @returns {object|undefined} `{ kind: "process", ref, endpoint, metadata }`, or
 *   undefined when neither a binary nor an endpoint is configured.
 */
function agentHandle(request) {
    // The agent invocation is POLICY-as-DATA, resolved flags(delegation) > env.
    // Only secret-stripped provenance is recorded here; the raw template is
    // resolved again at spawn time so no secret lands in a recorded handle or
    // evidence entry.
    const { binary, endpoint, rawArgs, model } = resolveAgentInvocation(request);
    if (!(binary || endpoint)) {
        return undefined;
    }
    const args = stripSecretArgs(rawArgs);
    const usesCommand = Boolean(binary);
    return {
        kind: "process",
        ref: usesCommand ? [binary, ...args].join(" ") : endpoint || "",
        endpoint,
        metadata: {
            mode: usesCommand ? "command" : "endpoint",
            command: binary,
            args,
            model
        }
    };
}
|
|
244
|
+
/**
 * Turn a request into a spawn-style batch job.
 *
 * Returns undefined when no agent binary is configured (endpoint-configured or
 * unconfigured agents settle through the serial path instead).
 *
 * @param {object} request - delegation request; `cwd` becomes the job's cwd.
 * @returns {{binary: string, args: string[], cwd: *, timeoutMs: number}|undefined}
 */
function prepareAgentSpawn(request) {
    const invocation = resolveAgentInvocation(request);
    const { binary } = invocation;
    if (!binary) {
        return undefined;
    }
    const table = agentSubstitutions(request, invocation.model);
    const args = invocation.rawArgs.map((template) => substituteAgentArg(template, table));
    return {
        binary,
        args,
        cwd: request.cwd,
        // Falls back to 600000 ms when no timeout is configured.
        timeoutMs: invocation.timeoutMs || 600000
    };
}
|
|
258
|
+
// Source of the batch delegate, run with `node -e`. It reads the jobs JSON on
// stdin, spawns ALL jobs concurrently (shell:false, inherited env — the agent's
// own credentials resolve; CW never reads them), sends a per-job SIGTERM at
// timeoutMs and SIGKILL at +5s, stops appending to a job's captured stdout once
// it reaches 32MB, and prints the outcome array when every job has settled.
// stderr is drained (a full pipe must never wedge a child). A kill yields
// exitCode null — the no-exit-code refusal.
//
// Child stdout is decoded with setEncoding("utf8") so a multi-byte character
// split across two pipe chunks is reassembled instead of being turned into
// U+FFFD replacement characters by per-chunk Buffer-to-string coercion.
const BATCH_DELEGATE_CHILD = `
const { spawn } = require("node:child_process");
let raw = "";
process.stdin.setEncoding("utf8");
process.stdin.on("data", (d) => (raw += d));
process.stdin.on("end", () => {
  const jobs = JSON.parse(raw);
  if (!jobs.length) { process.stdout.write("[]"); return; }
  const out = new Array(jobs.length);
  let pending = jobs.length;
  const CAP = 32 * 1024 * 1024;
  jobs.forEach((job, i) => {
    let stdout = "";
    let settled = false;
    const settle = (o) => {
      if (settled) return;
      settled = true;
      out[i] = o;
      if (--pending === 0) process.stdout.write(JSON.stringify(out));
    };
    let child;
    try {
      child = spawn(job.binary, job.args, { cwd: job.cwd, env: process.env, shell: false });
    } catch (error) {
      settle({ spawnError: String((error && error.message) || error), exitCode: null, stdout: "" });
      return;
    }
    const term = setTimeout(() => { try { child.kill("SIGTERM"); } catch {} }, job.timeoutMs);
    const kill = setTimeout(() => { try { child.kill("SIGKILL"); } catch {} }, job.timeoutMs + 5000);
    // Streaming UTF-8 decode: chunks arrive as strings with split characters rejoined.
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (d) => { if (stdout.length < CAP) stdout += d; });
    child.stderr.on("data", () => {});
    child.on("error", (error) => {
      clearTimeout(term); clearTimeout(kill);
      settle({ spawnError: String((error && error.message) || error), exitCode: null, stdout });
    });
    child.on("close", (code) => {
      clearTimeout(term); clearTimeout(kill);
      settle({ exitCode: typeof code === "number" ? code : null, stdout });
    });
  });
});
`;
|
|
305
|
+
/**
 * Run a batch of agent spawn jobs concurrently through the batch delegate and
 * return one outcome per job, index-aligned with `jobs`.
 *
 * The delegate is bounded by a backstop timeout (longest job timeout + 30s), so
 * a wedged delegate cannot deadlock the drive. On any batch-level failure EVERY
 * job settles as a fail-closed spawn refusal — never a fabricated completion,
 * never a hang.
 *
 * @param {Array<{binary: string, args: string[], cwd: *, timeoutMs: number}>} jobs
 * @returns {Array<object>} outcomes shaped `{ exitCode, stdout, spawnError? }`.
 */
function runAgentBatchOutcomes(jobs) {
    if (!jobs.length) {
        return [];
    }
    const longestTimeout = jobs.reduce((longest, job) => Math.max(longest, job.timeoutMs), -Infinity);
    const delegate = (0, node_child_process_1.spawnSync)(process.execPath, ["-e", BATCH_DELEGATE_CHILD], {
        input: JSON.stringify(jobs),
        encoding: "utf8",
        // Room for each job's capped stdout plus JSON framing.
        maxBuffer: 33 * 1024 * 1024 * jobs.length,
        timeout: longestTimeout + 30000
    });
    const exitedCleanly = !delegate.error && delegate.status === 0;
    if (exitedCleanly) {
        try {
            const outcomes = JSON.parse(String(delegate.stdout || ""));
            const aligned = Array.isArray(outcomes) && outcomes.length === jobs.length;
            if (aligned) {
                return outcomes;
            }
        }
        catch {
            // fall through to the fail-closed mapping below
        }
    }
    let reason;
    if (delegate.error) {
        reason = (0, util_1.messageOf)(delegate.error);
    }
    else if (delegate.status === null) {
        reason = "batch delegate exited without an exit code (timed out or killed)";
    }
    else {
        reason = `batch delegate exited with ${delegate.status}`;
    }
    const spawnError = `batch delegate failed: ${reason}`;
    return jobs.map(() => ({ spawnError, exitCode: null, stdout: "" }));
}
|