ace-swarm 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/assets/.agents/ACE/AGENT_REGISTRY.md +7 -1
  2. package/assets/.agents/ACE/agent-eval/instructions.md +41 -1
  3. package/assets/.agents/ACE/agent-memory/instructions.md +35 -1
  4. package/assets/.agents/ACE/agent-observability/instructions.md +35 -1
  5. package/assets/.agents/ACE/agent-release/instructions.md +34 -1
  6. package/assets/.agents/ACE/agent-security/instructions.md +35 -1
  7. package/assets/.agents/ACE/agent-skeptic/instructions.md +49 -0
  8. package/assets/.agents/ACE/orchestrator/AGENTS.md +11 -0
  9. package/assets/agent-state/ACE_WORKFLOW.md +65 -0
  10. package/assets/agent-state/INTERFACE_REGISTRY.md +1 -0
  11. package/assets/agent-state/MODULES/schemas/ACE_RUNTIME_PROFILE.schema.json +79 -0
  12. package/assets/scripts/copilot-hook-dispatch.mjs +39 -1
  13. package/assets/tasks/README.md +26 -0
  14. package/dist/ace-autonomy.d.ts +137 -0
  15. package/dist/ace-autonomy.d.ts.map +1 -0
  16. package/dist/ace-autonomy.js +472 -0
  17. package/dist/ace-autonomy.js.map +1 -0
  18. package/dist/agent-runtime/role-adapters.d.ts.map +1 -1
  19. package/dist/agent-runtime/role-adapters.js +47 -6
  20. package/dist/agent-runtime/role-adapters.js.map +1 -1
  21. package/dist/prompts.d.ts.map +1 -1
  22. package/dist/prompts.js +101 -0
  23. package/dist/prompts.js.map +1 -1
  24. package/dist/public-surface.d.ts.map +1 -1
  25. package/dist/public-surface.js +6 -0
  26. package/dist/public-surface.js.map +1 -1
  27. package/dist/resources.d.ts.map +1 -1
  28. package/dist/resources.js +29 -0
  29. package/dist/resources.js.map +1 -1
  30. package/dist/runtime-executor.d.ts.map +1 -1
  31. package/dist/runtime-executor.js +158 -0
  32. package/dist/runtime-executor.js.map +1 -1
  33. package/dist/runtime-profile.d.ts +18 -0
  34. package/dist/runtime-profile.d.ts.map +1 -1
  35. package/dist/runtime-profile.js +39 -3
  36. package/dist/runtime-profile.js.map +1 -1
  37. package/dist/shared.d.ts.map +1 -1
  38. package/dist/shared.js +1 -0
  39. package/dist/shared.js.map +1 -1
  40. package/dist/tools-framework.d.ts.map +1 -1
  41. package/dist/tools-framework.js +366 -128
  42. package/dist/tools-framework.js.map +1 -1
  43. package/dist/tools-memory.d.ts.map +1 -1
  44. package/dist/tools-memory.js +80 -0
  45. package/dist/tools-memory.js.map +1 -1
  46. package/dist/workspace-manager.d.ts.map +1 -1
  47. package/dist/workspace-manager.js +13 -2
  48. package/dist/workspace-manager.js.map +1 -1
  49. package/package.json +1 -1
@@ -3,7 +3,7 @@
3
3
  * plus new run_tests and validate_framework tools.
4
4
  */
5
5
  import { z } from "zod";
6
- import { ALL_MCP_CLIENTS, ALL_AGENTS, COMPOSABLE_AGENTS, SWARM_AGENTS, SWARM_SUBAGENT_MAP, bootstrapAceWorkspace, classifyPathSource, detectAssetDrift, getAllMcpServerConfigSnippets, getAgentInstructionPath, getAgentManifestPath, getKernelArtifactPath, getMcpClientInstallHint, getMcpServerConfigSnippet, getTaskArtifactPath, isSwarmRole, listAvailableSkills, normalizePathForValidation, safeRead, wsPath, } from "./helpers.js";
6
+ import { ALL_MCP_CLIENTS, ALL_AGENTS, COMPOSABLE_AGENTS, SWARM_AGENTS, SWARM_SUBAGENT_MAP, bootstrapAceWorkspace, classifyPathSource, detectAssetDrift, getAllMcpServerConfigSnippets, getAgentInstructionPath, getAgentManifestPath, getKernelArtifactPath, getMcpClientInstallHint, getMcpServerConfigSnippet, getTaskArtifactPath, isSwarmRole, listAvailableSkills, normalizePathForValidation, safeRead, safeWrite, withFileLock, wsPath, } from "./helpers.js";
7
7
  import { getRoleTitle, MCP_CLIENT_ENUM, scoreDomains, } from "./shared.js";
8
8
  import { refreshAstgrepIndex } from "./astgrep-index.js";
9
9
  import { scanWorkspaceDelta } from "./index-store.js";
@@ -17,6 +17,7 @@ import { existsSync, readdirSync, readFileSync } from "node:fs";
17
17
  import { resolve } from "node:path";
18
18
  import { auditPublicSurface } from "./public-surface.js";
19
19
  import { PROVENANCE_CRITICAL_EVENT_TYPES, validateArtifactManifestPayload, validateProvenanceLogContent, validateTealConfigContent, } from "./schemas.js";
20
+ import { readAceTaskContractAssessment } from "./ace-autonomy.js";
20
21
  function getArtifactManifestEntries(payload) {
21
22
  if (!payload || typeof payload !== "object" || Array.isArray(payload))
22
23
  return [];
@@ -35,6 +36,232 @@ function getArtifactManifestEntries(payload) {
35
36
  }
36
37
  return [];
37
38
  }
39
+ function readGateManifests(gatesDir) {
40
+ const files = readdirSync(gatesDir).filter((f) => f.endsWith(".json"));
41
+ const allGates = [];
42
+ for (const file of files) {
43
+ try {
44
+ const raw = readFileSync(resolve(gatesDir, file), "utf-8");
45
+ const gate = JSON.parse(raw);
46
+ if (gate.id)
47
+ allGates.push(gate);
48
+ }
49
+ catch {
50
+ /* skip corrupt manifests */
51
+ }
52
+ }
53
+ return allGates;
54
+ }
55
+ function evaluateGateTargets(targets) {
56
+ const results = [];
57
+ for (const gate of targets) {
58
+ if (gate.type === "executable" && gate.command.trim().length > 0) {
59
+ const testResult = runTestSuite(gate.command);
60
+ results.push({
61
+ id: gate.id,
62
+ type: gate.type,
63
+ ok: testResult.ok,
64
+ detail: testResult.ok
65
+ ? `PASS (exit ${testResult.exit_code}, ${testResult.duration_ms}ms)`
66
+ : `FAIL (exit ${testResult.exit_code}): ${testResult.output.slice(0, 500)}`,
67
+ invariant: gate.invariant,
68
+ evidence_requirement: gate.evidence_requirement,
69
+ });
70
+ continue;
71
+ }
72
+ if (gate.type === "artifact_scan") {
73
+ const evidenceReq = gate.evidence_requirement.trim();
74
+ const looksLikeProse = evidenceReq.includes(" ") &&
75
+ !/\.(md|json|ts|js|yaml|yml|ndjson|txt)\b/.test(evidenceReq) &&
76
+ !evidenceReq.includes("/");
77
+ const missing = [];
78
+ if (looksLikeProse) {
79
+ const knownArtifacts = [
80
+ "STATUS.md",
81
+ "EVIDENCE_LOG.md",
82
+ "HANDOFF.json",
83
+ "DECISIONS.md",
84
+ "RISKS.md",
85
+ "SCOPE.md",
86
+ "TASK.md",
87
+ "QUALITY_GATES.md",
88
+ "SPEC_CONTRACT.json",
89
+ "TEAL_CONFIG.md",
90
+ ];
91
+ const invariantLower = gate.invariant.toLowerCase();
92
+ const relevantArtifacts = knownArtifacts.filter((artifact) => invariantLower.includes(artifact.replace(/\.\w+$/, "").toLowerCase().replace(/_/g, " ")) || invariantLower.includes(artifact.toLowerCase()));
93
+ if (relevantArtifacts.length === 0) {
94
+ missing.push(`(prose evidence cannot be auto-verified: "${evidenceReq.slice(0, 80)}")`);
95
+ }
96
+ else {
97
+ for (const artifact of relevantArtifacts) {
98
+ if (!existsSync(wsPath("agent-state", artifact))) {
99
+ missing.push(artifact);
100
+ }
101
+ }
102
+ }
103
+ }
104
+ else {
105
+ const evidenceFiles = evidenceReq
106
+ .split(/[+,&]/)
107
+ .map((s) => s.trim())
108
+ .filter(Boolean);
109
+ for (const ref of evidenceFiles) {
110
+ const candidates = [wsPath("agent-state", ref), wsPath(ref)];
111
+ const found = candidates.some((candidate) => existsSync(candidate));
112
+ if (!found) {
113
+ const asFile = ref.replace(/\s+/g, "_").replace(/[^a-zA-Z0-9_./-]/g, "");
114
+ const fileFound = existsSync(wsPath("agent-state", asFile));
115
+ if (!fileFound)
116
+ missing.push(ref);
117
+ }
118
+ }
119
+ }
120
+ results.push({
121
+ id: gate.id,
122
+ type: gate.type,
123
+ ok: missing.length === 0,
124
+ detail: missing.length === 0
125
+ ? "PASS (all evidence artifacts present)"
126
+ : `FAIL (missing evidence: ${missing.join(", ")})`,
127
+ invariant: gate.invariant,
128
+ evidence_requirement: gate.evidence_requirement,
129
+ });
130
+ continue;
131
+ }
132
+ if (gate.type === "manual_review") {
133
+ results.push({
134
+ id: gate.id,
135
+ type: gate.type,
136
+ ok: false,
137
+ detail: "PENDING — requires manual review (not auto-enforceable, blocking until reviewed)",
138
+ invariant: gate.invariant,
139
+ evidence_requirement: gate.evidence_requirement,
140
+ });
141
+ continue;
142
+ }
143
+ results.push({
144
+ id: gate.id,
145
+ type: gate.type ?? "unknown",
146
+ ok: false,
147
+ detail: "Unknown gate type or no command specified",
148
+ invariant: gate.invariant,
149
+ evidence_requirement: gate.evidence_requirement,
150
+ });
151
+ }
152
+ return results;
153
+ }
154
+ function buildAdversarialClaim(result) {
155
+ switch (result.type) {
156
+ case "executable":
157
+ return `Executable gate ${result.id} failed its invariant check.`;
158
+ case "artifact_scan":
159
+ return `Artifact evidence for gate ${result.id} is not sufficient to trust the invariant.`;
160
+ case "manual_review":
161
+ return `Gate ${result.id} still depends on human review before the invariant can be trusted.`;
162
+ default:
163
+ return `Gate ${result.id} could not be verified cleanly.`;
164
+ }
165
+ }
166
+ function inferRouteHint(result) {
167
+ if (result.type === "executable")
168
+ return "agent-builder";
169
+ if (result.type === "artifact_scan" &&
170
+ result.detail.toLowerCase().includes("prose evidence cannot be auto-verified")) {
171
+ return "agent-spec";
172
+ }
173
+ if (result.type === "artifact_scan")
174
+ return "agent-ops";
175
+ if (result.type === "manual_review")
176
+ return "agent-skeptic";
177
+ return undefined;
178
+ }
179
+ function buildAdversarialReview(results, reviewFocus) {
180
+ const candidates = results
181
+ .filter((result) => !result.ok)
182
+ .map((result) => ({
183
+ gate_id: result.id,
184
+ gate_type: result.type,
185
+ claim: buildAdversarialClaim(result),
186
+ detail: result.detail,
187
+ invariant: result.invariant,
188
+ evidence_requirement: result.evidence_requirement,
189
+ route_hint: inferRouteHint(result),
190
+ }));
191
+ const disproved = [];
192
+ const confirmed = [];
193
+ for (const finding of candidates) {
194
+ if (finding.gate_type === "manual_review") {
195
+ disproved.push({
196
+ ...finding,
197
+ reason: "Pending manual review alone is not enough to confirm an actionable defect.",
198
+ });
199
+ continue;
200
+ }
201
+ if (finding.gate_type === "artifact_scan" &&
202
+ finding.detail.toLowerCase().includes("prose evidence cannot be auto-verified")) {
203
+ disproved.push({
204
+ ...finding,
205
+ reason: "The evidence contract is too vague to confirm a defect without stronger artifacts.",
206
+ });
207
+ continue;
208
+ }
209
+ confirmed.push(finding);
210
+ }
211
+ return {
212
+ mode: "skeptic_adversarial",
213
+ focus: reviewFocus?.trim() || undefined,
214
+ candidates,
215
+ disproved,
216
+ confirmed,
217
+ summary: {
218
+ candidate_count: candidates.length,
219
+ disproved_count: disproved.length,
220
+ confirmed_count: confirmed.length,
221
+ actionable_gate_ids: confirmed.map((finding) => finding.gate_id),
222
+ },
223
+ };
224
+ }
225
+ async function appendEvidenceLogEntrySafe(review) {
226
+ const timestamp = new Date().toISOString();
227
+ const anchor = `#ts:${timestamp}`;
228
+ const evidenceRef = `agent-state/EVIDENCE_LOG.md${anchor}`;
229
+ return withFileLock("agent-state/EVIDENCE_LOG.md", () => {
230
+ const existing = safeRead("agent-state/EVIDENCE_LOG.md");
231
+ const seed = existing.startsWith("[FILE NOT FOUND]") || existing.startsWith("[ACCESS DENIED]")
232
+ ? "# EVIDENCE LOG\n\nAppend-only validation evidence.\n\n## Entries\n"
233
+ : existing.trimEnd();
234
+ const lines = [
235
+ "",
236
+ `[${timestamp}] Skeptic adversarial review via execute_gates`,
237
+ `- evidence_ref: ${anchor}`,
238
+ ...(review.focus ? [`- focus: ${review.focus}`] : []),
239
+ `- candidates: ${review.summary.candidate_count}`,
240
+ `- disproved: ${review.summary.disproved_count}`,
241
+ `- confirmed: ${review.summary.confirmed_count}`,
242
+ `- actionable_gate_ids: ${review.summary.actionable_gate_ids.length > 0
243
+ ? review.summary.actionable_gate_ids.join(", ")
244
+ : "none"}`,
245
+ "- bug_hunter_candidates:",
246
+ ...(review.candidates.length > 0
247
+ ? review.candidates.map((finding) => ` - ${finding.gate_id} [${finding.gate_type}]: ${finding.claim} (${finding.detail})`)
248
+ : [" - none"]),
249
+ "- disprover_results:",
250
+ ...(review.disproved.length > 0
251
+ ? review.disproved.map((finding) => ` - ${finding.gate_id}: ${finding.reason ?? "candidate rejected"}`)
252
+ : [" - none"]),
253
+ "- adjudicator_confirmed:",
254
+ ...(review.confirmed.length > 0
255
+ ? review.confirmed.map((finding) => {
256
+ const route = finding.route_hint ? ` route_hint=${finding.route_hint}` : "";
257
+ return ` - ${finding.gate_id}: ${finding.detail}.${route}`;
258
+ })
259
+ : [" - none"]),
260
+ ];
261
+ const path = safeWrite("agent-state/EVIDENCE_LOG.md", `${seed}${lines.join("\n")}\n`);
262
+ return { path, evidenceRef };
263
+ });
264
+ }
38
265
  export function registerFrameworkTools(server) {
39
266
  // ── Routing (improved with weighted scoring) ──────────────────────
40
267
  server.tool("route_task", "Determine which ACE agent(s) or skill should handle a task", {
@@ -129,6 +356,14 @@ export function registerFrameworkTools(server) {
129
356
  const route = routingMap[detected] ?? routingMap.mixed;
130
357
  const recommendedSkills = new Set(route.skills ?? []);
131
358
  const recommendedSubagents = new Set(route.subagents);
359
+ const taskContract = readAceTaskContractAssessment();
360
+ const stateConflictRequested = text.includes("contradict") ||
361
+ text.includes("conflict") ||
362
+ text.includes("misalign") ||
363
+ text.includes("drift") ||
364
+ text.includes("blocked");
365
+ const requiresGovernance = detected === "mixed" || !taskContract.ok || stateConflictRequested;
366
+ const activeRoute = requiresGovernance ? routingMap.mixed : route;
132
367
  if (text.includes("schema") || text.includes("contract")) {
133
368
  recommendedSubagents.add("spec");
134
369
  recommendedSkills.add("schema-forge");
@@ -232,6 +467,11 @@ export function registerFrameworkTools(server) {
232
467
  text.includes("fix")) {
233
468
  recommendedSubagents.add("builder");
234
469
  }
470
+ if (requiresGovernance) {
471
+ recommendedSubagents.add("skeptic");
472
+ recommendedSubagents.add("ops");
473
+ recommendedSubagents.add("memory");
474
+ }
235
475
  return {
236
476
  content: [
237
477
  {
@@ -241,22 +481,38 @@ export function registerFrameworkTools(server) {
241
481
  "",
242
482
  `**Task:** ${description}`,
243
483
  `**Detected Domain:** ${detected}`,
244
- `**Primary Swarm Agent(s):** ${route.swarm_agents
484
+ `**Primary Swarm Agent(s):** ${activeRoute.swarm_agents
245
485
  .map((agent) => `ACE-${getRoleTitle(agent)}`)
246
486
  .join(", ")}`,
487
+ "**Hierarchy Rule:** Top-level routing stays locked to ACE-Orchestrator, ACE-VOS, ACE-UI, or ACE-Coders. Composable agents are delegated specialists, not peer replacements.",
488
+ `**Preflight Owner:** ACE-Orchestrator`,
489
+ `**Task Contract:** ${taskContract.ok ? "aligned" : "attention required"}`,
247
490
  `**Composable Subagents (Universal):** ${[...COMPOSABLE_AGENTS].join(", ")}`,
248
- `**Recommended Subagents:** ${[...recommendedSubagents].join(", ")}`,
249
- `**Pipeline:** ${route.pipeline}`,
250
- `**Recommended Prompt:** ${route.prompt}`,
491
+ `**Recommended Delegated Subagents:** ${[...recommendedSubagents].join(", ")}`,
492
+ `**Pipeline:** ${activeRoute.pipeline}`,
493
+ `**Recommended Prompt:** ${activeRoute.prompt}`,
251
494
  recommendedSkills.size > 0
252
495
  ? `**Recommended Skills:** ${[...recommendedSkills].join(", ")}`
253
496
  : "",
497
+ ...(!taskContract.ok
498
+ ? [
499
+ "",
500
+ "## Preflight Risks",
501
+ ...taskContract.files.map((entry) => `- ${entry.path}: ${entry.valid ? "ok" : entry.note}`),
502
+ ...taskContract.blockers.map((entry) => `- blocker: ${entry}`),
503
+ ]
504
+ : []),
505
+ "",
506
+ "## Preflight Expectations",
507
+ "1. Recall current ACE context from TASK, SCOPE, QUALITY_GATES, STATUS, HANDOFF, and EVIDENCE artifacts.",
508
+ "2. Validate the ACE quartet before dispatching substantial work.",
509
+ "3. Route ambiguity or contract drift through skeptic/validation before handoff.",
254
510
  "",
255
511
  "## Next Steps",
256
- `1. Activate prompt: \`${route.prompt}\``,
257
- "2. Load shared kernel context with `get_kernel_artifact` (`directive_kernel`, `agent_registry`)",
258
- "3. Generate a structured handoff with `create_handoff` when cross-role routing is needed",
259
- "4. Load role task pack with `get_task_pack`",
512
+ `1. Activate prompt: \`${activeRoute.prompt}\``,
513
+ "2. Run `validate_framework` and inspect current ACE state before cross-role dispatch.",
514
+ "3. Load shared kernel context with `get_kernel_artifact` (`directive_kernel`, `agent_registry`)",
515
+ "4. Generate a structured handoff with `create_handoff` only after preflight clears",
260
516
  ]
261
517
  .filter(Boolean)
262
518
  .join("\n"),
@@ -452,6 +708,7 @@ export function registerFrameworkTools(server) {
452
708
  "agent-state/TASK.md",
453
709
  "agent-state/STATUS.md",
454
710
  "agent-state/SCOPE.md",
711
+ "agent-state/QUALITY_GATES.md",
455
712
  "agent-state/EVIDENCE_LOG.md",
456
713
  "agent-state/DECISIONS.md",
457
714
  "agent-state/RISKS.md",
@@ -640,6 +897,10 @@ export function registerFrameworkTools(server) {
640
897
  const checks = [];
641
898
  // Check: required state files exist
642
899
  const requiredFiles = [
900
+ "agent-state/TASK.md",
901
+ "agent-state/SCOPE.md",
902
+ "agent-state/QUALITY_GATES.md",
903
+ "agent-state/HANDOFF.json",
643
904
  "agent-state/STATUS.md",
644
905
  "agent-state/EVIDENCE_LOG.md",
645
906
  "agent-state/DECISIONS.md",
@@ -814,6 +1075,17 @@ export function registerFrameworkTools(server) {
814
1075
  : `${missingLinks.length} manifest entries missing provenance links in PROVENANCE_LOG.md`,
815
1076
  });
816
1077
  }
1078
+ const taskContract = readAceTaskContractAssessment();
1079
+ checks.push({
1080
+ name: "task-contract:quartet",
1081
+ ok: taskContract.ok,
1082
+ detail: taskContract.ok
1083
+ ? taskContract.summary
1084
+ : [
1085
+ ...taskContract.files.map((entry) => `${entry.path}=${entry.valid ? "ok" : entry.note}`),
1086
+ ...taskContract.blockers,
1087
+ ].join("; "),
1088
+ });
817
1089
  // Check: git status
818
1090
  checks.push({
819
1091
  name: "git-repo",
@@ -1002,7 +1274,15 @@ export function registerFrameworkTools(server) {
1002
1274
  .array(z.string())
1003
1275
  .optional()
1004
1276
  .describe("Specific gate IDs to run (e.g. ['gate-correctness']). Omit to run all registered gates."),
1005
- }, async ({ gate_ids }) => {
1277
+ review_mode: z
1278
+ .enum(["skeptic_adversarial"])
1279
+ .optional()
1280
+ .describe("Optional skeptic review overlay that keeps only surviving findings actionable."),
1281
+ review_focus: z
1282
+ .string()
1283
+ .optional()
1284
+ .describe("Optional short focus string persisted with skeptic adversarial review evidence."),
1285
+ }, async ({ gate_ids, review_mode, review_focus }) => {
1006
1286
  const gatesDir = wsPath("agent-state", "MODULES", "gates");
1007
1287
  if (!existsSync(gatesDir)) {
1008
1288
  return {
@@ -1014,19 +1294,7 @@ export function registerFrameworkTools(server) {
1014
1294
  ],
1015
1295
  };
1016
1296
  }
1017
- const files = readdirSync(gatesDir).filter((f) => f.endsWith(".json"));
1018
- const allGates = [];
1019
- for (const file of files) {
1020
- try {
1021
- const raw = readFileSync(resolve(gatesDir, file), "utf-8");
1022
- const gate = JSON.parse(raw);
1023
- if (gate.id)
1024
- allGates.push(gate);
1025
- }
1026
- catch {
1027
- /* skip corrupt manifests */
1028
- }
1029
- }
1297
+ const allGates = readGateManifests(gatesDir);
1030
1298
  // Filter to requested gates (or run all)
1031
1299
  const targets = gate_ids
1032
1300
  ? allGates.filter((g) => gate_ids.includes(g.id))
@@ -1041,110 +1309,24 @@ export function registerFrameworkTools(server) {
1041
1309
  ],
1042
1310
  };
1043
1311
  }
1044
- const results = [];
1045
- for (const gate of targets) {
1046
- if (gate.type === "executable" && gate.command.trim().length > 0) {
1047
- // Run the command
1048
- const testResult = runTestSuite(gate.command);
1049
- results.push({
1050
- id: gate.id,
1051
- type: gate.type,
1052
- ok: testResult.ok,
1053
- detail: testResult.ok
1054
- ? `PASS (exit ${testResult.exit_code}, ${testResult.duration_ms}ms)`
1055
- : `FAIL (exit ${testResult.exit_code}): ${testResult.output.slice(0, 500)}`,
1056
- });
1057
- }
1058
- else if (gate.type === "artifact_scan") {
1059
- // Scan evidence requirement against existing files.
1060
- // Guard: if the evidence_requirement looks like prose (contains spaces
1061
- // and no path separators or file extensions), skip path-splitting and
1062
- // instead verify that the invariant-referenced state artifacts exist.
1063
- const evidenceReq = gate.evidence_requirement.trim();
1064
- const looksLikeProse = evidenceReq.includes(" ") &&
1065
- !/\.(md|json|ts|js|yaml|yml|ndjson|txt)\b/.test(evidenceReq) &&
1066
- !evidenceReq.includes("/");
1067
- let missing = [];
1068
- if (looksLikeProse) {
1069
- // Prose evidence — extract known artifact names from the invariant
1070
- // instead of blindly splitting prose into file paths.
1071
- const knownArtifacts = [
1072
- "STATUS.md", "EVIDENCE_LOG.md", "HANDOFF.json", "DECISIONS.md",
1073
- "RISKS.md", "SCOPE.md", "TASK.md", "QUALITY_GATES.md",
1074
- "SPEC_CONTRACT.json", "TEAL_CONFIG.md",
1075
- ];
1076
- // Check that at least some key artifacts referenced in the invariant exist
1077
- const invariantLower = gate.invariant.toLowerCase();
1078
- const relevantArtifacts = knownArtifacts.filter((a) => invariantLower.includes(a.replace(/\.\w+$/, "").toLowerCase().replace(/_/g, " "))
1079
- || invariantLower.includes(a.toLowerCase()));
1080
- if (relevantArtifacts.length === 0) {
1081
- // No specific artifacts detectable from invariant; treat as insufficient evidence
1082
- missing.push(`(prose evidence cannot be auto-verified: "${evidenceReq.slice(0, 80)}")`);
1083
- }
1084
- else {
1085
- for (const artifact of relevantArtifacts) {
1086
- if (!existsSync(wsPath("agent-state", artifact))) {
1087
- missing.push(artifact);
1088
- }
1089
- }
1090
- }
1091
- }
1092
- else {
1093
- // Structured evidence — split by delimiters and look up as file paths
1094
- const evidenceFiles = evidenceReq
1095
- .split(/[+,&]/)
1096
- .map((s) => s.trim())
1097
- .filter(Boolean);
1098
- for (const ref of evidenceFiles) {
1099
- const candidates = [
1100
- wsPath("agent-state", ref),
1101
- wsPath(ref),
1102
- ];
1103
- const found = candidates.some((c) => existsSync(c));
1104
- if (!found) {
1105
- const asFile = ref.replace(/\s+/g, "_").replace(/[^a-zA-Z0-9_./-]/g, "");
1106
- const fileFound = existsSync(wsPath("agent-state", asFile));
1107
- if (!fileFound)
1108
- missing.push(ref);
1109
- }
1110
- }
1111
- }
1112
- results.push({
1113
- id: gate.id,
1114
- type: gate.type,
1115
- ok: missing.length === 0,
1116
- detail: missing.length === 0
1117
- ? "PASS (all evidence artifacts present)"
1118
- : `FAIL (missing evidence: ${missing.join(", ")})`,
1119
- });
1120
- }
1121
- else if (gate.type === "manual_review") {
1122
- // Manual review gates must NOT auto-pass — they are explicitly deferred
1123
- // and should be surfaced as pending rather than silently passed.
1124
- results.push({
1125
- id: gate.id,
1126
- type: gate.type,
1127
- ok: false,
1128
- detail: "PENDING — requires manual review (not auto-enforceable, blocking until reviewed)",
1129
- });
1130
- }
1131
- else {
1132
- results.push({
1133
- id: gate.id,
1134
- type: gate.type ?? "unknown",
1135
- ok: false,
1136
- detail: "Unknown gate type or no command specified",
1137
- });
1138
- }
1139
- }
1312
+ const results = evaluateGateTargets(targets);
1140
1313
  const passed = results.filter((r) => r.ok).length;
1141
1314
  const failed = results.filter((r) => !r.ok).length;
1142
- const allOk = failed === 0;
1315
+ const review = review_mode === "skeptic_adversarial"
1316
+ ? buildAdversarialReview(results, review_focus)
1317
+ : undefined;
1318
+ const evidence = review_mode === "skeptic_adversarial" && review
1319
+ ? await appendEvidenceLogEntrySafe(review)
1320
+ : undefined;
1321
+ const blockingFailures = review ? review.summary.confirmed_count : failed;
1322
+ const allOk = blockingFailures === 0;
1143
1323
  await appendStatusEventSafe({
1144
1324
  source_module: "capability-framework",
1145
1325
  event_type: "GATES_EXECUTED",
1146
1326
  status: allOk ? "pass" : "fail",
1147
- summary: `Gates executed: ${passed}/${results.length} passed`,
1327
+ summary: review
1328
+ ? `Gates executed with skeptic review: ${passed}/${results.length} passed, ${review.summary.confirmed_count} confirmed findings`
1329
+ : `Gates executed: ${passed}/${results.length} passed`,
1148
1330
  payload: {
1149
1331
  gates_run: results.map((r) => r.id),
1150
1332
  gate_results: results.map((r) => ({
@@ -1154,27 +1336,83 @@ export function registerFrameworkTools(server) {
1154
1336
  })),
1155
1337
  passed,
1156
1338
  failed,
1157
- evidence_ref: "agent-state/MODULES/gates/",
1339
+ evidence_ref: evidence?.evidenceRef ?? "agent-state/MODULES/gates/",
1340
+ ...(review
1341
+ ? {
1342
+ review: {
1343
+ mode: review.mode,
1344
+ focus: review.focus,
1345
+ candidate_count: review.summary.candidate_count,
1346
+ disproved_count: review.summary.disproved_count,
1347
+ confirmed_count: review.summary.confirmed_count,
1348
+ actionable_gate_ids: review.summary.actionable_gate_ids,
1349
+ candidates: review.candidates,
1350
+ disproved: review.disproved,
1351
+ confirmed: review.confirmed,
1352
+ evidence_ref: evidence?.evidenceRef,
1353
+ },
1354
+ }
1355
+ : {}),
1158
1356
  },
1159
1357
  objective_id: "gate-execution",
1160
1358
  });
1161
1359
  const ledger = await appendRunLedgerEntrySafe({
1162
1360
  tool: "execute_gates",
1163
1361
  category: allOk ? "info" : "regression",
1164
- message: `Gate execution: ${passed}/${results.length} passed`,
1165
- artifacts: results.map((r) => `agent-state/MODULES/gates/${r.id}.json`),
1166
- metadata: { passed, failed, gate_ids: results.map((r) => r.id) },
1362
+ message: review
1363
+ ? `Gate execution with skeptic review: ${review.summary.confirmed_count} confirmed findings`
1364
+ : `Gate execution: ${passed}/${results.length} passed`,
1365
+ artifacts: [
1366
+ ...results.map((r) => `agent-state/MODULES/gates/${r.id}.json`),
1367
+ ...(evidence ? ["agent-state/EVIDENCE_LOG.md"] : []),
1368
+ ],
1369
+ metadata: {
1370
+ passed,
1371
+ failed,
1372
+ gate_ids: results.map((r) => r.id),
1373
+ ...(review
1374
+ ? {
1375
+ review_mode: review.mode,
1376
+ review_focus: review.focus,
1377
+ candidate_count: review.summary.candidate_count,
1378
+ disproved_count: review.summary.disproved_count,
1379
+ confirmed_count: review.summary.confirmed_count,
1380
+ actionable_gate_ids: review.summary.actionable_gate_ids,
1381
+ evidence_ref: evidence?.evidenceRef,
1382
+ }
1383
+ : {}),
1384
+ },
1167
1385
  });
1168
1386
  return {
1169
1387
  content: [
1170
1388
  {
1171
1389
  type: "text",
1172
1390
  text: [
1173
- allOk
1174
- ? `✅ All gates passed: ${passed}/${results.length}`
1175
- : `❌ Gate failures: ${failed}/${results.length} failed`,
1391
+ review
1392
+ ? allOk
1393
+ ? `✅ Skeptic adversarial review cleared: ${review.summary.confirmed_count} confirmed findings`
1394
+ : `❌ Skeptic adversarial review found ${review.summary.confirmed_count} confirmed finding${review.summary.confirmed_count === 1 ? "" : "s"}`
1395
+ : allOk
1396
+ ? `✅ All gates passed: ${passed}/${results.length}`
1397
+ : `❌ Gate failures: ${failed}/${results.length} failed`,
1176
1398
  `Run ledger: ${ledger.path} (${ledger.entry.id})`,
1399
+ ...(evidence ? [`Evidence: ${evidence.path} (${evidence.evidenceRef})`] : []),
1177
1400
  "",
1401
+ ...(review
1402
+ ? [
1403
+ `Review mode: ${review.mode}`,
1404
+ ...(review.focus ? [`Review focus: ${review.focus}`] : []),
1405
+ `- bug-hunter candidates: ${review.summary.candidate_count}`,
1406
+ `- disproved candidates: ${review.summary.disproved_count}`,
1407
+ `- confirmed findings: ${review.summary.confirmed_count}`,
1408
+ ...review.disproved.map((finding) => `- disproved ${finding.gate_id}: ${finding.reason ?? "candidate rejected"}`),
1409
+ ...review.confirmed.map((finding) => {
1410
+ const route = finding.route_hint ? ` (route ${finding.route_hint})` : "";
1411
+ return `- confirmed ${finding.gate_id}: ${finding.detail}${route}`;
1412
+ }),
1413
+ "",
1414
+ ]
1415
+ : []),
1178
1416
  ...results.map((r) => `- ${r.ok ? "✅" : "❌"} ${r.id} [${r.type}]: ${r.detail}`),
1179
1417
  ].join("\n"),
1180
1418
  },