npm - @h9-foundry/agentforge-cli - Versions diffs - 0.7.0 → 0.8.0 - Mend

@h9-foundry/agentforge-cli 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/.tsbuildinfo +1 -1
package/dist/bin.js +49 -1
package/dist/bin.js.map +1 -1
package/dist/index.d.ts +34 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1141 -4
package/dist/index.js.map +1 -1
package/dist/internal/builtin-agents.d.ts.map +1 -1
package/dist/internal/builtin-agents.js +1372 -196
package/dist/internal/builtin-agents.js.map +1 -1
package/package.json +8 -8

package/dist/index.js CHANGED Viewed

@@ -1,11 +1,12 @@
 import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
+import { isAbsolute, join } from "node:path";
+import { execFileSync } from "node:child_process";
 import yaml from "js-yaml";
-import { renderAuditBundleMarkdown } from "@h9-foundry/agentforge-audit";
+import { buildAuditBundle, createAuditEntry, renderAuditBundleMarkdown } from "@h9-foundry/agentforge-audit";
 import { createWorkflowState, findWorkspaceRoot } from "@h9-foundry/agentforge-context-engine";
 import { createPolicyEngine, loadPolicyDocument, resolvePolicy } from "@h9-foundry/agentforge-policy-engine";
 import { runWorkflow } from "@h9-foundry/agentforge-runtime";
-import { agentforgeConfigSchema, auditBundleSchema, designArtifactSchema, designRequestSchema, implementationRequestSchema, planningArtifactSchema, planningRequestSchema, qaRequestSchema, securityRequestSchema, workflowDefinitionSchema } from "@h9-foundry/agentforge-schemas";
+import { agentforgeConfigSchema, auditBundleSchema, benchmarkArtifactSchema, designArtifactSchema, designRequestSchema, evalArtifactSchema, evalFixtureCorpusSchema, implementationRequestSchema, incidentRequestSchema, maintenanceRequestSchema, planningArtifactSchema, planningRequestSchema, qaRequestSchema, releaseRequestSchema, schemaFixtures, securityRequestSchema, workflowDefinitionSchema } from "@h9-foundry/agentforge-schemas";
 import { createBuiltinAdapters } from "./internal/builtin-adapters.js";
 import { createBuiltinAgentRegistry } from "./internal/builtin-agents.js";
 import { LocalPluginRegistry } from "./internal/local-plugin-registry.js";
@@ -231,6 +232,84 @@ nodes:
     kind: report
     outputs_to: reports.final
 `;
+// YAML text of the "release-readiness" workflow definition: two deterministic nodes
+// (intake, evidence), one reasoning node (release) and a final report node.
+const releaseWorkflowTemplate = `version: 1
+name: release-readiness
+description: Validate a bounded release-readiness request while keeping trusted publish automation separate.
+trigger: manual
+catalog:
+  domain: release
+  supportLevel: partial
+  maturity: mvp
+  trustScope: official-core-only
+nodes:
+  - id: intake
+    kind: deterministic
+    agent: release-intake
+    outputs_to: agentResults.intake
+  - id: evidence
+    kind: deterministic
+    agent: release-evidence-normalizer
+    outputs_to: agentResults.evidence
+  - id: release
+    kind: reasoning
+    agent: release-analyst
+    outputs_to: agentResults.release
+  - id: report
+    kind: report
+    outputs_to: reports.final
+`;
+// YAML text of the "incident-handoff" workflow definition (catalog domain "operate"):
+// two deterministic nodes (intake, evidence), one reasoning node (incident) and a final report node.
+const incidentWorkflowTemplate = `version: 1
+name: incident-handoff
+description: Validate staged incident evidence while keeping the default path local, read-only, and explicit.
+trigger: manual
+catalog:
+  domain: operate
+  supportLevel: partial
+  maturity: mvp
+  trustScope: official-core-only
+nodes:
+  - id: intake
+    kind: deterministic
+    agent: incident-intake
+    outputs_to: agentResults.intake
+  - id: evidence
+    kind: deterministic
+    agent: incident-evidence-normalizer
+    outputs_to: agentResults.evidence
+  - id: incident
+    kind: reasoning
+    agent: incident-analyst
+    outputs_to: agentResults.incident
+  - id: report
+    kind: report
+    outputs_to: reports.final
+`;
+// YAML text of the "maintenance-triage" workflow definition (catalog domain "maintain"):
+// two deterministic nodes (intake, evidence), one reasoning node (maintenance) and a final report node.
+const maintenanceWorkflowTemplate = `version: 1
+name: maintenance-triage
+description: Validate a bounded maintenance request while keeping the default path local, read-only, and routing-oriented.
+trigger: manual
+catalog:
+  domain: maintain
+  supportLevel: partial
+  maturity: mvp
+  trustScope: official-core-only
+nodes:
+  - id: intake
+    kind: deterministic
+    agent: maintenance-intake
+    outputs_to: agentResults.intake
+  - id: evidence
+    kind: deterministic
+    agent: maintenance-evidence-normalizer
+    outputs_to: agentResults.evidence
+  - id: maintenance
+    kind: reasoning
+    agent: maintenance-analyst
+    outputs_to: agentResults.maintenance
+  - id: report
+    kind: report
+    outputs_to: reports.final
+`;
 /**
  * Read a file as UTF-8 text and parse it with `yaml.load`.
  * @param {string} filePath - Path of the YAML file to read.
  * @returns {unknown} Whatever `yaml.load` returns for the file's content.
  */
 function loadYaml(filePath) {
     const yamlText = readFileSync(filePath, "utf8");
     return yaml.load(yamlText);
 }
@@ -240,6 +319,136 @@ function isRecord(value) {
 /**
  * Return `value` unchanged when it is an array, otherwise a new empty array.
  * @param {unknown} value - Candidate value.
  * @returns {unknown[]} The same array instance, or `[]` for any non-array input.
  */
 function asArray(value) {
     if (!Array.isArray(value)) {
         return [];
     }
     return value;
 }
+function runGit(root, args) {
+    try {
+        return execFileSync("git", args, { cwd: root, encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }).trim();
+    }
+    catch {
+        return "";
+    }
+}
+function parseGitHubRepositoryUrl(value) {
+    const trimmed = value.trim();
+    const sshMatch = trimmed.match(/^git@([^:]+):([^/]+)\/([^/]+?)(?:\.git)?$/i);
+    if (sshMatch) {
+        return {
+            host: sshMatch[1].toLowerCase(),
+            owner: sshMatch[2],
+            repo: sshMatch[3]
+        };
+    }
+    const httpsMatch = trimmed.match(/^https?:\/\/([^/]+)\/([^/]+)\/([^/]+?)(?:\.git)?(?:\/)?$/i);
+    if (!httpsMatch) {
+        return undefined;
+    }
+    return {
+        host: httpsMatch[1].toLowerCase(),
+        owner: httpsMatch[2],
+        repo: httpsMatch[3]
+    };
+}
+function inferGitHubRepoContext(root) {
+    const packageJsonPath = join(root, "package.json");
+    if (existsSync(packageJsonPath)) {
+        const parsed = JSON.parse(readFileSync(packageJsonPath, "utf8"));
+        if (isRecord(parsed)) {
+            const repository = parsed.repository;
+            if (typeof repository === "string") {
+                const context = parseGitHubRepositoryUrl(repository);
+                if (context) {
+                    return context;
+                }
+            }
+            if (isRecord(repository) && typeof repository.url === "string") {
+                const context = parseGitHubRepositoryUrl(repository.url);
+                if (context) {
+                    return context;
+                }
+            }
+        }
+    }
+    const remoteUrl = runGit(root, ["config", "--get", "remote.origin.url"]);
+    return remoteUrl ? parseGitHubRepositoryUrl(remoteUrl) : undefined;
+}
+function normalizeGitHubReference(rawValue, repoContext) {
+    const raw = rawValue.trim();
+    if (!raw) {
+        return undefined;
+    }
+    const fromParts = (context, kind, number) => ({
+        platform: "github",
+        host: context.host,
+        owner: context.owner,
+        repo: context.repo,
+        kind,
+        number,
+        canonical: kind === "issue"
+            ? `${context.owner}/${context.repo}#${number}`
+            : `${context.owner}/${context.repo}/pull/${number}`,
+        url: kind === "issue"
+            ? `https://${context.host}/${context.owner}/${context.repo}/issues/${number}`
+            : `https://${context.host}/${context.owner}/${context.repo}/pull/${number}`,
+        source: raw
+    });
+    const urlMatch = raw.match(/^https?:\/\/([^/]+)\/([^/]+)\/([^/]+)\/(issues|pull)\/(\d+)(?:\/)?$/i);
+    if (urlMatch) {
+        return fromParts({ host: urlMatch[1].toLowerCase(), owner: urlMatch[2], repo: urlMatch[3] }, urlMatch[4].toLowerCase() === "pull" ? "pull_request" : "issue", Number.parseInt(urlMatch[5], 10));
+    }
+    const repoIssueMatch = raw.match(/^([^/\s]+)\/([^#\s]+)#(\d+)$/);
+    if (repoIssueMatch) {
+        return fromParts({ host: repoContext?.host ?? "github.com", owner: repoIssueMatch[1], repo: repoIssueMatch[2] }, "issue", Number.parseInt(repoIssueMatch[3], 10));
+    }
+    const repoPullMatch = raw.match(/^([^/\s]+)\/([^/\s]+)\/pull\/(\d+)$/i);
+    if (repoPullMatch) {
+        return fromParts({ host: repoContext?.host ?? "github.com", owner: repoPullMatch[1], repo: repoPullMatch[2] }, "pull_request", Number.parseInt(repoPullMatch[3], 10));
+    }
+    const shortIssueMatch = raw.match(/^#(\d+)$/);
+    if (shortIssueMatch && repoContext) {
+        return fromParts(repoContext, "issue", Number.parseInt(shortIssueMatch[1], 10));
+    }
+    const shortPullMatch = raw.match(/^(?:PR|pr)\s*#(\d+)$/);
+    if (shortPullMatch && repoContext) {
+        return fromParts(repoContext, "pull_request", Number.parseInt(shortPullMatch[1], 10));
+    }
+    return undefined;
+}
+function normalizeGitHubReferences(rawValues, repoContext) {
+    const seen = new Set();
+    const normalized = [];
+    for (const rawValue of rawValues) {
+        const githubRef = normalizeGitHubReference(rawValue, repoContext);
+        if (!githubRef || seen.has(githubRef.canonical)) {
+            continue;
+        }
+        seen.add(githubRef.canonical);
+        normalized.push(githubRef);
+    }
+    return normalized;
+}
+export function mapWorkflowRunStatusToGitHubStatus(workflow, localRunStatus) {
+    if (localRunStatus === "success") {
+        return {
+            workflow,
+            localRunStatus,
+            githubStatus: "completed",
+            reason: "Successful local workflow runs map to completed GitHub handoff status."
+        };
+    }
+    if (localRunStatus === "partial") {
+        return {
+            workflow,
+            localRunStatus,
+            githubStatus: "blocked",
+            reason: "Partial local workflow runs map to blocked GitHub handoff status until follow-up work resolves them."
+        };
+    }
+    return {
+        workflow,
+        localRunStatus,
+        githubStatus: "failed",
+        reason: "Failed local workflow runs map to failed GitHub handoff status."
+    };
+}
 function ensureReadablePath(policyEngine, pathValue, purpose) {
     const decision = policyEngine.canReadPath(pathValue);
     if (!decision.allowed) {
@@ -263,9 +472,31 @@ function validatePlanningRequestCompleteness(request) {
     }
     return request;
 }
+function validateIncidentRequestCompleteness(request) {
+    const evidenceSignalCount = request.evidenceSources.length + request.releaseReportRefs.length;
+    if (evidenceSignalCount === 0) {
+        throw new Error("Incident request is underspecified. Add at least one of evidenceSources or releaseReportRefs.");
+    }
+    return request;
+}
+function validateMaintenanceRequestCompleteness(request) {
+    const supportingSignalCount = request.dependencyAlertRefs.length +
+        request.docsTaskRefs.length +
+        request.releaseReportRefs.length +
+        request.issueRefs.length;
+    if (supportingSignalCount === 0) {
+        throw new Error("Maintenance request is underspecified. Add at least one of dependencyAlertRefs, docsTaskRefs, releaseReportRefs, or issueRefs.");
+    }
+    return request;
+}
 function validateWorkflowLifecyclePosture(workflow, policyEngine) {
     const domain = workflow.catalog?.domain;
-    if (domain !== "plan" && domain !== "design" && domain !== "build" && domain !== "security") {
+    if (domain !== "plan" &&
+        domain !== "design" &&
+        domain !== "build" &&
+        domain !== "security" &&
+        domain !== "release" &&
+        domain !== "operate") {
         return;
     }
     if (policyEngine.snapshot.defaults.network !== "deny") {
@@ -299,6 +530,19 @@ function loadDesignBundleArtifact(root, designRecordRef) {
     }
     return designArtifactSchema.parse(designArtifact);
 }
+function ensureBundleContainsArtifactKind(root, bundleRef, artifactKind, purpose) {
+    const artifactKinds = loadLifecycleArtifactKinds(root, bundleRef);
+    if (!artifactKinds.includes(artifactKind)) {
+        throw new Error(`Referenced ${purpose} does not contain a ${artifactKind} artifact: ${bundleRef}`);
+    }
+}
+function validateReleaseRequestCompleteness(request) {
+    const evidenceSignalCount = request.qaReportRefs.length + request.securityReportRefs.length + request.evidenceSources.length;
+    if (evidenceSignalCount === 0) {
+        throw new Error("Release request is underspecified. Add at least one of qaReportRefs, securityReportRefs, or evidenceSources.");
+    }
+    return request;
+}
 function loadLifecycleArtifactKinds(root, bundleRef) {
     const bundlePath = join(root, bundleRef);
     if (!existsSync(bundlePath)) {
@@ -307,6 +551,31 @@ function loadLifecycleArtifactKinds(root, bundleRef) {
     const bundle = auditBundleSchema.parse(JSON.parse(readFileSync(bundlePath, "utf8")));
     return bundle.lifecycleArtifacts.map((artifact) => artifact.artifactKind);
 }
+function loadLifecycleArtifactSourceReferences(root, bundleRef) {
+    const bundlePath = join(root, bundleRef);
+    if (!existsSync(bundlePath)) {
+        throw new Error(`Referenced bundle not found: ${bundleRef}`);
+    }
+    const bundle = auditBundleSchema.parse(JSON.parse(readFileSync(bundlePath, "utf8")));
+    const repoContext = inferGitHubRepoContext(root);
+    const issueRefs = new Set();
+    const githubRefs = new Map();
+    for (const artifact of bundle.lifecycleArtifacts) {
+        for (const issueRef of artifact.source.issueRefs) {
+            issueRefs.add(issueRef);
+        }
+        for (const githubRef of artifact.source.githubRefs ?? []) {
+            githubRefs.set(githubRef.canonical, githubRef);
+        }
+        for (const githubRef of normalizeGitHubReferences(artifact.source.issueRefs, repoContext)) {
+            githubRefs.set(githubRef.canonical, githubRef);
+        }
+    }
+    return {
+        issueRefs: [...issueRefs],
+        githubRefs: [...githubRefs.values()]
+    };
+}
 function prepareWorkflowInputs(workflow, root, policyEngine) {
     const requestsDir = join(root, ".agentops", "requests");
     ensureDirectory(requestsDir);
@@ -314,8 +583,10 @@ function prepareWorkflowInputs(workflow, root, policyEngine) {
         const requestPath = ".agentops/requests/planning.yaml";
         ensureReadablePath(policyEngine, requestPath, "planning request");
         const planningRequest = validatePlanningRequestCompleteness(readYamlFile(join(root, requestPath), planningRequestSchema, "planning request"));
+        const planningGithubRefs = normalizeGitHubReferences(planningRequest.issueRefs, inferGitHubRepoContext(root));
         return {
             planningRequest,
+            planningGithubRefs,
             requestFile: requestPath
         };
     }
@@ -348,11 +619,19 @@ function prepareWorkflowInputs(workflow, root, policyEngine) {
         ensureReadablePath(policyEngine, requestPath, "QA request");
         const qaRequest = readYamlFile(join(root, requestPath), qaRequestSchema, "QA request");
         ensureReadablePath(policyEngine, qaRequest.targetRef, "QA target reference");
+        if (!existsSync(join(root, qaRequest.targetRef))) {
+            throw new Error(`QA target reference not found: ${qaRequest.targetRef}`);
+        }
         for (const evidenceSource of qaRequest.evidenceSources) {
             ensureReadablePath(policyEngine, evidenceSource, "QA evidence source");
         }
+        const referencedSourceRefs = qaRequest.targetRef.endsWith("bundle.json")
+            ? loadLifecycleArtifactSourceReferences(root, qaRequest.targetRef)
+            : { issueRefs: [], githubRefs: [] };
         return {
             qaRequest: qaRequest,
+            qaIssueRefs: referencedSourceRefs.issueRefs,
+            qaGithubRefs: referencedSourceRefs.githubRefs,
             requestFile: requestPath
         };
     }
@@ -374,9 +653,129 @@ function prepareWorkflowInputs(workflow, root, policyEngine) {
         if (securityRequest.targetRef.endsWith("bundle.json") && !referencedArtifactKinds.some((kind) => allowedSecurityTargets.has(kind))) {
             throw new Error(`Referenced security bundle does not contain a supported lifecycle artifact: ${securityRequest.targetRef}`);
         }
+        const referencedSourceRefs = securityRequest.targetRef.endsWith("bundle.json")
+            ? loadLifecycleArtifactSourceReferences(root, securityRequest.targetRef)
+            : { issueRefs: [], githubRefs: [] };
         return {
             securityRequest: securityRequest,
             securityTargetArtifactKinds: referencedArtifactKinds,
+            securityIssueRefs: referencedSourceRefs.issueRefs,
+            securityGithubRefs: referencedSourceRefs.githubRefs,
+            requestFile: requestPath
+        };
+    }
+    if (workflow.name === "release-readiness") {
+        const requestPath = ".agentops/requests/release.yaml";
+        ensureReadablePath(policyEngine, requestPath, "release request");
+        const releaseRequest = validateReleaseRequestCompleteness(readYamlFile(join(root, requestPath), releaseRequestSchema, "release request"));
+        const releaseIssueRefs = new Set();
+        const releaseGithubRefMap = new Map();
+        for (const qaReportRef of releaseRequest.qaReportRefs) {
+            ensureReadablePath(policyEngine, qaReportRef, "QA report reference");
+            ensureBundleContainsArtifactKind(root, qaReportRef, "qa-report", "QA report reference");
+            const refs = loadLifecycleArtifactSourceReferences(root, qaReportRef);
+            for (const issueRef of refs.issueRefs) {
+                releaseIssueRefs.add(issueRef);
+            }
+            for (const githubRef of refs.githubRefs) {
+                releaseGithubRefMap.set(githubRef.canonical, githubRef);
+            }
+        }
+        for (const securityReportRef of releaseRequest.securityReportRefs) {
+            ensureReadablePath(policyEngine, securityReportRef, "security report reference");
+            ensureBundleContainsArtifactKind(root, securityReportRef, "security-report", "security report reference");
+            const refs = loadLifecycleArtifactSourceReferences(root, securityReportRef);
+            for (const issueRef of refs.issueRefs) {
+                releaseIssueRefs.add(issueRef);
+            }
+            for (const githubRef of refs.githubRefs) {
+                releaseGithubRefMap.set(githubRef.canonical, githubRef);
+            }
+        }
+        for (const evidenceSource of releaseRequest.evidenceSources) {
+            ensureReadablePath(policyEngine, evidenceSource, "release evidence source");
+            if (!existsSync(join(root, evidenceSource))) {
+                throw new Error(`Release evidence source not found: ${evidenceSource}`);
+            }
+        }
+        return {
+            releaseRequest: releaseRequest,
+            releaseIssueRefs: [...releaseIssueRefs],
+            releaseGithubRefs: [...releaseGithubRefMap.values()],
+            requestFile: requestPath
+        };
+    }
+    if (workflow.name === "incident-handoff") {
+        const requestPath = ".agentops/requests/incident.yaml";
+        ensureReadablePath(policyEngine, requestPath, "incident request");
+        const incidentRequest = validateIncidentRequestCompleteness(readYamlFile(join(root, requestPath), incidentRequestSchema, "incident request"));
+        const repoContext = inferGitHubRepoContext(root);
+        const incidentIssueRefs = new Set(incidentRequest.issueRefs);
+        const incidentGithubRefMap = new Map();
+        for (const githubRef of normalizeGitHubReferences(incidentRequest.issueRefs, repoContext)) {
+            incidentGithubRefMap.set(githubRef.canonical, githubRef);
+        }
+        for (const releaseReportRef of incidentRequest.releaseReportRefs) {
+            ensureReadablePath(policyEngine, releaseReportRef, "release report reference");
+            ensureBundleContainsArtifactKind(root, releaseReportRef, "release-report", "release report reference");
+            const refs = loadLifecycleArtifactSourceReferences(root, releaseReportRef);
+            for (const issueRef of refs.issueRefs) {
+                incidentIssueRefs.add(issueRef);
+            }
+            for (const githubRef of refs.githubRefs) {
+                incidentGithubRefMap.set(githubRef.canonical, githubRef);
+            }
+        }
+        for (const evidenceSource of incidentRequest.evidenceSources) {
+            ensureReadablePath(policyEngine, evidenceSource, "incident evidence source");
+            if (!existsSync(join(root, evidenceSource))) {
+                throw new Error(`Incident evidence source not found: ${evidenceSource}`);
+            }
+        }
+        return {
+            incidentRequest: incidentRequest,
+            incidentIssueRefs: [...incidentIssueRefs],
+            incidentGithubRefs: [...incidentGithubRefMap.values()],
+            requestFile: requestPath
+        };
+    }
+    if (workflow.name === "maintenance-triage") {
+        const requestPath = ".agentops/requests/maintenance.yaml";
+        ensureReadablePath(policyEngine, requestPath, "maintenance request");
+        const maintenanceRequest = validateMaintenanceRequestCompleteness(readYamlFile(join(root, requestPath), maintenanceRequestSchema, "maintenance request"));
+        const repoContext = inferGitHubRepoContext(root);
+        const maintenanceIssueRefs = new Set(maintenanceRequest.issueRefs);
+        const maintenanceGithubRefMap = new Map();
+        for (const githubRef of normalizeGitHubReferences(maintenanceRequest.issueRefs, repoContext)) {
+            maintenanceGithubRefMap.set(githubRef.canonical, githubRef);
+        }
+        for (const releaseReportRef of maintenanceRequest.releaseReportRefs) {
+            ensureReadablePath(policyEngine, releaseReportRef, "release report reference");
+            ensureBundleContainsArtifactKind(root, releaseReportRef, "release-report", "release report reference");
+            const refs = loadLifecycleArtifactSourceReferences(root, releaseReportRef);
+            for (const issueRef of refs.issueRefs) {
+                maintenanceIssueRefs.add(issueRef);
+            }
+            for (const githubRef of refs.githubRefs) {
+                maintenanceGithubRefMap.set(githubRef.canonical, githubRef);
+            }
+        }
+        for (const dependencyAlertRef of maintenanceRequest.dependencyAlertRefs) {
+            ensureReadablePath(policyEngine, dependencyAlertRef, "dependency alert reference");
+            if (!existsSync(join(root, dependencyAlertRef))) {
+                throw new Error(`Dependency alert reference not found: ${dependencyAlertRef}`);
+            }
+        }
+        for (const docsTaskRef of maintenanceRequest.docsTaskRefs) {
+            ensureReadablePath(policyEngine, docsTaskRef, "docs task reference");
+            if (!existsSync(join(root, docsTaskRef))) {
+                throw new Error(`Docs task reference not found: ${docsTaskRef}`);
+            }
+        }
+        return {
+            maintenanceRequest: maintenanceRequest,
+            maintenanceIssueRefs: [...maintenanceIssueRefs],
+            maintenanceGithubRefs: [...maintenanceGithubRefMap.values()],
             requestFile: requestPath
         };
     }
@@ -453,6 +852,26 @@ function readLatestCompleteRunBundle(runsRoot) {
     if (!existsSync(runsRoot)) {
         return undefined;
     }
+    const parseRunTimestampMs = (value) => {
+        if (typeof value !== "string" || value.length === 0) {
+            return undefined;
+        }
+        const compactDateTimeMatch = value.match(/^(\d{4})-(\d{2})-(\d{2})-(\d{2})(\d{2})(\d{2})$/);
+        if (compactDateTimeMatch) {
+            const [, year, month, day, hour, minute, second] = compactDateTimeMatch;
+            const isoCandidate = `${year}-${month}-${day}T${hour}:${minute}:${second}Z`;
+            const parsedCompactDateTime = Date.parse(isoCandidate);
+            if (!Number.isNaN(parsedCompactDateTime)) {
+                return parsedCompactDateTime;
+            }
+        }
+        const parsedDate = Date.parse(value);
+        if (!Number.isNaN(parsedDate)) {
+            return parsedDate;
+        }
+        const timestampPrefix = Number.parseInt(value.split("-")[0] ?? "", 10);
+        return Number.isNaN(timestampPrefix) ? undefined : timestampPrefix;
+    };
     const candidates = readdirSync(runsRoot)
         .map((entry) => {
         const bundlePath = join(runsRoot, entry, "bundle.json");
@@ -462,15 +881,24 @@ function readLatestCompleteRunBundle(runsRoot) {
         const stats = statSync(bundlePath);
         const bundle = JSON.parse(readFileSync(bundlePath, "utf8"));
         const bundleRunId = typeof bundle.runId === "string" ? bundle.runId : entry;
+        const parsedCompletedAtMs = parseRunTimestampMs(bundle.finishedAt) ??
+            parseRunTimestampMs(bundle.startedAt) ??
+            parseRunTimestampMs(bundleRunId) ??
+            parseRunTimestampMs(entry);
+        const completedAtMs = parsedCompletedAtMs ?? stats.mtimeMs;
         return {
             runDir: entry,
             bundle,
             bundleRunId,
-            completedAtMs: stats.mtimeMs
+            completedAtMs,
+            hasExplicitTimestamp: typeof parsedCompletedAtMs === "number"
         };
     })
         .filter((candidate) => Boolean(candidate))
         .sort((left, right) => {
+        if (left.hasExplicitTimestamp !== right.hasExplicitTimestamp) {
+            return left.hasExplicitTimestamp ? -1 : 1;
+        }
         if (left.completedAtMs !== right.completedAtMs) {
             return right.completedAtMs - left.completedAtMs;
         }
@@ -478,6 +906,29 @@ function readLatestCompleteRunBundle(runsRoot) {
     });
     return candidates[0] ? { runDir: candidates[0].runDir, bundle: candidates[0].bundle } : undefined;
 }
+function readRunBundleByRef(root, runRef) {
+    const config = loadAgentForgeConfig(root);
+    const runsRoot = join(root, config.runtime.runsPath);
+    const bundlePath = runRef.endsWith(".json") || runRef.includes("/")
+        ? (runRef.startsWith("/") ? runRef : join(root, runRef))
+        : join(runsRoot, runRef, "bundle.json");
+    if (!existsSync(bundlePath)) {
+        throw new Error(`Run bundle not found: ${runRef}`);
+    }
+    const bundle = auditBundleSchema.parse(JSON.parse(readFileSync(bundlePath, "utf8")));
+    return {
+        runId: typeof bundle.runId === "string" ? bundle.runId : runRef,
+        bundlePath,
+        bundle
+    };
+}
+function extractEvalArtifact(bundle, runRef) {
+    const artifact = bundle.lifecycleArtifacts.find((candidate) => candidate.artifactKind === "eval-result");
+    if (!artifact) {
+        throw new Error(`Run ${runRef} does not contain an eval-result artifact.`);
+    }
+    return evalArtifactSchema.parse(artifact);
+}
 function loadAgentForgeConfig(root) {
     const configPath = join(root, ".agentops", "agentops.yaml");
     if (!existsSync(configPath)) {
@@ -506,6 +957,425 @@ function loadAgentForgeConfig(root) {
 /** Creates `pathValue`, including missing parent directories (recursive mkdir). */
 function ensureDirectory(pathValue) {
     mkdirSync(pathValue, { recursive: true });
 }
+/**
+ * Serializes `value` with js-yaml and writes it to `filePath` as UTF-8.
+ *
+ * @param {string} filePath - Destination file.
+ * @param {unknown} value - Data handed to `yaml.dump`.
+ */
+function writeYamlFile(filePath, value) {
+    const serialized = yaml.dump(value);
+    writeFileSync(filePath, serialized, "utf8");
+}
+/** Validates the bundled `schemaFixtures.evalFixtureCorpus` and returns the parsed corpus. */
+function loadEvalFixtureCorpus() {
+    const { evalFixtureCorpus } = schemaFixtures;
+    return evalFixtureCorpusSchema.parse(evalFixtureCorpus);
+}
+/**
+ * Looks up an eval spec by id in the fixture corpus.
+ *
+ * @param {string} specId - Identifier compared against each spec's `id`.
+ * @returns {object} The matching spec.
+ * @throws {Error} When no spec carries that id.
+ */
+function getEvalSpec(specId) {
+    const { specs } = loadEvalFixtureCorpus();
+    const match = specs.find(({ id }) => id === specId);
+    if (match) {
+        return match;
+    }
+    throw new Error(`Unknown eval spec: ${specId}`);
+}
+/**
+ * Builds the workspace-relative reference to a run's bundle JSON.
+ * NOTE(review): the ".agentops/runs" prefix is fixed here, while other code
+ * resolves the runs directory from config — confirm they always agree.
+ */
+function toBundleRef(run) {
+    const { runId } = run;
+    return `.agentops/runs/${runId}/bundle.json`;
+}
+/** Builds the workspace-relative reference to a run's Markdown summary. */
+function toSummaryRef(run) {
+    const { runId } = run;
+    return `.agentops/runs/${runId}/summary.md`;
+}
+/**
+ * Describes a prerequisite workflow run as a setup-run record.
+ *
+ * @param {string} workflow - Name of the workflow that produced the run.
+ * @param {{ runId: string }} run - Run result; only `runId` is read.
+ * @returns {{ workflow: string, runId: string, bundlePath: string }}
+ */
+function toSetupRun(workflow, run) {
+    const { runId } = run;
+    const bundlePath = toBundleRef(run);
+    return { workflow, runId, bundlePath };
+}
+/**
+ * Creates a throwaway git workspace for an eval run under
+ * `<root>/.agentops/evals/<specId>/<evalRunId>/workspace`.
+ *
+ * The workspace receives a fixture package.json, lockfile, source file and
+ * evidence files, which are committed; `src.ts` is then rewritten so the
+ * working tree differs from the commit, and finally `initProject` is run on it.
+ *
+ * @param {string} root - Workspace root that hosts the `.agentops` directory.
+ * @param {string} evalRunId - Run id used as a path segment.
+ * @param {string} specId - Eval spec id used as a path segment.
+ * @returns {string} Path of the created workspace.
+ */
+function createBlankEvalWorkspace(root, evalRunId, specId) {
+    const workspaceRoot = join(root, ".agentops", "evals", specId, evalRunId, "workspace");
+    const evidenceRoot = join(workspaceRoot, ".agentops", "evidence");
+    // Small local helpers so each step below reads as a single intent.
+    const git = (...args) => execFileSync("git", args, { cwd: workspaceRoot, stdio: "ignore" });
+    const writeText = (directory, fileName, text) => writeFileSync(join(directory, fileName), text, "utf8");
+    const toPrettyJson = (data) => JSON.stringify(data, null, 2);
+    ensureDirectory(workspaceRoot);
+    ensureDirectory(evidenceRoot);
+    git("init");
+    git("config", "user.email", "eval@example.com");
+    git("config", "user.name", "AgentForge Eval");
+    const manifest = {
+        name: "fixture",
+        repository: {
+            type: "git",
+            url: "https://github.com/H9-Foundry/fixture.git"
+        },
+        scripts: {
+            test: "echo test",
+            lint: "echo lint",
+            typecheck: "echo typecheck",
+            build: "echo build"
+        }
+    };
+    writeText(workspaceRoot, "package.json", toPrettyJson(manifest));
+    writeText(workspaceRoot, "pnpm-lock.yaml", "lockfileVersion: '9.0'\n");
+    writeText(workspaceRoot, "src.ts", "export const value = 1;\n");
+    const dependencyAlerts = {
+        alerts: [
+            {
+                package: "example-dependency",
+                severity: "moderate",
+                summary: "Upgrade pending review for deterministic eval coverage."
+            }
+        ]
+    };
+    writeText(evidenceRoot, "dependency-alerts.json", toPrettyJson(dependencyAlerts));
+    writeText(evidenceRoot, "docs-task.md", "# Docs follow-up\n\n- Align workflow documentation after maintenance triage.\n");
+    git("add", ".");
+    // Signing is disabled for this one commit via `-c commit.gpgsign=false`.
+    git("-c", "commit.gpgsign=false", "commit", "-m", "init");
+    // Rewritten after the commit, so the workspace carries an uncommitted change.
+    writeText(workspaceRoot, "src.ts", "export const value = 2;\n");
+    initProject(workspaceRoot);
+    return workspaceRoot;
+}
+/** Returns a fresh array of the redaction category names recorded on eval artifacts and bundles. */
+function evalRedactionCategories() {
+    return Array.of("github-token", "api-key", "aws-key", "bearer-token", "password", "private-key");
+}
+/**
+ * Builds the eval-result artifact and audit bundle for one eval run and writes
+ * `bundle.json` and `summary.md` into a new run directory.
+ *
+ * The eval passes when no deterministic check has status "failed".
+ *
+ * @param {string} root - Workspace root holding `.agentops`.
+ * @param {object} spec - Eval spec (`id`, `name`, `workflow`, `repoFixture` are read).
+ * @param {object|undefined} evaluatedRun - Result of the evaluated workflow run, if any.
+ * @param {string} workspacePath - Workspace the workflow was executed in.
+ * @param {Array<object>} setupRuns - Prerequisite runs; each `bundlePath` becomes an input ref.
+ * @param {Array<object>} deterministicChecks - Check results with a `status` field.
+ * @param {Array<object>} modelDependentChecks - Recorded verbatim in the payload.
+ * @returns {{ bundle: object, jsonPath: string, markdownPath: string, outputDir: string }}
+ */
+function createEvalBundle(root, spec, evaluatedRun, workspacePath, setupRuns, deterministicChecks, modelDependentChecks) {
+    const config = loadAgentForgeConfig(root);
+    const policyDocument = loadPolicyDocument(join(root, ".agentops", "policy.yaml"));
+    const policy = resolvePolicy(policyDocument, process.env.CI ? "ci" : "local");
+    const state = createWorkflowState({
+        cwd: root,
+        workflow: `eval:${spec.id}`,
+        mode: "inspect",
+        policy
+    });
+    const runsRoot = join(root, config.runtime.runsPath);
+    const outputDir = join(runsRoot, state.runId);
+    ensureDirectory(outputDir);
+    const jsonPath = join(outputDir, "bundle.json");
+    const markdownPath = join(outputDir, "summary.md");
+    const failureCount = deterministicChecks.reduce((count, check) => (check.status === "failed" ? count + 1 : count), 0);
+    const passed = failureCount === 0;
+    const startedAt = new Date().toISOString();
+    const runnerEntryId = `${state.runId}-eval-runner`;
+    // Factories (not shared objects) so the artifact and the bundle each get their own copies.
+    const describeProvenance = () => ({
+        generatedBy: "agentforge-runtime",
+        schemaVersion: state.version,
+        executionEnvironment: state.context.ciExecution ? "ci" : "local",
+        repoRoot: state.repo.root
+    });
+    const describeRedaction = () => ({
+        applied: true,
+        strategyVersion: "1.0.0",
+        categories: evalRedactionCategories()
+    });
+    // The evaluated run's bundle (when known) goes first, followed by the setup runs.
+    const inputRefs = setupRuns.map((setup) => setup.bundlePath);
+    if (evaluatedRun?.jsonPath) {
+        inputRefs.unshift(evaluatedRun.jsonPath);
+    }
+    let summaryText;
+    if (passed) {
+        summaryText = `Eval result for ${spec.id} passed ${deterministicChecks.length} deterministic check(s).`;
+    }
+    else {
+        summaryText = `Eval result for ${spec.id} failed ${failureCount} deterministic check(s).`;
+    }
+    const evalArtifact = evalArtifactSchema.parse({
+        schemaVersion: state.version,
+        artifactKind: "eval-result",
+        lifecycleDomain: "evaluate",
+        workflow: {
+            name: state.workflow,
+            displayName: "Eval Runner"
+        },
+        source: {
+            sourceType: "workflow-run",
+            runId: state.runId,
+            inputRefs,
+            issueRefs: ["#165"],
+            githubRefs: []
+        },
+        status: passed ? "complete" : "draft",
+        generatedAt: startedAt,
+        repo: {
+            root: state.repo.root,
+            name: state.repo.name,
+            branch: state.repo.branch
+        },
+        provenance: describeProvenance(),
+        redaction: describeRedaction(),
+        auditLink: {
+            bundlePath: jsonPath,
+            entryIds: [runnerEntryId],
+            findingIds: [],
+            proposedActionIds: []
+        },
+        summary: summaryText,
+        payload: {
+            specId: spec.id,
+            specName: spec.name,
+            workflow: spec.workflow,
+            repoFixture: spec.repoFixture,
+            workspacePath,
+            evaluatedRunId: evaluatedRun?.runId,
+            evaluatedBundlePath: evaluatedRun ? toBundleRef(evaluatedRun) : undefined,
+            setupRuns,
+            deterministicChecks,
+            modelDependentChecks,
+            passed,
+            failureCount,
+            warningCount: 0
+        }
+    });
+    // Both audit entries share the start time and tool-less shape; only identity and outcome vary.
+    const recordEntry = (id, nodeId, nodeName, kind, status, summary, validationPassed) => createAuditEntry({
+        id,
+        nodeId,
+        nodeName,
+        kind,
+        startedAt,
+        completedAt: new Date().toISOString(),
+        status,
+        summary,
+        toolsRequested: [],
+        toolsExecuted: [],
+        blockedActions: [],
+        validationPassed
+    });
+    state.lifecycleArtifacts = [evalArtifact];
+    state.auditTrail = [
+        recordEntry(runnerEntryId, "eval-runner", "eval-runner", "deterministic", passed ? "success" : "failed", evalArtifact.summary, passed),
+        recordEntry(`${state.runId}-report`, "report", "final-report", "report", "success", "Generated eval result artifacts.", true)
+    ];
+    const bundle = buildAuditBundle(state, {
+        startedAt,
+        finishedAt: new Date().toISOString(),
+        status: passed ? "success" : "partial",
+        jsonPath,
+        markdownPath,
+        provenance: describeProvenance(),
+        redaction: describeRedaction(),
+        components: []
+    });
+    writeFileSync(jsonPath, JSON.stringify(bundle, null, 2), "utf8");
+    writeFileSync(markdownPath, renderAuditBundleMarkdown(bundle), "utf8");
+    return { bundle, jsonPath, markdownPath, outputDir };
+}
+/**
+ * Classifies per-check status changes between a baseline and a candidate eval.
+ *
+ * Checks are matched by `name` and visited in sorted name order. passed -> failed
+ * is a regression, failed -> passed an improvement, equal statuses are counted as
+ * unchanged, and everything else (missing on one side, transitions involving
+ * "not_applicable", or any other status pair) is reported as a non-comparable finding.
+ *
+ * @param {Array<{ name: string, status: string, details?: string }>} baselineChecks
+ * @param {Array<{ name: string, status: string, details?: string }>} candidateChecks
+ * @returns {{ regressions: object[], improvements: object[], unchangedCount: number, nonComparableFindings: string[] }}
+ */
+function compareDeterministicChecks(baselineChecks, candidateChecks) {
+    const indexByName = (checks) => new Map(checks.map((check) => [check.name, check]));
+    const baselineIndex = indexByName(baselineChecks);
+    const candidateIndex = indexByName(candidateChecks);
+    const orderedNames = Array.from(new Set([...baselineIndex.keys(), ...candidateIndex.keys()])).sort();
+    const outcome = {
+        regressions: [],
+        improvements: [],
+        unchangedCount: 0,
+        nonComparableFindings: []
+    };
+    orderedNames.forEach((name) => {
+        const before = baselineIndex.get(name);
+        const after = candidateIndex.get(name);
+        if (!(before && after)) {
+            outcome.nonComparableFindings.push(`Deterministic check \`${name}\` is missing from one of the eval results.`);
+            return;
+        }
+        if (before.status === after.status) {
+            outcome.unchangedCount += 1;
+            return;
+        }
+        const involvesNotApplicable = [before.status, after.status].includes("not_applicable");
+        if (involvesNotApplicable) {
+            outcome.nonComparableFindings.push(`Deterministic check \`${name}\` changed between comparable and not_applicable states (${before.status} -> ${after.status}).`);
+            return;
+        }
+        // Shared record shape for both directions of a passed/failed flip.
+        const toChange = (classification) => ({
+            name,
+            classification,
+            baselineStatus: before.status,
+            candidateStatus: after.status,
+            details: after.details ?? before.details
+        });
+        if (before.status === "passed" && after.status === "failed") {
+            outcome.regressions.push(toChange("regression"));
+        }
+        else if (before.status === "failed" && after.status === "passed") {
+            outcome.improvements.push(toChange("improvement"));
+        }
+        else {
+            outcome.nonComparableFindings.push(`Deterministic check \`${name}\` changed in an unsupported way (${before.status} -> ${after.status}).`);
+        }
+    });
+    return outcome;
+}
+/**
+ * Compares one candidate eval artifact against the baseline artifact.
+ *
+ * A differing `specId` or `workflow` short-circuits to a non-comparable result
+ * with a single finding; otherwise the deterministic checks are compared and the
+ * result is comparable only when that comparison produced no findings.
+ *
+ * `baselineRunId` and `baselineBundlePath` are accepted but not read here.
+ *
+ * @returns {object} Per-candidate comparison record keyed by the candidate run.
+ */
+function compareEvalArtifacts(baselineRunId, baselineBundlePath, baselineArtifact, candidateRunId, candidateBundlePath, candidateArtifact) {
+    const baselinePayload = baselineArtifact.payload;
+    const candidatePayload = candidateArtifact.payload;
+    // Every outcome reports the same candidate facts; only the comparison part varies.
+    const describeCandidate = (comparable, details) => ({
+        runId: candidateRunId,
+        bundlePath: candidateBundlePath,
+        specId: candidatePayload.specId,
+        workflow: candidatePayload.workflow,
+        comparable,
+        passed: candidatePayload.passed,
+        failureCount: candidatePayload.failureCount,
+        deterministicCheckCount: candidatePayload.deterministicChecks.length,
+        regressions: details.regressions,
+        improvements: details.improvements,
+        unchangedCount: details.unchangedCount,
+        nonComparableFindings: details.nonComparableFindings
+    });
+    const notComparable = (finding) => describeCandidate(false, {
+        regressions: [],
+        improvements: [],
+        unchangedCount: 0,
+        nonComparableFindings: [finding]
+    });
+    if (baselinePayload.specId !== candidatePayload.specId) {
+        return notComparable(`Spec mismatch: baseline ${baselinePayload.specId} vs candidate ${candidatePayload.specId}.`);
+    }
+    if (baselinePayload.workflow !== candidatePayload.workflow) {
+        return notComparable(`Workflow mismatch: baseline ${baselinePayload.workflow} vs candidate ${candidatePayload.workflow}.`);
+    }
+    const checkComparison = compareDeterministicChecks(baselinePayload.deterministicChecks, candidatePayload.deterministicChecks);
+    return describeCandidate(checkComparison.nonComparableFindings.length === 0, checkComparison);
+}
+/**
+ * Builds the benchmark-summary artifact and audit bundle for an eval comparison
+ * and writes `bundle.json` and `summary.md` into a new run directory.
+ *
+ * Counts are summed across all compared runs. The audit entry fails when any
+ * regression exists; the bundle status is "partial" when there is any regression
+ * or non-comparable finding, otherwise "success".
+ *
+ * @param {string} root - Workspace root holding `.agentops`.
+ * @param {string} baselineRunId - Run id of the baseline eval.
+ * @param {string} baselineBundlePath - Path of the baseline bundle (first input ref).
+ * @param {object} baselineArtifact - Baseline eval artifact (`payload.specId`/`workflow` are read).
+ * @param {Array<object>} comparedRuns - Per-candidate comparison records.
+ * @returns {{ bundle: object, jsonPath: string, markdownPath: string, outputDir: string }}
+ */
+function createBenchmarkBundle(root, baselineRunId, baselineBundlePath, baselineArtifact, comparedRuns) {
+    const config = loadAgentForgeConfig(root);
+    const policy = resolvePolicy(loadPolicyDocument(join(root, ".agentops", "policy.yaml")), process.env.CI ? "ci" : "local");
+    const state = createWorkflowState({
+        cwd: root,
+        workflow: "eval:compare",
+        mode: "inspect",
+        policy
+    });
+    const runsRoot = join(root, config.runtime.runsPath);
+    const outputDir = join(runsRoot, state.runId);
+    ensureDirectory(outputDir);
+    const jsonPath = join(outputDir, "bundle.json");
+    const markdownPath = join(outputDir, "summary.md");
+    // Captured once and reused for the artifact, audit entry and bundle so that no
+    // recorded start time can fall after a recorded completion time (the same
+    // convention createEvalBundle follows).
+    const startedAt = new Date().toISOString();
+    const regressionCount = comparedRuns.reduce((total, candidate) => total + candidate.regressions.length, 0);
+    const improvementCount = comparedRuns.reduce((total, candidate) => total + candidate.improvements.length, 0);
+    const unchangedCount = comparedRuns.reduce((total, candidate) => total + candidate.unchangedCount, 0);
+    const nonComparableCount = comparedRuns.reduce((total, candidate) => total + candidate.nonComparableFindings.length, 0);
+    // Precedence: regressions, then improvements, then non-comparable differences.
+    const summaryConclusion = regressionCount > 0
+        ? `Detected ${regressionCount} deterministic regression(s) across compared eval results.`
+        : improvementCount > 0
+            ? `Detected ${improvementCount} deterministic improvement(s) with no regressions.`
+            : nonComparableCount > 0
+                ? `Compared eval results contain ${nonComparableCount} non-comparable difference(s) and no deterministic regressions.`
+                : "No deterministic regressions detected across compared eval results.";
+    const benchmarkArtifact = benchmarkArtifactSchema.parse({
+        schemaVersion: state.version,
+        artifactKind: "benchmark-summary",
+        lifecycleDomain: "evaluate",
+        workflow: {
+            name: state.workflow,
+            displayName: "Eval Benchmark Compare"
+        },
+        source: {
+            sourceType: "workflow-run",
+            runId: state.runId,
+            inputRefs: [baselineBundlePath, ...comparedRuns.map((candidate) => candidate.bundlePath)],
+            issueRefs: ["#166"],
+            githubRefs: []
+        },
+        status: "complete",
+        generatedAt: startedAt,
+        repo: {
+            root: state.repo.root,
+            name: state.repo.name,
+            branch: state.repo.branch
+        },
+        provenance: {
+            generatedBy: "agentforge-runtime",
+            schemaVersion: state.version,
+            executionEnvironment: state.context.ciExecution ? "ci" : "local",
+            repoRoot: state.repo.root
+        },
+        redaction: {
+            applied: true,
+            strategyVersion: "1.0.0",
+            categories: evalRedactionCategories()
+        },
+        auditLink: {
+            bundlePath: jsonPath,
+            entryIds: [`${state.runId}-benchmark-compare`],
+            findingIds: [],
+            proposedActionIds: []
+        },
+        summary: summaryConclusion,
+        payload: {
+            baselineRunId,
+            baselineBundlePath,
+            baselineSpecId: baselineArtifact.payload.specId,
+            baselineWorkflow: baselineArtifact.payload.workflow,
+            comparedRuns,
+            regressionCount,
+            improvementCount,
+            unchangedCount,
+            nonComparableCount,
+            summaryConclusion
+        }
+    });
+    state.lifecycleArtifacts = [benchmarkArtifact];
+    state.auditTrail = [
+        createAuditEntry({
+            id: `${state.runId}-benchmark-compare`,
+            nodeId: "benchmark-compare",
+            nodeName: "benchmark-compare",
+            kind: "deterministic",
+            startedAt,
+            completedAt: new Date().toISOString(),
+            status: regressionCount > 0 ? "failed" : "success",
+            summary: benchmarkArtifact.summary,
+            toolsRequested: [],
+            toolsExecuted: [],
+            blockedActions: [],
+            validationPassed: regressionCount === 0
+        })
+    ];
+    const bundle = buildAuditBundle(state, {
+        startedAt,
+        finishedAt: new Date().toISOString(),
+        status: regressionCount > 0 || nonComparableCount > 0 ? "partial" : "success",
+        jsonPath,
+        markdownPath,
+        provenance: {
+            generatedBy: "agentforge-runtime",
+            schemaVersion: state.version,
+            executionEnvironment: state.context.ciExecution ? "ci" : "local",
+            repoRoot: state.repo.root
+        },
+        redaction: {
+            applied: true,
+            strategyVersion: "1.0.0",
+            categories: evalRedactionCategories()
+        },
+        components: []
+    });
+    writeFileSync(jsonPath, JSON.stringify(bundle, null, 2), "utf8");
+    writeFileSync(markdownPath, renderAuditBundleMarkdown(bundle), "utf8");
+    return { bundle, jsonPath, markdownPath, outputDir };
+}
 function ensureInitFiles(root) {
     const created = [];
     const configDir = join(root, ".agentops");
@@ -545,6 +1415,18 @@ function ensureInitFiles(root) {
         {
             path: join(workflowsDir, "security-review.yaml"),
             contents: securityWorkflowTemplate
+        },
+        {
+            path: join(workflowsDir, "release-readiness.yaml"),
+            contents: releaseWorkflowTemplate
+        },
+        {
+            path: join(workflowsDir, "incident-handoff.yaml"),
+            contents: incidentWorkflowTemplate
+        },
+        {
+            path: join(workflowsDir, "maintenance-triage.yaml"),
+            contents: maintenanceWorkflowTemplate
         }
     ];
     for (const file of files) {
@@ -694,6 +1576,261 @@ export async function runLocalWorkflow(workflowName, cwd = process.cwd()) {
         artifactKinds: bundle.lifecycleArtifacts.map((artifact) => artifact.artifactKind)
     };
 }
+/**
+ * Builds one deterministic check record.
+ *
+ * @param {string} status - Outcome of the check.
+ * @param {string} name - Check identifier.
+ * @param {unknown} expected - Expected value, recorded as given.
+ * @param {unknown} actual - Observed value, recorded as given (may be undefined).
+ * @param {string} [details] - Optional explanation; the key is omitted when falsy.
+ * @returns {{ name: string, status: string, expected: unknown, actual: unknown, details?: string }}
+ */
+function checkResult(status, name, expected, actual, details) {
+    const record = { name, status, expected, actual };
+    if (details) {
+        record.details = details;
+    }
+    return record;
+}
+/**
+ * Evaluates a workflow bundle against an eval spec and returns the resulting checks.
+ *
+ * Without a bundle, a single failed "workflow-execution" check is returned. With a
+ * bundle, the run status, redaction settings, policy defaults and expected lifecycle
+ * artifacts (kind, required payload fields, required summary terms) are checked.
+ * Side-effect classes are always recorded as "not_applicable".
+ *
+ * @param {object} spec - Eval spec describing the expectations.
+ * @param {object|undefined} bundle - Parsed audit bundle of the evaluated run, if any.
+ * @param {string|undefined} executionError - Failure message used when no bundle exists.
+ * @returns {{ deterministicChecks: object[], modelDependentChecks: object[] }}
+ */
+function compareEvalSpec(spec, bundle, executionError) {
+    const checks = [];
+    // Model-dependent scoring is never executed here; both outcomes report the same placeholder.
+    const modelDependentChecks = [
+        {
+            name: "rubric-scoring",
+            status: "not_executed",
+            details: "Provider-dependent scoring is out of scope for the first local eval runner slice."
+        }
+    ];
+    if (!bundle) {
+        checks.push(checkResult("failed", "workflow-execution", "successful workflow execution", executionError ?? "unknown failure", "The eval runner could not produce an evaluated workflow bundle."));
+        return { deterministicChecks: checks, modelDependentChecks };
+    }
+    checks.push(checkResult(bundle.status === spec.expectedStatus ? "passed" : "failed", "run-status", spec.expectedStatus, bundle.status, "The evaluated workflow status should match the deterministic eval spec."));
+    checks.push(checkResult(bundle.redaction.applied === spec.redactionExpectations.applied ? "passed" : "failed", "redaction-applied", String(spec.redactionExpectations.applied), String(bundle.redaction.applied)));
+    for (const category of spec.redactionExpectations.expectedCategories) {
+        checks.push(checkResult(bundle.redaction.categories.includes(category) ? "passed" : "failed", `redaction-category:${category}`, category, bundle.redaction.categories.join(", ")));
+    }
+    checks.push(checkResult(bundle.policy.defaults.executionMode === spec.policyExpectations.executionMode ? "passed" : "failed", "policy-execution-mode", spec.policyExpectations.executionMode, bundle.policy.defaults.executionMode));
+    if (spec.policyExpectations.readOnly) {
+        // Read-only means "writes is anything other than allow", not a specific value.
+        checks.push(checkResult(bundle.policy.defaults.writes !== "allow" ? "passed" : "failed", "policy-read-only", "writes not equal allow", bundle.policy.defaults.writes));
+    }
+    for (const sideEffectClass of spec.policyExpectations.sideEffectClasses) {
+        checks.push(checkResult("not_applicable", `side-effect-class:${sideEffectClass}`, sideEffectClass, undefined, "The first eval runner records policy posture and workflow outputs but does not inspect adapter-level side-effect execution traces."));
+    }
+    for (const expectedArtifact of spec.artifactExpectations) {
+        const actualArtifact = bundle.lifecycleArtifacts.find((artifact) => artifact.artifactKind === expectedArtifact.artifactKind);
+        checks.push(checkResult(actualArtifact ? "passed" : "failed", `artifact-kind:${expectedArtifact.artifactKind}`, expectedArtifact.artifactKind, actualArtifact?.artifactKind));
+        // Field and summary checks only make sense when the artifact exists with an object payload.
+        if (!actualArtifact || typeof actualArtifact.payload !== "object" || actualArtifact.payload === null) {
+            continue;
+        }
+        const payload = actualArtifact.payload;
+        for (const field of expectedArtifact.requiredPayloadFields) {
+            // Own-property test: `in` would also accept inherited members such as
+            // "constructor" or "toString" and report a missing field as present.
+            const hasField = Object.prototype.hasOwnProperty.call(payload, field);
+            checks.push(checkResult(hasField ? "passed" : "failed", `payload-field:${expectedArtifact.artifactKind}:${field}`, field, Object.keys(payload).join(", ")));
+        }
+        for (const term of expectedArtifact.requiredSummaryTerms) {
+            // Case-insensitive substring match against the artifact summary.
+            const summary = actualArtifact.summary.toLowerCase();
+            checks.push(checkResult(summary.includes(term.toLowerCase()) ? "passed" : "failed", `summary-term:${expectedArtifact.artifactKind}:${term}`, term, actualArtifact.summary));
+        }
+    }
+    if (spec.artifactExpectations.length === 0) {
+        // A spec without artifact expectations asserts that the run produced none.
+        checks.push(checkResult(bundle.lifecycleArtifacts.length === 0 ? "passed" : "failed", "artifact-count", "0", String(bundle.lifecycleArtifacts.length)));
+    }
+    return { deterministicChecks: checks, modelDependentChecks };
+}
+/**
+ * Runs the workflow named by `spec.workflow` inside `workspaceRoot`, first running
+ * the upstream workflows it depends on and writing each request file under
+ * `.agentops/requests`.
+ *
+ * Upstream runs use the bundled schema fixtures as requests; the evaluated
+ * workflow uses `spec.request`, with references pointed at the upstream run.
+ *
+ * @param {object} spec - Eval spec (`workflow` and `request` are read).
+ * @param {string} workspaceRoot - Workspace the workflows execute in.
+ * @returns {Promise<{ evaluatedRun: object, setupRuns: object[] }>} The evaluated run and
+ *   the upstream runs recorded along the way.
+ * @throws {Error} When `spec.workflow` has no eval execution path.
+ */
+async function executeEvalWorkflow(spec, workspaceRoot) {
+    const setupRuns = [];
+    const requestsRoot = join(workspaceRoot, ".agentops", "requests");
+    ensureDirectory(requestsRoot);
+    // Each helper runs its own upstream chain, records it in setupRuns, then runs one
+    // workflow with the fixture request. The helper's own run is recorded by its caller.
+    const runPlanning = async () => {
+        writeYamlFile(join(requestsRoot, "planning.yaml"), schemaFixtures.planningRequest);
+        return runLocalWorkflow("planning-discovery", workspaceRoot);
+    };
+    const runDesign = async () => {
+        const planningRun = await runPlanning();
+        setupRuns.push(toSetupRun("planning-discovery", planningRun));
+        writeYamlFile(join(requestsRoot, "design.yaml"), {
+            ...schemaFixtures.designRequest,
+            planningBriefRef: toBundleRef(planningRun)
+        });
+        return runLocalWorkflow("architecture-design-review", workspaceRoot);
+    };
+    const runImplementation = async () => {
+        const designRun = await runDesign();
+        setupRuns.push(toSetupRun("architecture-design-review", designRun));
+        writeYamlFile(join(requestsRoot, "implementation.yaml"), {
+            ...schemaFixtures.implementationRequest,
+            designRecordRef: toBundleRef(designRun)
+        });
+        return runLocalWorkflow("implementation-proposal", workspaceRoot);
+    };
+    const runQa = async () => {
+        const implementationRun = await runImplementation();
+        setupRuns.push(toSetupRun("implementation-proposal", implementationRun));
+        writeYamlFile(join(requestsRoot, "qa.yaml"), {
+            ...schemaFixtures.qaRequest,
+            targetRef: toBundleRef(implementationRun),
+            evidenceSources: [toSummaryRef(implementationRun)]
+        });
+        return runLocalWorkflow("qa-review", workspaceRoot);
+    };
+    const runSecurity = async () => {
+        const qaRun = await runQa();
+        setupRuns.push(toSetupRun("qa-review", qaRun));
+        writeYamlFile(join(requestsRoot, "security.yaml"), {
+            ...schemaFixtures.securityRequest,
+            targetRef: toBundleRef(qaRun),
+            evidenceSources: [toSummaryRef(qaRun)]
+        });
+        return runLocalWorkflow("security-review", workspaceRoot);
+    };
+    const runRelease = async () => {
+        const securityRun = await runSecurity();
+        setupRuns.push(toSetupRun("security-review", securityRun));
+        // The release request also references the QA run recorded earlier in the chain.
+        const qaRun = setupRuns.find((run) => run.workflow === "qa-review");
+        if (!qaRun) {
+            throw new Error("QA setup run was not recorded before release eval execution.");
+        }
+        writeYamlFile(join(requestsRoot, "release.yaml"), {
+            ...schemaFixtures.releaseRequest,
+            qaReportRefs: [qaRun.bundlePath],
+            securityReportRefs: [toBundleRef(securityRun)],
+            evidenceSources: [toSummaryRef(securityRun)]
+        });
+        return runLocalWorkflow("release-readiness", workspaceRoot);
+    };
+    switch (spec.workflow) {
+        case "pr-review":
+            return { evaluatedRun: await runLocalWorkflow("pr-review", workspaceRoot), setupRuns };
+        case "planning-discovery":
+            writeYamlFile(join(requestsRoot, "planning.yaml"), spec.request);
+            return { evaluatedRun: await runLocalWorkflow("planning-discovery", workspaceRoot), setupRuns };
+        case "architecture-design-review": {
+            const planningRun = await runPlanning();
+            setupRuns.push(toSetupRun("planning-discovery", planningRun));
+            writeYamlFile(join(requestsRoot, "design.yaml"), {
+                ...spec.request,
+                planningBriefRef: toBundleRef(planningRun)
+            });
+            return { evaluatedRun: await runLocalWorkflow("architecture-design-review", workspaceRoot), setupRuns };
+        }
+        case "implementation-proposal": {
+            const designRun = await runDesign();
+            setupRuns.push(toSetupRun("architecture-design-review", designRun));
+            writeYamlFile(join(requestsRoot, "implementation.yaml"), {
+                ...spec.request,
+                designRecordRef: toBundleRef(designRun)
+            });
+            return { evaluatedRun: await runLocalWorkflow("implementation-proposal", workspaceRoot), setupRuns };
+        }
+        case "qa-review": {
+            const implementationRun = await runImplementation();
+            setupRuns.push(toSetupRun("implementation-proposal", implementationRun));
+            writeYamlFile(join(requestsRoot, "qa.yaml"), {
+                ...spec.request,
+                targetRef: toBundleRef(implementationRun),
+                evidenceSources: [toSummaryRef(implementationRun)]
+            });
+            return { evaluatedRun: await runLocalWorkflow("qa-review", workspaceRoot), setupRuns };
+        }
+        case "security-review": {
+            const qaRun = await runQa();
+            setupRuns.push(toSetupRun("qa-review", qaRun));
+            writeYamlFile(join(requestsRoot, "security.yaml"), {
+                ...spec.request,
+                targetRef: toBundleRef(qaRun),
+                evidenceSources: [toSummaryRef(qaRun)]
+            });
+            return { evaluatedRun: await runLocalWorkflow("security-review", workspaceRoot), setupRuns };
+        }
+        case "maintenance-triage": {
+            const releaseRun = await runRelease();
+            setupRuns.push(toSetupRun("release-readiness", releaseRun));
+            writeYamlFile(join(requestsRoot, "maintenance.yaml"), {
+                ...spec.request,
+                releaseReportRefs: [toBundleRef(releaseRun)]
+            });
+            return { evaluatedRun: await runLocalWorkflow("maintenance-triage", workspaceRoot), setupRuns };
+        }
+        default:
+            // Without this, an unhandled workflow resolved to undefined and the caller
+            // failed with an unrelated TypeError while reading the result.
+            throw new Error(`Unsupported eval workflow: ${spec.workflow}`);
+    }
+}
+/**
+ * Runs one eval spec locally and writes an eval-result bundle.
+ *
+ * The spec's workflow is executed either in the workspace root (fixture
+ * "agentforge-monorepo") or in a freshly created blank workspace. Execution
+ * failures do not reject: they are captured and reported as a failed
+ * deterministic check in the resulting bundle.
+ *
+ * @param {string} specId - Id of the eval spec to run.
+ * @param {string} [cwd] - Directory used to locate the workspace root.
+ * @returns {Promise<object>} Summary of the eval run (ids, paths, status and check counts).
+ * @throws {Error} When the spec id is unknown, or when an existing evaluated bundle
+ *   fails to parse or validate.
+ */
+export async function runLocalEval(specId, cwd = process.cwd()) {
+    const root = findWorkspaceRoot(cwd);
+    ensureInitFiles(root);
+    const spec = getEvalSpec(specId);
+    const controlPolicy = resolvePolicy(loadPolicyDocument(join(root, ".agentops", "policy.yaml")), process.env.CI ? "ci" : "local");
+    // This state is only used for its runId, which names the blank workspace directory.
+    const controlState = createWorkflowState({
+        cwd: root,
+        workflow: `eval:${spec.id}`,
+        mode: controlPolicy.defaults.executionMode,
+        policy: controlPolicy
+    });
+    const workspaceRoot = spec.repoFixture === "agentforge-monorepo" ? root : createBlankEvalWorkspace(root, controlState.runId, spec.id);
+    let evaluatedRun;
+    let setupRuns = [];
+    let executionError;
+    try {
+        const result = await executeEvalWorkflow(spec, workspaceRoot);
+        evaluatedRun = result.evaluatedRun;
+        setupRuns = result.setupRuns;
+    }
+    catch (error) {
+        executionError = error instanceof Error ? error.message : String(error);
+    }
+    let evaluatedBundle;
+    if (evaluatedRun) {
+        if (existsSync(evaluatedRun.jsonPath)) {
+            evaluatedBundle = auditBundleSchema.parse(JSON.parse(readFileSync(evaluatedRun.jsonPath, "utf8")));
+        }
+        else {
+            // The run completed but left no bundle on disk: record why, instead of
+            // letting the eval report an "unknown failure".
+            executionError = `Evaluated run bundle not found: ${evaluatedRun.jsonPath}`;
+        }
+    }
+    const { deterministicChecks, modelDependentChecks } = compareEvalSpec(spec, evaluatedBundle, executionError);
+    const { bundle, jsonPath, markdownPath, outputDir } = createEvalBundle(root, spec, evaluatedRun, workspaceRoot, setupRuns, deterministicChecks, modelDependentChecks);
+    return {
+        runId: bundle.runId,
+        specId: spec.id,
+        workflow: spec.workflow,
+        outputDir,
+        jsonPath,
+        markdownPath,
+        status: bundle.status,
+        evaluatedRunId: evaluatedRun?.runId,
+        evaluatedBundlePath: evaluatedRun ? toBundleRef(evaluatedRun) : undefined,
+        setupRunCount: setupRuns.length,
+        deterministicCheckCount: deterministicChecks.length,
+        deterministicFailures: deterministicChecks.filter((check) => check.status === "failed").length,
+        artifactKinds: bundle.lifecycleArtifacts.map((artifact) => artifact.artifactKind)
+    };
+}
+/**
+ * Compares one or more candidate eval runs against a baseline eval run and
+ * writes a benchmark-summary bundle.
+ *
+ * @param {string} baselineRunRef - Run id or bundle path of the baseline eval.
+ * @param {string[]} candidateRunRefs - Run ids or bundle paths to compare; must be non-empty.
+ * @param {string} [cwd] - Directory used to locate the workspace root.
+ * @returns {object} Summary of the comparison (ids, paths, status and aggregate counts).
+ * @throws {Error} When no candidate is given, or when a referenced bundle is missing
+ *   or lacks an eval-result artifact.
+ */
+export function compareLocalEvalRuns(baselineRunRef, candidateRunRefs, cwd = process.cwd()) {
+    if (candidateRunRefs.length === 0) {
+        throw new Error("Provide at least one candidate eval run to compare against the baseline.");
+    }
+    const root = findWorkspaceRoot(cwd);
+    ensureInitFiles(root);
+    const baseline = readRunBundleByRef(root, baselineRunRef);
+    const baselineArtifact = extractEvalArtifact(baseline.bundle, baselineRunRef);
+    const comparedRuns = candidateRunRefs.map((candidateRunRef) => {
+        const { runId, bundlePath, bundle: candidateBundle } = readRunBundleByRef(root, candidateRunRef);
+        const candidateArtifact = extractEvalArtifact(candidateBundle, candidateRunRef);
+        return compareEvalArtifacts(baseline.runId, baseline.bundlePath, baselineArtifact, runId, bundlePath, candidateArtifact);
+    });
+    const written = createBenchmarkBundle(root, baseline.runId, baseline.bundlePath, baselineArtifact, comparedRuns);
+    // Sums a per-run number across every compared run.
+    const sumOf = (pick) => comparedRuns.reduce((total, run) => total + pick(run), 0);
+    return {
+        runId: written.bundle.runId,
+        outputDir: written.outputDir,
+        jsonPath: written.jsonPath,
+        markdownPath: written.markdownPath,
+        status: written.bundle.status,
+        baselineRunId: baseline.runId,
+        comparedRunIds: comparedRuns.map((run) => run.runId),
+        comparableRunCount: sumOf((run) => (run.comparable ? 1 : 0)),
+        regressionCount: sumOf((run) => run.regressions.length),
+        improvementCount: sumOf((run) => run.improvements.length),
+        unchangedCount: sumOf((run) => run.unchangedCount),
+        nonComparableCount: sumOf((run) => run.nonComparableFindings.length),
+        artifactKinds: written.bundle.lifecycleArtifacts.map((artifact) => artifact.artifactKind)
+    };
+}
 export function explainLastRun(cwd = process.cwd()) {
     const root = findWorkspaceRoot(cwd);
     const config = loadAgentForgeConfig(root);