npm - @allurereport/plugin-agent - Versions diffs - 3.10.0 → 3.12.0 - Mend

@allurereport/plugin-agent 3.10.0 → 3.12.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (29)

package/README.md +118 -77
package/dist/capabilities.d.ts +127 -0
package/dist/capabilities.js +266 -0
package/dist/errors.d.ts +9 -0
package/dist/errors.js +15 -0
package/dist/guidance.d.ts +4 -5
package/dist/guidance.js +223 -60
package/dist/harness.d.ts +72 -4
package/dist/harness.js +49 -17
package/dist/index.d.ts +9 -1
package/dist/index.js +9 -0
package/dist/inline-expectations.d.ts +23 -0
package/dist/inline-expectations.js +186 -0
package/dist/invalid-output.d.ts +58 -0
package/dist/invalid-output.js +238 -0
package/dist/model.d.ts +59 -0
package/dist/model.js +8 -1
package/dist/paths.d.ts +3 -0
package/dist/paths.js +10 -0
package/dist/plugin.js +916 -137
package/dist/query.d.ts +195 -0
package/dist/query.js +177 -0
package/dist/selection.d.ts +42 -0
package/dist/selection.js +141 -0
package/dist/state.d.ts +56 -0
package/dist/state.js +277 -0
package/dist/utils.d.ts +17 -0
package/dist/utils.js +171 -0
package/package.json +6 -6

package/dist/plugin.js CHANGED Viewed

@@ -12,20 +12,12 @@ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (
 var _AgentPlugin_runtime;
 import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
 import { basename, dirname, extname, join, relative, resolve } from "node:path";
-import process, { env } from "node:process";
+import process from "node:process";
 import { formatDuration, isAttachment, isStep, } from "@allurereport/core-api";
-import { parse } from "yaml";
 import { renderAgentsGuide } from "./guidance.js";
-const AGENT_OUTPUT_ENV = "ALLURE_AGENT_OUTPUT";
-const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
-const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
-const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
-const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
-const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
-const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
-const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
+import { parseAgentExpectations } from "./model.js";
 const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
-const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest", "project"];
+const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"];
 const STATUS_ORDER = {
     failed: 0,
     broken: 1,
@@ -126,16 +118,34 @@ const normalizeLabelValues = (value) => {
         return values.length ? [[name, values]] : [];
     }));
 };
+const normalizeNonNegativeInteger = (value) => typeof value === "number" && Number.isInteger(value) && value >= 0 ? value : undefined;
+const normalizePositiveInteger = (value) => typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
 const normalizeSelectors = (input) => ({
     environments: normalizeStringArray(input?.environments),
     fullNames: normalizeStringArray(input?.full_names),
     fullNamePrefixes: normalizeStringArray(input?.full_name_prefixes),
     labelValues: normalizeLabelValues(input?.label_values),
+    testCount: normalizeNonNegativeInteger(input?.test_count),
 });
 const hasSelector = (selectors) => selectors.environments.length > 0 ||
     selectors.fullNames.length > 0 ||
     selectors.fullNamePrefixes.length > 0 ||
     Object.keys(selectors.labelValues).length > 0;
+const normalizeEvidenceExpectations = (input) => ({
+    minSteps: normalizePositiveInteger(input?.min_steps),
+    minAttachments: normalizePositiveInteger(input?.min_attachments),
+    stepNameContains: normalizeStringArray(input?.step_name_contains),
+    attachments: (Array.isArray(input?.attachments) ? input.attachments : []).flatMap((attachment) => {
+        if (!attachment || typeof attachment !== "object") {
+            return [];
+        }
+        const name = typeof attachment.name === "string" && attachment.name.length > 0 ? attachment.name : undefined;
+        const contentType = typeof attachment.content_type === "string" && attachment.content_type.length > 0
+            ? attachment.content_type
+            : undefined;
+        return name || contentType ? [{ ...(name ? { name } : {}), ...(contentType ? { contentType } : {}) }] : [];
+    }),
+});
 const normalizeNotes = (value) => {
     if (typeof value === "string") {
         return value.length > 0 ? [value] : [];
@@ -273,6 +283,27 @@ const mergeStepSummaries = (items) => items.reduce((acc, item) => ({
     attachmentRefs: 0,
     assertionLikeSteps: 0,
 });
+const collectStepNames = (steps, path = []) => {
+    const names = [];
+    for (const node of steps) {
+        if (!isStep(node)) {
+            continue;
+        }
+        const nextPath = [...path, node.name];
+        names.push({ name: node.name, path: nextPath });
+        if (node.steps.length) {
+            names.push(...collectStepNames(node.steps, nextPath));
+        }
+    }
+    return names;
+};
+const testStepContainsText = (entry, expectedText) => {
+    const expected = normalizeStepText(expectedText);
+    if (!expected) {
+        return false;
+    }
+    return collectStepNames(entry.attempts[0].tr.steps).some(({ name }) => normalizeStepText(name).includes(expected));
+};
 const buildAttemptSignature = (attempt) => JSON.stringify({
     status: attempt.tr.status,
     errorMessage: attempt.tr.error?.message,
@@ -357,6 +388,7 @@ const subtractStatusCounts = (left, right) => ({
 });
 const summarizeStatusCounts = (counts) => `${counts.total} total (${counts.failed} failed, ${counts.broken} broken, ${counts.unknown} unknown, ${counts.skipped} skipped, ${counts.passed} passed)`;
 const normalizeLogLine = (value) => value.replace(/\s+/g, " ").trim();
+const normalizeStepText = (value) => value.replace(/\s+/g, " ").trim().toLocaleLowerCase();
 const normalizeWarningLine = (value) => normalizeLogLine(value).replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: ");
 const buildCountedValues = (values) => {
     const counts = new Map();
@@ -751,11 +783,68 @@ const renderModelingSummary = (modeling) => {
         : "None");
     return lines.join("\n");
 };
+const cloneHumanReportStatus = (status) => ({
+    ...status,
+    reports: status.reports.map((report) => ({ ...report })),
+    ...(status.errors ? { errors: status.errors.map((error) => ({ ...error })) } : {}),
+});
+const resolveHumanReportStatus = async (provider) => {
+    if (!provider) {
+        return undefined;
+    }
+    const status = typeof provider === "function" ? await provider() : provider;
+    return status ? cloneHumanReportStatus(status) : undefined;
+};
+const renderHumanReportSection = (humanReport) => {
+    if (!humanReport) {
+        return undefined;
+    }
+    const lines = [
+        "## Human Report",
+        "",
+        `- Status: ${humanReport.status}`,
+        `- Mode: ${humanReport.mode}`,
+        `- Result Count: ${humanReport.result_count ?? "unknown"}`,
+        `- Threshold: ${humanReport.threshold}`,
+    ];
+    if (humanReport.path) {
+        lines.push(`- Path: [${escapeInlineMarkdown(humanReport.path)}](${normalizeMarkdownPath(humanReport.path)})`);
+    }
+    if (humanReport.reason) {
+        lines.push(`- Reason: ${escapeInlineMarkdown(humanReport.reason)}`);
+    }
+    if (humanReport.error) {
+        lines.push(`- Error: ${escapeInlineMarkdown(humanReport.error)}`);
+    }
+    if (humanReport.reports.length > 1) {
+        lines.push("");
+        lines.push("### Reports");
+        lines.push("");
+        lines.push(humanReport.reports
+            .map((report) => `- ${escapeInlineMarkdown(report.plugin_id)}: [${escapeInlineMarkdown(report.path)}](${normalizeMarkdownPath(report.path)})`)
+            .join("\n"));
+    }
+    if (humanReport.errors?.length) {
+        lines.push("");
+        lines.push("### Report Errors");
+        lines.push("");
+        lines.push(humanReport.errors
+            .map((error) => {
+            const prefix = error.plugin_id ? `${error.plugin_id}: ` : "";
+            return `- ${escapeInlineMarkdown(`${prefix}${error.message}`)}`;
+        })
+            .join("\n"));
+    }
+    return lines.join("\n");
+};
 const renderSelectorSummary = (title, selectors) => {
-    if (!hasSelector(selectors)) {
+    if (!hasSelector(selectors) && selectors.testCount === undefined) {
         return `- ${title}: None`;
     }
     const parts = [];
+    if (selectors.testCount !== undefined) {
+        parts.push(`test count: ${selectors.testCount}`);
+    }
     if (selectors.environments.length) {
         parts.push(`environments: ${selectors.environments.join(", ")}`);
     }
@@ -771,6 +860,29 @@ const renderSelectorSummary = (title, selectors) => {
     }
     return `- ${title}: ${parts.join(" | ")}`;
 };
+const renderEvidenceExpectationSummary = (evidence) => {
+    const parts = [];
+    if (evidence.minSteps !== undefined) {
+        parts.push(`meaningful steps per test: >= ${evidence.minSteps}`);
+    }
+    if (evidence.minAttachments !== undefined) {
+        parts.push(`attachments per test: >= ${evidence.minAttachments}`);
+    }
+    if (evidence.stepNameContains.length) {
+        parts.push(`step contains: ${evidence.stepNameContains.join("; ")}`);
+    }
+    if (evidence.attachments.length) {
+        parts.push(`attachments: ${evidence.attachments
+            .map((attachment) => [
+            attachment.name ? `name=${attachment.name}` : undefined,
+            attachment.contentType ? `content-type=${attachment.contentType}` : undefined,
+        ]
+            .filter(Boolean)
+            .join(", "))
+            .join("; ")}`);
+    }
+    return `- Evidence expectations: ${parts.length ? parts.join(" | ") : "None"}`;
+};
 const buildCheckSummary = (findings) => {
     const countsBySeverity = {
         high: 0,
@@ -794,6 +906,240 @@ const buildCheckSummary = (findings) => {
         countsByCategory,
     };
 };
+const EXPECTATION_CHECK_IDS = new Set([
+    "expectations-invalid",
+    "expectations-empty",
+    "expectations-unsupported-control",
+    "expectations-weak-goal",
+    "expected-test-missing",
+    "expected-prefix-missing",
+    "expected-label-missing",
+    "expected-environment-missing",
+    "expected-count-mismatch",
+    "expected-step-containing-missing",
+    "insufficient-expected-steps",
+    "insufficient-expected-attachments",
+    "missing-expected-attachment",
+    "forbidden-label-observed",
+    "no-tests-observed",
+]);
+const MISSING_EXPECTED_CHECK_IDS = new Set([
+    "expected-test-missing",
+    "expected-prefix-missing",
+    "expected-label-missing",
+    "expected-environment-missing",
+]);
+const EVIDENCE_MISMATCH_CHECK_IDS = new Set([
+    "expected-step-containing-missing",
+    "insufficient-expected-steps",
+    "insufficient-expected-attachments",
+    "missing-expected-attachment",
+]);
+const countLabelValues = (labelValues) => Object.values(labelValues).reduce((total, values) => total + values.length, 0);
+const recognizedControlCount = (expectations) => {
+    if (!expectations) {
+        return 0;
+    }
+    return ((expectations.goal ? 1 : 0) +
+        (expectations.taskId ? 1 : 0) +
+        (expectations.expected.testCount !== undefined ? 1 : 0) +
+        expectations.expected.environments.length +
+        expectations.expected.fullNames.length +
+        expectations.expected.fullNamePrefixes.length +
+        countLabelValues(expectations.expected.labelValues) +
+        countLabelValues(expectations.forbidden.labelValues) +
+        (expectations.evidence.minSteps !== undefined ? 1 : 0) +
+        (expectations.evidence.minAttachments !== undefined ? 1 : 0) +
+        expectations.evidence.stepNameContains.length +
+        expectations.evidence.attachments.length);
+};
+const runtimeMatchingControlCount = (expectations) => {
+    if (!expectations) {
+        return 0;
+    }
+    return ((expectations.expected.testCount !== undefined ? 1 : 0) +
+        expectations.expected.environments.length +
+        expectations.expected.fullNames.length +
+        expectations.expected.fullNamePrefixes.length +
+        countLabelValues(expectations.expected.labelValues) +
+        countLabelValues(expectations.forbidden.labelValues) +
+        (expectations.evidence.minSteps !== undefined ? 1 : 0) +
+        (expectations.evidence.minAttachments !== undefined ? 1 : 0) +
+        expectations.evidence.stepNameContains.length +
+        expectations.evidence.attachments.length);
+};
+const toExpectationModel = (expectations) => {
+    const expected = {};
+    const forbidden = {};
+    const evidence = {};
+    if (expectations.expected.testCount !== undefined) {
+        expected.test_count = expectations.expected.testCount;
+    }
+    if (expectations.expected.environments.length) {
+        expected.environments = expectations.expected.environments;
+    }
+    if (expectations.expected.fullNames.length) {
+        expected.full_names = expectations.expected.fullNames;
+    }
+    if (expectations.expected.fullNamePrefixes.length) {
+        expected.full_name_prefixes = expectations.expected.fullNamePrefixes;
+    }
+    if (Object.keys(expectations.expected.labelValues).length) {
+        expected.label_values = expectations.expected.labelValues;
+    }
+    if (Object.keys(expectations.forbidden.labelValues).length) {
+        forbidden.label_values = expectations.forbidden.labelValues;
+    }
+    if (expectations.evidence.minSteps !== undefined) {
+        evidence.min_steps = expectations.evidence.minSteps;
+    }
+    if (expectations.evidence.minAttachments !== undefined) {
+        evidence.min_attachments = expectations.evidence.minAttachments;
+    }
+    if (expectations.evidence.stepNameContains.length) {
+        evidence.step_name_contains = expectations.evidence.stepNameContains;
+    }
+    if (expectations.evidence.attachments.length) {
+        evidence.attachments = expectations.evidence.attachments.map((attachment) => ({
+            ...(attachment.name ? { name: attachment.name } : {}),
+            ...(attachment.contentType ? { content_type: attachment.contentType } : {}),
+        }));
+    }
+    return {
+        ...(expectations.goal ? { goal: expectations.goal } : {}),
+        ...(expectations.taskId ? { task_id: expectations.taskId } : {}),
+        ...(Object.keys(expected).length ? { expected } : {}),
+        ...(Object.keys(forbidden).length ? { forbidden } : {}),
+        ...(Object.keys(evidence).length ? { evidence } : {}),
+        ...(expectations.notes.length ? { notes: expectations.notes } : {}),
+    };
+};
+const defaultImpactForFinding = (finding) => {
+    if (finding.impact) {
+        return finding.impact;
+    }
+    if ([
+        "expected-test-missing",
+        "expected-prefix-missing",
+        "expected-label-missing",
+        "expected-environment-missing",
+        "forbidden-label-observed",
+        "no-tests-observed",
+    ].includes(finding.checkName)) {
+        return "reject";
+    }
+    if (finding.checkName === "noop-dominated-steps" && (finding.confidence ?? 0) >= 0.75) {
+        return "reject";
+    }
+    if ([
+        "expectations-invalid",
+        "expectations-empty",
+        "expectations-unsupported-control",
+        "expected-count-mismatch",
+        "expected-step-containing-missing",
+        "insufficient-expected-steps",
+        "insufficient-expected-attachments",
+        "missing-expected-attachment",
+        "runner-failures-outside-logical-results",
+        "metadata-mismatch",
+        "history-id-collision",
+        "failed-without-useful-steps",
+        "failed-without-attachments",
+        "nontrivial-run-with-empty-trace",
+        "retries-without-new-evidence",
+        "passed-without-observable-evidence",
+    ].includes(finding.checkName)) {
+        return "iterate";
+    }
+    if (finding.severity === "high") {
+        return "iterate";
+    }
+    return "advisory";
+};
+const strongestImpact = (findings, fallback) => {
+    if (findings.some((finding) => defaultImpactForFinding(finding) === "reject")) {
+        return "reject";
+    }
+    if (findings.some((finding) => defaultImpactForFinding(finding) === "iterate")) {
+        return "iterate";
+    }
+    return fallback;
+};
+const buildExpectationResult = (params) => {
+    const { expectations, findings, observedTestCount, modelingSummary } = params;
+    const expectationFindings = findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName));
+    const recognized = recognizedControlCount(expectations);
+    const runtimeMatching = runtimeMatchingControlCount(expectations);
+    const invalidFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-invalid");
+    const emptyFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-empty");
+    const unsupportedFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-unsupported-control");
+    const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal");
+    const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0;
+    let status;
+    let impact;
+    if (invalidFindings.length) {
+        status = "unavailable";
+        impact =
+            strongestImpact(invalidFindings, "reject") === "advisory" ? "reject" : strongestImpact(invalidFindings, "reject");
+    }
+    else if (emptyFindings.length || unsupportedFindings.length) {
+        status = "unsupported";
+        impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate") === "reject" ? "reject" : "iterate";
+    }
+    else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) {
+        status = "failed";
+        impact = "reject";
+    }
+    else if (runtimeMatching === 0) {
+        status = "not_requested";
+        impact = "advisory";
+    }
+    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "reject")) {
+        status = "failed";
+        impact = "reject";
+    }
+    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "iterate")) {
+        status = "failed";
+        impact = "iterate";
+    }
+    else if (modelingSummary.completeness === "partial") {
+        status = "partial";
+        impact = "iterate";
+    }
+    else {
+        status = "matched";
+        impact = "accept";
+    }
+    return {
+        schema_version: "allure-agent-expectation-result/v1",
+        status,
+        impact,
+        source: expectations
+            ? {
+                kind: expectations.sourceKind,
+                path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
+            }
+            : {
+                kind: "none",
+                path: null,
+            },
+        recognized_control_count: recognized,
+        unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
+        degraded_controls: [],
+        summary: {
+            expected_tests: expectedTests,
+            observed_tests: observedTestCount,
+            missing_expected: expectationFindings.filter((finding) => MISSING_EXPECTED_CHECK_IDS.has(finding.checkName))
+                .length,
+            forbidden_observed: expectationFindings.filter((finding) => finding.checkName === "forbidden-label-observed")
+                .length,
+            unexpected_observed: 0,
+            evidence_mismatches: expectationFindings.filter((finding) => EVIDENCE_MISMATCH_CHECK_IDS.has(finding.checkName))
+                .length,
+        },
+        finding_ids: expectationFindings.map((finding) => finding.findingId),
+    };
+};
 const sortFindings = (findings) => [...findings].sort((left, right) => {
     const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
     if (bySeverity !== 0) {
@@ -818,6 +1164,28 @@ const renderFindingEvidenceLinks = (params) => {
     })
         .join("\n");
 };
+const formatFindingStructuredValue = (value) => {
+    if (value === undefined || value === null) {
+        return undefined;
+    }
+    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
+        return String(value);
+    }
+    if (Array.isArray(value)) {
+        return value
+            .map((item) => formatFindingStructuredValue(item))
+            .filter(Boolean)
+            .join(", ");
+    }
+    if (typeof value === "object") {
+        const parts = Object.entries(value).flatMap(([key, item]) => {
+            const formatted = formatFindingStructuredValue(item);
+            return formatted ? [`${key}: ${formatted}`] : [];
+        });
+        return parts.length ? parts.join("; ") : undefined;
+    }
+    return undefined;
+};
 const renderFindingsSection = (params) => {
     const { title, findings, currentFilePath, outputDir } = params;
     if (!findings.length) {
@@ -825,25 +1193,26 @@ const renderFindingsSection = (params) => {
     }
     const lines = [`## ${title}`, ""];
     for (const finding of sortFindings(findings)) {
-        lines.push(`### [${finding.severity.toUpperCase()}] ${escapeInlineMarkdown(finding.category)} / ${escapeInlineMarkdown(finding.checkName)}`);
-        lines.push("");
-        lines.push(`- Message: ${escapeInlineMarkdown(finding.message)}`);
-        lines.push(`- Explanation: ${escapeInlineMarkdown(finding.explanation)}`);
-        lines.push(`- Remediation: ${escapeInlineMarkdown(finding.remediationHint)}`);
-        if (finding.expectedReference) {
-            lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
-        }
-        if (finding.confidence !== undefined) {
-            lines.push(`- Confidence: ${finding.confidence}`);
-        }
-        lines.push("- Evidence:");
-        lines.push("");
-        lines.push(renderFindingEvidenceLinks({
+        const impact = defaultImpactForFinding(finding);
+        const expected = formatFindingStructuredValue(finding.expected) ??
+            (finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
+        const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
+        const evidenceLinks = renderFindingEvidenceLinks({
             finding,
             currentFilePath,
             outputDir,
-        }));
-        lines.push("");
+        });
+        lines.push(`- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`);
+        if (expected) {
+            lines.push(`  Expected: ${escapeInlineMarkdown(expected)}`);
+        }
+        if (observed) {
+            lines.push(`  Observed: ${escapeInlineMarkdown(observed)}`);
+        }
+        lines.push(`  Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
+        if (evidenceLinks !== "None") {
+            lines.push(`  Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
+        }
     }
     return lines.join("\n").trimEnd();
 };
@@ -858,6 +1227,25 @@ const renderExpectationSection = (entry) => {
     ];
     return lines.join("\n");
 };
+const renderExpectationResultSection = (params) => {
+    const result = buildExpectationResult(params);
+    const summary = result.summary;
+    return [
+        "## Expectation Result",
+        "",
+        `- Status: ${result.status}`,
+        `- Impact: ${result.impact}`,
+        `- Recognized Controls: ${result.recognized_control_count}`,
+        `- Source: ${result.source.kind}${result.source.path ? ` (${result.source.path})` : ""}`,
+        `- Expected Tests: ${summary.expected_tests}`,
+        `- Observed Tests: ${summary.observed_tests}`,
+        `- Missing Expected: ${summary.missing_expected}`,
+        `- Forbidden Observed: ${summary.forbidden_observed}`,
+        `- Evidence Mismatches: ${summary.evidence_mismatches}`,
+        `- Run Manifest: [manifest/run.json](manifest/run.json)`,
+        `- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)`,
+    ].join("\n");
+};
 const renderRerunGuidance = (findings) => {
     const relevant = findings.filter(({ category }) => category === "evidence" || category === "smells" || category === "metadata");
     if (!relevant.length) {
@@ -875,7 +1263,7 @@ const renderRerunGuidance = (findings) => {
     if (relevant.some(({ checkName }) => checkName === "noop-dominated-steps")) {
         lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
     }
-    lines.push("- Rerun only the relevant tests with the same expectations file so the next review is scoped and comparable.");
+    lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
     return lines.join("\n");
 };
 const renderTestFile = (params) => {
@@ -948,7 +1336,7 @@ const renderTestFile = (params) => {
     return `${lines.join("\n").trimEnd()}\n`;
 };
 const renderIndex = (params) => {
-    const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, } = params;
+    const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, humanReport, } = params;
     const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
     const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
     const remainingGlobalArtifacts = globalArtifacts.filter((artifact) => artifact.displayName !== "stdout.txt" && artifact.displayName !== "stderr.txt");
@@ -1009,20 +1397,35 @@ const renderIndex = (params) => {
         : "None");
     lines.push("");
     lines.push(renderModelingSummary(modelingSummary));
+    const humanReportSection = renderHumanReportSection(humanReport);
+    if (humanReportSection) {
+        lines.push("");
+        lines.push(humanReportSection);
+    }
     if (expectations) {
         lines.push("");
         lines.push("## Expected Scope");
         lines.push("");
         lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
         lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
-        lines.push(`- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
+        lines.push(expectations.sourceKind === "inline"
+            ? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
+            : `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
         lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
         lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
+        lines.push(renderEvidenceExpectationSummary(expectations.evidence));
         if (expectations.notes.length) {
             lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
         }
     }
     lines.push("");
+    lines.push(renderExpectationResultSection({
+        expectations,
+        findings,
+        observedTestCount: tests.length,
+        modelingSummary,
+    }));
+    lines.push("");
     lines.push("## Advisory Check Summary");
     lines.push("");
     lines.push(`- modeling completeness: ${modelingSummary.completeness}`);
@@ -1225,10 +1628,7 @@ const readMaterializedArtifactText = async (outputDir, artifact) => {
         return undefined;
     }
 };
-const resolveOutputDir = (options) => {
-    const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
-    return outputDir ? resolve(outputDir) : undefined;
-};
+const resolveOutputDir = (options) => (options.outputDir ? resolve(options.outputDir) : undefined);
 const cleanupManagedEntries = async (outputDir) => {
     await Promise.all(MANAGED_ENTRIES.map(async (entry) => {
         await rm(join(outputDir, entry), {
@@ -1267,15 +1667,97 @@ const createFindingFactory = () => {
         };
     };
 };
-const parseExpectations = (rawContent) => {
-    const parsed = parse(rawContent);
+const assertExpectationsObject = (parsed) => {
     if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
-        throw new Error("Expected a YAML or JSON object");
+        throw new Error("Expected an expectations object");
     }
-    return parsed;
 };
-const loadExpectations = async (outputDir, createFinding) => {
-    const configuredPath = env[AGENT_EXPECTATIONS_ENV];
+const writeExpectedManifest = async (outputDir, parsed) => {
+    const relativePath = normalizeMarkdownPath("manifest/expected.json");
+    await mkdir(join(outputDir, "manifest"), { recursive: true });
+    await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
+    return relativePath;
+};
+const toLoadedExpectations = (params) => {
+    const { parsed, relativePath, sourceKind, sourcePath } = params;
+    return {
+        sourcePath,
+        sourceKind,
+        relativePath,
+        raw: parsed,
+        goal: parsed.goal,
+        taskId: parsed.task_id,
+        notes: normalizeNotes(parsed.notes),
+        expected: normalizeSelectors(parsed.expected),
+        forbidden: normalizeSelectors(parsed.forbidden),
+        evidence: normalizeEvidenceExpectations(parsed.evidence),
+    };
+};
+const loadExpectations = async (outputDir, createFinding, options) => {
+    const configuredPath = options.expectationsPath;
+    const inlineExpectations = options.expectations;
+    if (!configuredPath && !inlineExpectations) {
+        return {
+            expectations: undefined,
+            findings: [],
+        };
+    }
+    if (configuredPath && inlineExpectations) {
+        return {
+            expectations: undefined,
+            findings: [
+                createFinding({
+                    subject: "run",
+                    subjectType: "run",
+                    severity: "high",
+                    category: "bootstrap",
+                    impact: "reject",
+                    checkName: "expectations-invalid",
+                    title: "Expectation input is invalid",
+                    message: "Both file and inline agent expectations were provided.",
+                    explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
+                    evidencePaths: [],
+                    remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
+                    expectedReference: undefined,
+                }),
+            ],
+        };
+    }
+    if (inlineExpectations) {
+        try {
+            assertExpectationsObject(inlineExpectations);
+            const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
+            return {
+                expectations: toLoadedExpectations({
+                    parsed: inlineExpectations,
+                    relativePath,
+                    sourceKind: "inline",
+                }),
+                findings: [],
+            };
+        }
+        catch (error) {
+            return {
+                expectations: undefined,
+                findings: [
+                    createFinding({
+                        subject: "run",
+                        subjectType: "run",
+                        severity: "high",
+                        category: "bootstrap",
+                        impact: "reject",
+                        checkName: "expectations-invalid",
+                        title: "Expectation input is invalid",
+                        message: "Could not load inline agent expectations",
+                        explanation: `The inline expectations option could not be normalized: ${error.message}`,
+                        evidencePaths: [],
+                        remediationHint: "Provide a valid expectations object before rerunning.",
+                        expectedReference: undefined,
+                    }),
+                ],
+            };
+        }
+    }
     if (!configuredPath) {
         return {
             expectations: undefined,
@@ -1285,21 +1767,15 @@ const loadExpectations = async (outputDir, createFinding) => {
     const expectationsPath = resolve(configuredPath);
     try {
         const rawContent = await readFile(expectationsPath, "utf-8");
-        const parsed = parseExpectations(rawContent);
-        const relativePath = normalizeMarkdownPath("manifest/expected.json");
-        await mkdir(join(outputDir, "manifest"), { recursive: true });
-        await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
+        const parsed = parseAgentExpectations(rawContent);
+        const relativePath = await writeExpectedManifest(outputDir, parsed);
         return {
-            expectations: {
-                sourcePath: expectationsPath,
+            expectations: toLoadedExpectations({
+                parsed,
                 relativePath,
-                raw: parsed,
-                goal: parsed.goal,
-                taskId: parsed.task_id,
-                notes: normalizeNotes(parsed.notes),
-                expected: normalizeSelectors(parsed.expected),
-                forbidden: normalizeSelectors(parsed.forbidden),
-            },
+                sourceKind: "file",
+                sourcePath: expectationsPath,
+            }),
             findings: [],
         };
     }
@@ -1312,37 +1788,19 @@ const loadExpectations = async (outputDir, createFinding) => {
                     subjectType: "run",
                     severity: "high",
                     category: "bootstrap",
-                    checkName: "invalid-expectations-file",
-                    message: `Could not load ALLURE_AGENT_EXPECTATIONS from ${expectationsPath}`,
+                    impact: "reject",
+                    checkName: "expectations-invalid",
+                    title: "Expectation input is invalid",
+                    message: `Could not load expectations from ${expectationsPath}`,
                     explanation: `The expectations file could not be parsed as YAML or JSON: ${error.message}`,
                     evidencePaths: [],
-                    remediationHint: "Provide a readable YAML or JSON file in ALLURE_AGENT_EXPECTATIONS before rerunning.",
+                    remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
                     expectedReference: undefined,
                 }),
             ],
         };
     }
 };
-const loadProjectGuide = async (outputDir) => { // Copies the project's docs/allure-agent-mode.md under outputDir; resolves to { sourcePath, relativePath }, or undefined when the guide is absent.
-    const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd()); // Root comes from the env var named by AGENT_PROJECT_ROOT_ENV, falling back to the working directory.
-    const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
-    try {
-        const content = await readFile(sourcePath, "utf-8");
-        const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md")); // normalizeMarkdownPath is defined elsewhere; presumably normalizes path separators — confirm.
-        await mkdir(join(outputDir, "project", "docs"), { recursive: true }); // Make sure the destination directory exists before writing.
-        await writeFile(join(outputDir, relativePath), content, "utf-8");
-        return {
-            sourcePath,
-            relativePath,
-        };
-    }
-    catch (error) {
-        if (error.code === "ENOENT") { // ENOENT raised anywhere in the try block is treated as "no project guide", not as a failure.
-            return undefined;
-        }
-        throw error; // Every other error propagates to the caller unchanged.
-    }
-};
 const computeScopeEvaluation = (params) => {
     const { tr, environmentId, expectations } = params;
     if (!expectations) {
@@ -1418,22 +1876,54 @@ const collectTestEvidencePaths = (entry) => {
     }
     return uniqueValues(paths);
 };
+const getExpectationTargetEntries = (entries, expectations) => { // Selects the entries that expectation-driven checks should target.
+    if (!hasSelector(expectations.expected)) { // hasSelector is defined elsewhere; presumably true when `expected` carries at least one positive selector — confirm.
+        return entries; // No selector reported: the input array itself is returned, so every entry is a target.
+    }
+    return entries.filter((entry) => entry.scope.scopeMatch === "match"); // Otherwise keep only entries whose scope evaluation is exactly "match".
+};
+const currentAttemptStepSummary = (entry) => mergeStepSummaries([entry.attempts[0].stepSummary, entry.attempts[0].fixtureStepSummary]); // Merges the test and fixture step summaries of entry.attempts[0], which this file treats as the current attempt; throws a TypeError when entry.attempts is empty.
+const nonMissingArtifacts = (entry) => entry.allArtifacts.filter((artifact) => !artifact.missing); // Returns a new array holding the entry's artifacts whose `missing` flag is falsy.
+const formatAttachmentExpectation = (expectation) => [ // Renders an attachment expectation as text, e.g. "name=log.txt, content-type=text/plain".
+    expectation.name ? `name=${expectation.name}` : undefined,
+    expectation.contentType ? `content-type=${expectation.contentType}` : undefined,
+]
+    .filter(Boolean) // Drop the parts whose field was unset or empty.
+    .join(", "); // Yields "" when neither field is set.
+const matchesAttachmentExpectation = (artifact, expectation) => { // True when a non-missing artifact satisfies every filter the expectation sets.
+    if (artifact.missing) { // A missing artifact never matches.
+        return false;
+    }
+    if (expectation.name && artifact.displayName !== expectation.name) { // Name filter: strict equality with displayName; skipped when name is falsy.
+        return false;
+    }
+    if (expectation.contentType && artifact.contentType !== expectation.contentType) { // Content-type filter: strict equality; skipped when contentType is falsy.
+        return false;
+    }
+    return true; // An expectation with neither field set matches any non-missing artifact.
+};
 const buildRunAndTestFindings = (params) => {
     const { entries, expectations, globalArtifacts, modelingSummary, createFinding } = params;
     const runFindings = [];
     const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
     const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
-    if (entries.length === 0) {
+    if (entries.length === 0 && expectations?.expected.testCount !== 0) {
         runFindings.push(createFinding({
             subject: "run",
             subjectType: "run",
             severity: "high",
+            impact: "reject",
             category: "bootstrap",
-            checkName: "no-visible-tests",
+            checkName: "no-tests-observed",
+            title: "No logical tests were observed",
             message: "No visible test results were found in the run.",
             explanation: "The agent output was generated, but there were no visible logical test results to review.",
-            evidencePaths: [],
-            remediationHint: "Verify that Allure results are being generated and that the test command actually executed the intended tests.",
+            evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
+            remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
+            expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
+            observed: { test_count: 0 },
+            action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
+            confidence: 1,
         }));
     }
     if (!stdoutArtifact && !stderrArtifact) {
@@ -1446,7 +1936,7 @@ const buildRunAndTestFindings = (params) => {
             message: "The run does not include global stdout or stderr logs.",
             explanation: "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
             evidencePaths: [],
-            remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics, or use `ALLURE_AGENT_*` with `allure run` for lower-level control.",
+            remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
             confidence: 0.9,
         }));
     }
@@ -1486,19 +1976,93 @@ const buildRunAndTestFindings = (params) => {
     const actualEnvironments = uniqueValues(entries.map(({ environmentId }) => environmentId));
     if (expectations) {
         const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
+        const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
+        const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
+        if (recognizedControlCount(expectations) === 0) {
+            runFindings.push(createFinding({
+                subject: "run",
+                subjectType: "run",
+                severity: "high",
+                impact: "iterate",
+                category: "scope",
+                checkName: "expectations-empty",
+                title: "Expectation source did not contain recognized controls",
+                message: "Expectation source was provided but no recognized M1 controls were parsed.",
+                explanation: "The run can still be reviewed, but expectation precision was not requested.",
+                evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
+                remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
+                observed: { recognized_control_count: 0 },
+                action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
+                confidence: 1,
+            }));
+        }
+        if ((hasRuntimeControls && !expectations.goal) ||
+            ["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)) {
+            runFindings.push(createFinding({
+                subject: "run",
+                subjectType: "run",
+                severity: "info",
+                impact: "advisory",
+                category: "scope",
+                checkName: "expectations-weak-goal",
+                title: "Run goal is missing or too generic",
+                message: expectations.goal
+                    ? `The run goal is too generic: ${expectations.goal}`
+                    : "Runtime expectations were provided without a goal.",
+                explanation: "The goal is intent metadata and does not change the runtime evidence.",
+                evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
+                remediationHint: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
+                expected: { goal: "specific validation claim" },
+                observed: { goal: expectations.goal ?? null },
+                action: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
+                confidence: 0.9,
+            }));
+        }
+        if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
+            const severity = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
+            const impact = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
+            runFindings.push(createFinding({
+                subject: "run",
+                subjectType: "run",
+                severity,
+                impact,
+                category: "scope",
+                checkName: "expected-count-mismatch",
+                title: "Observed logical test count did not match",
+                message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
+                explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
+                evidencePaths: expectations.relativePath
+                    ? [expectations.relativePath, "manifest/tests.jsonl"]
+                    : ["manifest/tests.jsonl"],
+                remediationHint: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
+                expectedReference: "expected.test_count",
+                expected: { test_count: expectations.expected.testCount },
+                observed: { test_count: entries.length },
+                action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
+                confidence: 1,
+            }));
+        }
         expectations.expected.fullNames.forEach((fullName, index) => {
             if (!allFullNames.includes(fullName)) {
                 runFindings.push(createFinding({
                     subject: "run",
                     subjectType: "run",
                     severity: "high",
+                    impact: "reject",
                     category: "scope",
-                    checkName: "missing-expected-test",
-                    message: `Expected test did not run: ${fullName}`,
-                    explanation: "The expectations file explicitly listed this test, but it did not appear in the agentic output.",
-                    evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-                    remediationHint: "Check the test selection, environment, and feature branch scope before rerunning.",
+                    checkName: "expected-test-missing",
+                    title: "Expected test was not observed",
+                    message: "The expected test did not appear in the observed logical results.",
+                    explanation: `Expected test did not run: ${fullName}`,
+                    evidencePaths: expectations.relativePath
+                        ? [expectations.relativePath, "manifest/tests.jsonl"]
+                        : ["manifest/tests.jsonl"],
+                    remediationHint: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
                     expectedReference: `expected.full_names[${index}]`,
+                    expected: { full_names: [fullName] },
+                    observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
+                    action: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
+                    confidence: 1,
                 }));
             }
         });
@@ -1507,14 +2071,22 @@ const buildRunAndTestFindings = (params) => {
                 runFindings.push(createFinding({
                     subject: "run",
                     subjectType: "run",
-                    severity: "warning",
+                    severity: "high",
+                    impact: "reject",
                     category: "scope",
-                    checkName: "missing-expected-prefix",
-                    message: `No executed test matched the expected prefix: ${prefix}`,
-                    explanation: "The expectations file asked for tests within this name prefix, but none were recorded.",
-                    evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-                    remediationHint: "Check the expected selector or adjust the executed test target so the intended scope is covered.",
+                    checkName: "expected-prefix-missing",
+                    title: "Expected test prefix was not observed",
+                    message: `No observed test full name started with the expected prefix: ${prefix}`,
+                    explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
+                    evidencePaths: expectations.relativePath
+                        ? [expectations.relativePath, "manifest/tests.jsonl"]
+                        : ["manifest/tests.jsonl"],
+                    remediationHint: "Treat the run as wrong scope or missing coverage.",
                     expectedReference: `expected.full_name_prefixes[${index}]`,
+                    expected: { full_name_prefixes: [prefix] },
+                    observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
+                    action: "Treat the run as wrong scope or missing coverage.",
+                    confidence: 1,
                 }));
             }
         });
@@ -1523,14 +2095,22 @@ const buildRunAndTestFindings = (params) => {
                 runFindings.push(createFinding({
                     subject: "run",
                     subjectType: "run",
-                    severity: "warning",
+                    severity: "high",
+                    impact: "reject",
                     category: "scope",
-                    checkName: "missing-expected-environment",
+                    checkName: "expected-environment-missing",
+                    title: "Expected environment was not observed",
                     message: `Expected environment did not appear in the run: ${environment}`,
-                    explanation: "The expectations file scoped the run to this environment, but no logical test result matched it.",
-                    evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-                    remediationHint: "Check the environment selector or rerun the intended environment explicitly.",
+                    explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
+                    evidencePaths: expectations.relativePath
+                        ? [expectations.relativePath, "manifest/tests.jsonl"]
+                        : ["manifest/tests.jsonl"],
+                    remediationHint: "Rerun in the intended environment before making environment-specific claims.",
                     expectedReference: `expected.environments[${index}]`,
+                    expected: { environments: [environment] },
+                    observed: { environments: actualEnvironments },
+                    action: "Rerun in the intended environment before making environment-specific claims.",
+                    confidence: 1,
                 }));
             }
         });
@@ -1540,14 +2120,22 @@ const buildRunAndTestFindings = (params) => {
                 runFindings.push(createFinding({
                     subject: "run",
                     subjectType: "run",
-                    severity: "warning",
+                    severity: "high",
+                    impact: "reject",
                     category: "scope",
-                    checkName: "missing-expected-label-selector",
-                    message: `No executed test matched ${formatLabelRequirement(labelName, values)}`,
-                    explanation: "The expectations file defined a label selector for the intended scope, but no logical test result satisfied it.",
-                    evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-                    remediationHint: "Add the expected label metadata to the intended tests or adjust the expectations selector.",
+                    checkName: "expected-label-missing",
+                    title: "Expected label was not observed",
+                    message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
+                    explanation: "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
+                    evidencePaths: expectations.relativePath
+                        ? [expectations.relativePath, "manifest/tests.jsonl"]
+                        : ["manifest/tests.jsonl"],
+                    remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
                     expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
+                    expected: { label_values: { [labelName]: values } },
+                    observed: { test_count: entries.length },
+                    action: "Fix metadata, selector, or run the correct labeled scope.",
+                    confidence: 1,
                 }));
             }
         });
@@ -1570,28 +2158,53 @@ const buildRunAndTestFindings = (params) => {
             });
         }
     }
+    const evidenceTargetKeys = expectations
+        ? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
+        : new Set();
     for (const entry of entries) {
         const currentAttempt = entry.attempts[0];
         const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
         const testEvidencePaths = collectTestEvidencePaths(entry);
         const allStepSummary = mergeStepSummaries(entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])));
+        const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
+        const expectedEvidence = expectations?.evidence;
+        const currentStepSummary = currentAttemptStepSummary(entry);
+        const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
+        const currentAttachments = nonMissingArtifacts(entry);
         const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
         const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
         const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
         if (entry.scope.scopeMatch === "forbidden") {
+            const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => reference.startsWith("forbidden.label_values"));
+            const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
             entry.findings.push(createFinding({
                 subject: entry.key,
                 subjectType: "test",
                 severity: "high",
+                impact: "reject",
                 category: "scope",
-                checkName: "forbidden-selector-match",
-                message: "This test matched a forbidden selector from the expectations file.",
-                explanation: "The logical test belongs to a scope that the expectations file explicitly marked as forbidden.",
+                checkName,
+                title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
+                message: forbiddenLabelReference
+                    ? "This test has a label value that was explicitly forbidden."
+                    : "This test matched a forbidden selector from the expectations.",
+                explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
                 evidencePaths: expectations?.relativePath
                     ? [entry.relativePath, expectations.relativePath]
                     : [entry.relativePath],
-                remediationHint: "Tighten the test selection or update the expectations file before accepting the run.",
-                expectedReference: entry.scope.expectedReferences[0],
+                remediationHint: forbiddenLabelReference
+                    ? "Treat as scope drift. Split or correct the run before using it as focused validation."
+                    : "Tighten the test selection or update the expectations before accepting the run.",
+                expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
+                expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
+                observed: {
+                    full_name: entry.tr.fullName ?? entry.tr.name,
+                    labels: toLabelEntries(entry.tr.labels),
+                },
+                action: forbiddenLabelReference
+                    ? "Treat as scope drift. Split or correct the run before using it as focused validation."
+                    : "Tighten the test selection or update the expectations before accepting the run.",
+                confidence: 1,
             }));
         }
         else if (entry.scope.scopeMatch === "unexpected") {
@@ -1602,11 +2215,11 @@ const buildRunAndTestFindings = (params) => {
                 category: "scope",
                 checkName: "unexpected-test",
                 message: "This test ran outside the expected scope.",
-                explanation: "The expectations file defined positive scope selectors, but this logical test did not match any of them.",
+                explanation: "The expectations defined positive scope selectors, but this logical test did not match any of them.",
                 evidencePaths: expectations?.relativePath
                     ? [entry.relativePath, expectations.relativePath]
                     : [entry.relativePath],
-                remediationHint: "Rerun only the intended tests or broaden the expectations file if this test is part of the plan.",
+                remediationHint: "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
             }));
         }
         if (entry.scope.metadataMismatches.length > 0) {
@@ -1640,6 +2253,117 @@ const buildRunAndTestFindings = (params) => {
                 confidence: 0.85,
             }));
         }
+        expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
+            if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
+                return;
+            }
+            entry.findings.push(createFinding({
+                subject: entry.key,
+                subjectType: "test",
+                severity: "warning",
+                impact: "iterate",
+                category: "evidence",
+                checkName: "expected-step-containing-missing",
+                title: "Expected step text was not observed",
+                message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
+                explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
+                evidencePaths: expectations?.relativePath
+                    ? [entry.relativePath, expectations.relativePath]
+                    : [entry.relativePath],
+                remediationHint: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
+                expectedReference: `evidence.step_name_contains[${index}]`,
+                expected: { step_name_contains: [expectedText] },
+                observed: { steps: currentStepSummary.totalSteps, matched: false },
+                action: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
+                confidence: 0.9,
+            }));
+        });
+        if (expectedEvidenceApplies &&
+            expectedEvidence?.minSteps !== undefined &&
+            currentMeaningfulSteps < expectedEvidence.minSteps) {
+            entry.findings.push(createFinding({
+                subject: entry.key,
+                subjectType: "test",
+                severity: "warning",
+                impact: "iterate",
+                category: "evidence",
+                checkName: "insufficient-expected-steps",
+                title: "Expected step count was not met",
+                message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
+                explanation: "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
+                evidencePaths: expectations?.relativePath
+                    ? [entry.relativePath, expectations.relativePath]
+                    : [entry.relativePath],
+                remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
+                expectedReference: "evidence.min_steps",
+                expected: { min_steps: expectedEvidence.minSteps },
+                observed: { meaningful_steps: currentMeaningfulSteps },
+                action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
+                confidence: 0.9,
+            }));
+        }
+        if (expectedEvidenceApplies &&
+            expectedEvidence?.minAttachments !== undefined &&
+            currentAttachments.length < expectedEvidence.minAttachments) {
+            entry.findings.push(createFinding({
+                subject: entry.key,
+                subjectType: "test",
+                severity: "warning",
+                impact: "iterate",
+                category: "evidence",
+                checkName: "insufficient-expected-attachments",
+                title: "Expected attachment count was not met",
+                message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
+                explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
+                evidencePaths: expectations?.relativePath
+                    ? [entry.relativePath, expectations.relativePath]
+                    : [entry.relativePath],
+                remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
+                expectedReference: "evidence.min_attachments",
+                expected: { min_attachments: expectedEvidence.minAttachments },
+                observed: { attachments: currentAttachments.length },
+                action: "Attach real runtime artifacts only when they are needed for debugging or review.",
+                confidence: 0.9,
+            }));
+        }
+        expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
+            if (!expectedEvidenceApplies) {
+                return;
+            }
+            if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
+                return;
+            }
+            entry.findings.push(createFinding({
+                subject: entry.key,
+                subjectType: "test",
+                severity: "warning",
+                impact: "iterate",
+                category: "evidence",
+                checkName: "missing-expected-attachment",
+                title: "Expected attachment was not observed",
+                message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
+                explanation: "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
+                evidencePaths: expectations?.relativePath
+                    ? [entry.relativePath, expectations.relativePath]
+                    : [entry.relativePath],
+                remediationHint: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
+                expectedReference: `evidence.attachments[${index}]`,
+                expected: {
+                    attachment: {
+                        ...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
+                        ...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
+                    },
+                },
+                observed: {
+                    attachments: currentAttachments.map((attachment) => ({
+                        name: attachment.displayName,
+                        content_type: attachment.contentType ?? null,
+                    })),
+                },
+                action: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
+                confidence: 0.95,
+            }));
+        });
         if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
             entry.findings.push(createFinding({
                 subject: entry.key,
@@ -1928,11 +2652,17 @@ const appendJsonlLine = async (path, item) => {
     await appendFile(path, `${JSON.stringify(item)}\n`, "utf-8");
 };
 const toRunManifest = (params) => {
-    const { context, command, generatedAt, phase, expectations, projectGuide, snapshot } = params;
+    const { context, command, agentContext, generatedAt, phase, expectations, snapshot, humanReport } = params;
     const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
     const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
     const originalExitCode = snapshot.globalExitCode?.original ?? null;
     const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
+    const expectationResult = buildExpectationResult({
+        expectations,
+        findings: snapshot.combinedAllFindings,
+        observedTestCount: snapshot.entries.length,
+        modelingSummary: snapshot.modelingSummary,
+    });
     return {
         schema_version: AGENT_SCHEMA_VERSION,
         report_uuid: context.reportUuid,
@@ -1966,27 +2696,31 @@ const toRunManifest = (params) => {
             findings_manifest: "manifest/findings.jsonl",
             test_events_manifest: "manifest/test-events.jsonl",
             expected_manifest: expectations?.relativePath ?? null,
-            project_guide: projectGuide?.relativePath ?? null,
+            human_report_manifest: humanReport ? "manifest/human-report.json" : null,
             process_logs: {
                 stdout: stdoutArtifact?.relativePath ?? null,
                 stderr: stderrArtifact?.relativePath ?? null,
             },
         },
+        human_report: humanReport ?? null,
         expectations_present: Boolean(expectations),
+        expectations: expectations ? toExpectationModel(expectations) : null,
+        expectation_result: expectationResult,
         check_summary: buildCheckSummary(snapshot.combinedAllFindings),
         agent_context: {
-            agent_name: env[AGENT_NAME_ENV] ?? null,
-            loop_id: env[AGENT_LOOP_ID_ENV] ?? null,
-            task_id: env[AGENT_TASK_ID_ENV] ?? expectations?.taskId ?? null,
-            conversation_id: env[AGENT_CONVERSATION_ID_ENV] ?? null,
+            agent_name: agentContext.agentName ?? null,
+            loop_id: agentContext.loopId ?? null,
+            task_id: agentContext.taskId ?? expectations?.taskId ?? null,
+            conversation_id: agentContext.conversationId ?? null,
         },
     };
 };
 const writeSnapshotFiles = async (params) => {
     const { runtime, snapshot, phase } = params;
-    const { outputDir, context, command, generatedAt, expectations, projectGuide } = runtime;
+    const { outputDir, context, command, generatedAt, expectations } = runtime;
     const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
     const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
+    const humanReport = await resolveHumanReportStatus(runtime.humanReport);
     for (const stalePath of runtime.currentTestPaths) {
         if (!nextTestPaths.has(stalePath)) {
             await rm(stalePath, { force: true });
@@ -2010,12 +2744,16 @@ const writeSnapshotFiles = async (params) => {
         writeJson(join(outputDir, "manifest", "run.json"), toRunManifest({
             context,
             command,
+            agentContext: runtime.agentContext,
             generatedAt,
             phase,
             expectations,
-            projectGuide,
             snapshot,
+            humanReport,
         })),
+        ...(humanReport
+            ? [writeJson(join(outputDir, "manifest", "human-report.json"), humanReport)]
+            : [rm(join(outputDir, "manifest", "human-report.json"), { force: true })]),
         writeJsonlSnapshot(join(outputDir, "manifest", "tests.jsonl"), snapshot.entries.map(toTestsManifestLine)),
         writeJsonlSnapshot(join(outputDir, "manifest", "findings.jsonl"), snapshot.combinedAllFindings.map(toFindingManifestLine)),
         writeTextAtomic(join(outputDir, "index.md"), renderIndex({
@@ -2034,8 +2772,9 @@ const writeSnapshotFiles = async (params) => {
             globalExitCode: snapshot.globalExitCode,
             qualityGateResults: snapshot.qualityGateResults,
             findings: snapshot.combinedAllFindings,
+            humanReport,
         })),
-        writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(projectGuide?.relativePath)),
+        writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
     ]);
 };
 const createBootstrapSnapshot = () => ({
@@ -2079,7 +2818,7 @@ const createBootstrapSnapshot = () => ({
     combinedAllFindings: [],
 });
 const writeBootstrapFiles = async (runtime) => {
-    await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(runtime.projectGuide?.relativePath));
+    await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
     await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
     await writeSnapshotFiles({
         runtime,
@@ -2104,19 +2843,54 @@ const toTestsManifestLine = (entry) => ({
     markdown_path: entry.relativePath,
     assets_dir: entry.relativeAssetDir,
 });
-const toFindingManifestLine = (finding) => ({
-    finding_id: finding.findingId,
-    subject: finding.subject,
-    severity: finding.severity,
-    category: finding.category,
-    check_name: finding.checkName,
-    message: finding.message,
-    explanation: finding.explanation,
-    evidence_paths: finding.evidencePaths,
-    remediation_hint: finding.remediationHint,
-    expected_reference: finding.expectedReference,
-    confidence: finding.confidence,
+const toFindingSubject = (finding) => ({ // Builds the structured `subject` object embedded in a finding manifest line.
+    type: finding.subjectType, // Always emitted, whatever the subject type is.
+    ...(finding.subjectType === "test" ? { id: finding.subject, path: finding.subject } : {}), // Only "test" subjects get `id` and `path`; both carry the same `finding.subject` value.
 });
+const toFindingManifestLine = (finding) => { // Serializes one finding into the object written as a line of manifest/findings.jsonl.
+    const impact = defaultImpactForFinding(finding); // Helper defined outside this view; presumably derives an impact when the finding carries none - TODO confirm.
+    const confidence = finding.confidence ?? 1; // A null/undefined confidence is reported as 1.
+    return {
+        schema_version: "allure-agent-finding/v2",
+        check_id: finding.checkName,
+        instance_id: finding.findingId,
+        severity: finding.severity,
+        impact,
+        confidence,
+        category: finding.category,
+        title: finding.title ?? finding.message, // Findings without a title reuse their message.
+        message: finding.message,
+        subject: toFindingSubject(finding),
+        expected: finding.expected ?? (finding.expectedReference ? { reference: finding.expectedReference } : {}), // Fallback order: explicit `expected`, then a `{ reference }` wrapper, then an empty object.
+        observed: finding.observed ?? { detail: finding.explanation }, // Without `observed`, the explanation text stands in as the observed detail.
+        evidence: {
+            paths: finding.evidencePaths,
+        },
+        action: finding.action ?? finding.remediationHint, // Falls back to the remediation hint when no action is set.
+        ...(finding.source ? { source: finding.source } : {}), // This and the next two optional keys are omitted entirely when falsy.
+        ...(finding.limits ? { limits: finding.limits } : {}),
+        ...(finding.affected ? { affected: finding.affected } : {}),
+        ...(finding.moreCount !== undefined ? { more_count: finding.moreCount } : {}), // Compared against undefined, so a moreCount of 0 is still emitted.
+        legacy: { // Nested group of snake_case fields; presumably kept for consumers of the earlier line format - TODO confirm.
+            finding_id: finding.findingId,
+            subject: finding.subject,
+            subject_type: finding.subjectType,
+            check_name: finding.checkName,
+            explanation: finding.explanation,
+            evidence_paths: finding.evidencePaths,
+            remediation_hint: finding.remediationHint,
+            expected_reference: finding.expectedReference,
+        },
+        finding_id: finding.findingId, // From here down the `legacy` values are repeated at the top level.
+        subject_ref: finding.subject, // Raw subject string; named `subject_ref` because top-level `subject` holds the structured object.
+        subject_type: finding.subjectType,
+        check_name: finding.checkName,
+        explanation: finding.explanation,
+        evidence_paths: finding.evidencePaths,
+        remediation_hint: finding.remediationHint,
+        expected_reference: finding.expectedReference,
+    };
+};
 const queueRuntimeTask = (runtime, task) => {
     runtime.queue = runtime.queue
         .catch(() => undefined)
@@ -2238,18 +3012,23 @@ const createRuntimeState = async (params) => {
     await cleanupManagedEntries(outputDir);
     const generatedAt = new Date().toISOString();
     const createFinding = createFindingFactory();
-    const expectationLoadResult = await loadExpectations(outputDir, createFinding);
-    const projectGuide = await loadProjectGuide(outputDir);
+    const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
     const runtime = {
         outputDir,
         context,
         store,
         generatedAt,
-        command: env[AGENT_COMMAND_ENV],
+        command: options.command,
+        agentContext: {
+            agentName: options.agentName,
+            loopId: options.loopId,
+            taskId: options.taskId,
+            conversationId: options.conversationId,
+        },
+        humanReport: options.humanReport,
         createFinding,
         expectations: expectationLoadResult.expectations,
         expectationLoadFindings: expectationLoadResult.findings,
-        projectGuide,
         unsubscribers: [],
         queue: Promise.resolve(),
         seenLogicalKeys: new Set(),