@allurereport/plugin-agent 3.10.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.js CHANGED
@@ -12,20 +12,12 @@ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (
12
12
  var _AgentPlugin_runtime;
13
13
  import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
14
14
  import { basename, dirname, extname, join, relative, resolve } from "node:path";
15
- import process, { env } from "node:process";
15
+ import process from "node:process";
16
16
  import { formatDuration, isAttachment, isStep, } from "@allurereport/core-api";
17
- import { parse } from "yaml";
18
17
  import { renderAgentsGuide } from "./guidance.js";
19
- const AGENT_OUTPUT_ENV = "ALLURE_AGENT_OUTPUT";
20
- const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
21
- const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
22
- const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
23
- const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
24
- const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
25
- const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
26
- const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
18
+ import { parseAgentExpectations } from "./model.js";
27
19
  const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
28
- const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest", "project"];
20
+ const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"];
29
21
  const STATUS_ORDER = {
30
22
  failed: 0,
31
23
  broken: 1,
@@ -126,16 +118,34 @@ const normalizeLabelValues = (value) => {
126
118
  return values.length ? [[name, values]] : [];
127
119
  }));
128
120
  };
121
/**
 * Accepts a value only when it is an integer greater than or equal to zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself, or `undefined` when it is not a non-negative integer.
 */
const normalizeNonNegativeInteger = (value) => {
    if (typeof value !== "number" || !Number.isInteger(value)) {
        return undefined;
    }
    return value >= 0 ? value : undefined;
};
122
/**
 * Accepts a value only when it is an integer strictly greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself, or `undefined` when it is not a positive integer.
 */
const normalizePositiveInteger = (value) => {
    if (typeof value !== "number" || !Number.isInteger(value)) {
        return undefined;
    }
    return value > 0 ? value : undefined;
};
129
123
/**
 * Converts a raw (snake_case) selector section into the internal camelCase shape.
 * A missing `input` is tolerated: every field is read through optional chaining.
 *
 * @param {object | undefined} input - Raw `expected` / `forbidden` section.
 * @returns {{environments, fullNames, fullNamePrefixes, labelValues, testCount}} Normalized selectors.
 */
const normalizeSelectors = (input) => {
    const environments = normalizeStringArray(input?.environments);
    const fullNames = normalizeStringArray(input?.full_names);
    const fullNamePrefixes = normalizeStringArray(input?.full_name_prefixes);
    const labelValues = normalizeLabelValues(input?.label_values);
    const testCount = normalizeNonNegativeInteger(input?.test_count);
    return { environments, fullNames, fullNamePrefixes, labelValues, testCount };
};
135
130
/**
 * Reports whether any list-based or label-based selector is present.
 * `testCount` is not considered here.
 *
 * @param {object} selectors - Normalized selectors.
 * @returns {boolean} `true` when at least one selector list or label entry exists.
 */
const hasSelector = (selectors) => {
    if (selectors.environments.length > 0) {
        return true;
    }
    if (selectors.fullNames.length > 0) {
        return true;
    }
    if (selectors.fullNamePrefixes.length > 0) {
        return true;
    }
    return Object.keys(selectors.labelValues).length > 0;
};
134
/**
 * Converts the raw `evidence` section of an expectations document into the internal shape.
 *
 * Attachment entries that are not objects, or that carry neither a non-empty `name`
 * nor a non-empty `content_type`, are dropped.
 *
 * @param {object | undefined} input - Raw evidence section.
 * @returns {{minSteps, minAttachments, stepNameContains, attachments}} Normalized evidence expectations.
 */
const normalizeEvidenceExpectations = (input) => {
    const nonEmptyString = (candidate) => (typeof candidate === "string" && candidate.length > 0 ? candidate : undefined);
    const minSteps = normalizePositiveInteger(input?.min_steps);
    const minAttachments = normalizePositiveInteger(input?.min_attachments);
    const stepNameContains = normalizeStringArray(input?.step_name_contains);
    const rawAttachments = Array.isArray(input?.attachments) ? input.attachments : [];
    const attachments = [];
    for (const attachment of rawAttachments) {
        if (!attachment || typeof attachment !== "object") {
            continue;
        }
        const name = nonEmptyString(attachment.name);
        const contentType = nonEmptyString(attachment.content_type);
        if (!name && !contentType) {
            continue;
        }
        // Only the fields that were actually provided end up on the normalized entry.
        const normalized = {};
        if (name) {
            normalized.name = name;
        }
        if (contentType) {
            normalized.contentType = contentType;
        }
        attachments.push(normalized);
    }
    return { minSteps, minAttachments, stepNameContains, attachments };
};
139
149
  const normalizeNotes = (value) => {
140
150
  if (typeof value === "string") {
141
151
  return value.length > 0 ? [value] : [];
@@ -273,6 +283,27 @@ const mergeStepSummaries = (items) => items.reduce((acc, item) => ({
273
283
  attachmentRefs: 0,
274
284
  assertionLikeSteps: 0,
275
285
  });
286
/**
 * Walks a step tree depth-first (parent before children) and lists every step name
 * together with the chain of names leading to it. Nodes rejected by `isStep` are skipped.
 *
 * @param {Iterable<object>} steps - Nodes at the current level.
 * @param {string[]} [path=[]] - Names of the ancestors of `steps`.
 * @returns {{name: string, path: string[]}[]} Collected step names in traversal order.
 */
const collectStepNames = (steps, path = []) => {
    const collected = [];
    const walk = (nodes, parentPath) => {
        for (const candidate of nodes) {
            if (isStep(candidate)) {
                const currentPath = [...parentPath, candidate.name];
                collected.push({ name: candidate.name, path: currentPath });
                if (candidate.steps.length) {
                    walk(candidate.steps, currentPath);
                }
            }
        }
    };
    walk(steps, path);
    return collected;
};
300
/**
 * Checks whether any step of the entry's first attempt has a name containing the
 * expected text. Both sides are compared after `normalizeStepText`.
 *
 * @param {object} entry - Test entry; `entry.attempts[0].tr.steps` is inspected.
 * @param {string} expectedText - Text to look for.
 * @returns {boolean} `true` on a match; `false` when nothing matches or the normalized expected text is empty.
 */
const testStepContainsText = (entry, expectedText) => {
    const needle = normalizeStepText(expectedText);
    if (needle === "") {
        return false;
    }
    for (const { name } of collectStepNames(entry.attempts[0].tr.steps)) {
        if (normalizeStepText(name).includes(needle)) {
            return true;
        }
    }
    return false;
};
276
307
  const buildAttemptSignature = (attempt) => JSON.stringify({
277
308
  status: attempt.tr.status,
278
309
  errorMessage: attempt.tr.error?.message,
@@ -357,6 +388,7 @@ const subtractStatusCounts = (left, right) => ({
357
388
  });
358
389
/**
 * Renders status counters as a single human-readable line.
 *
 * @param {{total, failed, broken, unknown, skipped, passed}} counts - Status counters.
 * @returns {string} Text such as `5 total (1 failed, 0 broken, 0 unknown, 1 skipped, 3 passed)`.
 */
const summarizeStatusCounts = (counts) => {
    const { total } = counts;
    const breakdown = ["failed", "broken", "unknown", "skipped", "passed"]
        .map((status) => `${counts[status]} ${status}`)
        .join(", ");
    return `${total} total (${breakdown})`;
};
359
390
/**
 * Collapses every run of whitespace into one space and trims both ends.
 *
 * @param {string} value - Raw log line.
 * @returns {string} Single-spaced, trimmed line.
 */
const normalizeLogLine = (value) => {
    const collapsed = value.replace(/\s+/g, " ");
    return collapsed.trim();
};
391
/**
 * Normalizes step text for case-insensitive substring matching: whitespace runs
 * collapse to one space, both ends are trimmed, and the result is lower-cased.
 *
 * Uses the locale-independent `toLowerCase()` so a match does not depend on the
 * host's default locale (under a Turkish locale, `toLocaleLowerCase()` maps "I" to "ı",
 * which would make "TITLE" fail to match "title").
 *
 * @param {string} value - Step name or expected text.
 * @returns {string} Normalized text.
 */
const normalizeStepText = (value) => value.replace(/\s+/g, " ").trim().toLowerCase();
360
392
/**
 * Normalizes a warning line: whitespace is normalized via `normalizeLogLine`, then a
 * leading `(node:<digits>) Warning:` prefix (case-insensitive) is replaced with `Warning: `.
 *
 * @param {string} value - Raw warning line.
 * @returns {string} Normalized warning line.
 */
const normalizeWarningLine = (value) => {
    const nodeWarningPrefix = /^\(node:\d+\)\s+Warning:\s*/i;
    const line = normalizeLogLine(value);
    return line.replace(nodeWarningPrefix, "Warning: ");
};
361
393
  const buildCountedValues = (values) => {
362
394
  const counts = new Map();
@@ -751,11 +783,68 @@ const renderModelingSummary = (modeling) => {
751
783
  : "None");
752
784
  return lines.join("\n");
753
785
  };
786
/**
 * Copies a human-report status object: top-level fields are shallow-copied, and each
 * entry of `reports` (and of `errors`, when truthy) is shallow-copied into a new array.
 *
 * @param {object} status - Status object; `status.reports` must be an array.
 * @returns {object} Copy with fresh `reports` / `errors` arrays and entries.
 */
const cloneHumanReportStatus = (status) => {
    const copy = {
        ...status,
        reports: status.reports.map((report) => ({ ...report })),
    };
    if (status.errors) {
        copy.errors = status.errors.map((error) => ({ ...error }));
    }
    return copy;
};
791
/**
 * Resolves the human-report status from a provider that is either the status object
 * itself or a (possibly async) function returning it, and returns a copy of it.
 *
 * @param {object | Function | undefined} provider - Status or status factory.
 * @returns {Promise<object | undefined>} Copied status, or `undefined` when the provider or its result is falsy.
 */
const resolveHumanReportStatus = async (provider) => {
    if (!provider) {
        return undefined;
    }
    let status = provider;
    if (typeof provider === "function") {
        status = await provider();
    }
    if (!status) {
        return undefined;
    }
    return cloneHumanReportStatus(status);
};
798
/**
 * Renders the "Human Report" markdown section.
 *
 * Always emits status, mode, result count (or "unknown") and threshold. Path, reason
 * and error lines are added only when set. The "Reports" list is shown only when there
 * is more than one report; "Report Errors" only when `errors` is non-empty.
 *
 * @param {object | undefined} humanReport - Human report status.
 * @returns {string | undefined} Markdown text, or `undefined` when no status is given.
 */
const renderHumanReportSection = (humanReport) => {
    if (!humanReport) {
        return undefined;
    }
    const toLink = (target) => `[${escapeInlineMarkdown(target)}](${normalizeMarkdownPath(target)})`;
    const lines = [];
    lines.push("## Human Report", "");
    lines.push(`- Status: ${humanReport.status}`);
    lines.push(`- Mode: ${humanReport.mode}`);
    lines.push(`- Result Count: ${humanReport.result_count ?? "unknown"}`);
    lines.push(`- Threshold: ${humanReport.threshold}`);
    if (humanReport.path) {
        lines.push(`- Path: ${toLink(humanReport.path)}`);
    }
    if (humanReport.reason) {
        lines.push(`- Reason: ${escapeInlineMarkdown(humanReport.reason)}`);
    }
    if (humanReport.error) {
        lines.push(`- Error: ${escapeInlineMarkdown(humanReport.error)}`);
    }
    if (humanReport.reports.length > 1) {
        const reportLines = humanReport.reports.map((report) => `- ${escapeInlineMarkdown(report.plugin_id)}: ${toLink(report.path)}`);
        lines.push("", "### Reports", "", reportLines.join("\n"));
    }
    if (humanReport.errors?.length) {
        const errorLines = humanReport.errors.map((error) => {
            const prefix = error.plugin_id ? `${error.plugin_id}: ` : "";
            return `- ${escapeInlineMarkdown(`${prefix}${error.message}`)}`;
        });
        lines.push("", "### Report Errors", "", errorLines.join("\n"));
    }
    return lines.join("\n");
};
754
840
  const renderSelectorSummary = (title, selectors) => {
755
- if (!hasSelector(selectors)) {
841
+ if (!hasSelector(selectors) && selectors.testCount === undefined) {
756
842
  return `- ${title}: None`;
757
843
  }
758
844
  const parts = [];
845
+ if (selectors.testCount !== undefined) {
846
+ parts.push(`test count: ${selectors.testCount}`);
847
+ }
759
848
  if (selectors.environments.length) {
760
849
  parts.push(`environments: ${selectors.environments.join(", ")}`);
761
850
  }
@@ -771,6 +860,29 @@ const renderSelectorSummary = (title, selectors) => {
771
860
  }
772
861
  return `- ${title}: ${parts.join(" | ")}`;
773
862
  };
863
/**
 * Renders the evidence expectations as one markdown bullet.
 *
 * @param {{minSteps, minAttachments, stepNameContains, attachments}} evidence - Normalized evidence expectations.
 * @returns {string} `- Evidence expectations: …` with parts joined by ` | `, or `None` when nothing is configured.
 */
const renderEvidenceExpectationSummary = (evidence) => {
    const describeAttachment = (attachment) => {
        const fields = [];
        if (attachment.name) {
            fields.push(`name=${attachment.name}`);
        }
        if (attachment.contentType) {
            fields.push(`content-type=${attachment.contentType}`);
        }
        return fields.join(", ");
    };
    const candidates = [
        evidence.minSteps !== undefined ? `meaningful steps per test: >= ${evidence.minSteps}` : undefined,
        evidence.minAttachments !== undefined ? `attachments per test: >= ${evidence.minAttachments}` : undefined,
        evidence.stepNameContains.length ? `step contains: ${evidence.stepNameContains.join("; ")}` : undefined,
        evidence.attachments.length
            ? `attachments: ${evidence.attachments.map(describeAttachment).join("; ")}`
            : undefined,
    ];
    const parts = candidates.filter((part) => part !== undefined);
    const summary = parts.length ? parts.join(" | ") : "None";
    return `- Evidence expectations: ${summary}`;
};
774
886
  const buildCheckSummary = (findings) => {
775
887
  const countsBySeverity = {
776
888
  high: 0,
@@ -794,6 +906,240 @@ const buildCheckSummary = (findings) => {
794
906
  countsByCategory,
795
907
  };
796
908
  };
909
// Check ids reporting that an expected selector matched nothing.
const MISSING_EXPECTED_CHECK_IDS = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
]);
// Check ids reporting that required steps or attachments were not found.
const EVIDENCE_MISMATCH_CHECK_IDS = new Set([
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
]);
// Every check id that counts as an expectation finding; the two groups above are subsets.
const EXPECTATION_CHECK_IDS = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expectations-weak-goal",
    ...MISSING_EXPECTED_CHECK_IDS,
    "expected-count-mismatch",
    ...EVIDENCE_MISMATCH_CHECK_IDS,
    "forbidden-label-observed",
    "no-tests-observed",
]);
938
/**
 * Counts label values across all label names.
 *
 * @param {Record<string, unknown[]>} labelValues - Map from label name to its values.
 * @returns {number} Sum of the lengths of all value lists.
 */
const countLabelValues = (labelValues) => {
    let total = 0;
    for (const values of Object.values(labelValues)) {
        total += values.length;
    }
    return total;
};
939
/**
 * Counts every recognized control in the loaded expectations: goal, task id, test count,
 * each selector entry, each expected/forbidden label value, and each evidence rule.
 *
 * @param {object | undefined} expectations - Loaded expectations.
 * @returns {number} Number of recognized controls; `0` when no expectations are loaded.
 */
const recognizedControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    const { expected, forbidden, evidence } = expectations;
    const tallies = [
        expectations.goal ? 1 : 0,
        expectations.taskId ? 1 : 0,
        expected.testCount !== undefined ? 1 : 0,
        expected.environments.length,
        expected.fullNames.length,
        expected.fullNamePrefixes.length,
        countLabelValues(expected.labelValues),
        countLabelValues(forbidden.labelValues),
        evidence.minSteps !== undefined ? 1 : 0,
        evidence.minAttachments !== undefined ? 1 : 0,
        evidence.stepNameContains.length,
        evidence.attachments.length,
    ];
    return tallies.reduce((sum, amount) => sum + amount, 0);
};
956
/**
 * Counts the controls that are matched against observed results: test count, selector
 * entries, expected/forbidden label values, and evidence rules. Goal and task id are
 * not included.
 *
 * @param {object | undefined} expectations - Loaded expectations.
 * @returns {number} Number of runtime-matching controls; `0` when no expectations are loaded.
 */
const runtimeMatchingControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    const { expected, forbidden, evidence } = expectations;
    const tallies = [
        expected.testCount !== undefined ? 1 : 0,
        expected.environments.length,
        expected.fullNames.length,
        expected.fullNamePrefixes.length,
        countLabelValues(expected.labelValues),
        countLabelValues(forbidden.labelValues),
        evidence.minSteps !== undefined ? 1 : 0,
        evidence.minAttachments !== undefined ? 1 : 0,
        evidence.stepNameContains.length,
        evidence.attachments.length,
    ];
    return tallies.reduce((sum, amount) => sum + amount, 0);
};
971
/**
 * Converts normalized (camelCase) expectations back into the snake_case document shape.
 * Unset or empty controls are omitted, and so are sections that end up empty.
 *
 * @param {object} expectations - Loaded expectations (`goal`, `taskId`, `notes`, `expected`, `forbidden`, `evidence`).
 * @returns {object} Document with optional `goal`, `task_id`, `expected`, `forbidden`, `evidence`, `notes`.
 */
const toExpectationModel = (expectations) => {
    const { expected: expectedSelectors, forbidden: forbiddenSelectors, evidence: evidenceRules } = expectations;
    const hasKeys = (value) => Object.keys(value).length > 0;
    const expected = {
        ...(expectedSelectors.testCount !== undefined ? { test_count: expectedSelectors.testCount } : {}),
        ...(expectedSelectors.environments.length ? { environments: expectedSelectors.environments } : {}),
        ...(expectedSelectors.fullNames.length ? { full_names: expectedSelectors.fullNames } : {}),
        ...(expectedSelectors.fullNamePrefixes.length
            ? { full_name_prefixes: expectedSelectors.fullNamePrefixes }
            : {}),
        ...(hasKeys(expectedSelectors.labelValues) ? { label_values: expectedSelectors.labelValues } : {}),
    };
    const forbidden = hasKeys(forbiddenSelectors.labelValues) ? { label_values: forbiddenSelectors.labelValues } : {};
    const toAttachmentModel = (attachment) => ({
        ...(attachment.name ? { name: attachment.name } : {}),
        ...(attachment.contentType ? { content_type: attachment.contentType } : {}),
    });
    const evidence = {
        ...(evidenceRules.minSteps !== undefined ? { min_steps: evidenceRules.minSteps } : {}),
        ...(evidenceRules.minAttachments !== undefined ? { min_attachments: evidenceRules.minAttachments } : {}),
        ...(evidenceRules.stepNameContains.length ? { step_name_contains: evidenceRules.stepNameContains } : {}),
        ...(evidenceRules.attachments.length ? { attachments: evidenceRules.attachments.map(toAttachmentModel) } : {}),
    };
    const model = {};
    if (expectations.goal) {
        model.goal = expectations.goal;
    }
    if (expectations.taskId) {
        model.task_id = expectations.taskId;
    }
    if (hasKeys(expected)) {
        model.expected = expected;
    }
    if (hasKeys(forbidden)) {
        model.forbidden = forbidden;
    }
    if (hasKeys(evidence)) {
        model.evidence = evidence;
    }
    if (expectations.notes.length) {
        model.notes = expectations.notes;
    }
    return model;
};
1017
// Check names whose findings reject the run unless the finding sets its own impact.
const REJECT_IMPACT_CHECK_NAMES = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
    "forbidden-label-observed",
    "no-tests-observed",
]);
// Check names whose findings ask for another iteration unless the finding sets its own impact.
const ITERATE_IMPACT_CHECK_NAMES = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expected-count-mismatch",
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
    "runner-failures-outside-logical-results",
    "metadata-mismatch",
    "history-id-collision",
    "failed-without-useful-steps",
    "failed-without-attachments",
    "nontrivial-run-with-empty-trace",
    "retries-without-new-evidence",
    "passed-without-observable-evidence",
]);
/**
 * Determines the impact of a finding.
 *
 * Precedence: an explicit `finding.impact` wins; then the "reject" check names; then
 * `noop-dominated-steps` with confidence >= 0.75 ("reject"); then the "iterate" check
 * names or any finding of severity "high"; everything else is "advisory".
 *
 * The check-name tables are module-level Sets so they are not rebuilt on every call;
 * callers invoke this function repeatedly inside `.some(...)` scans.
 *
 * @param {{impact?: string, checkName: string, confidence?: number, severity?: string}} finding - Finding to classify.
 * @returns {string} The explicit impact, or one of "reject", "iterate", "advisory".
 */
const defaultImpactForFinding = (finding) => {
    if (finding.impact) {
        return finding.impact;
    }
    if (REJECT_IMPACT_CHECK_NAMES.has(finding.checkName)) {
        return "reject";
    }
    if (finding.checkName === "noop-dominated-steps" && (finding.confidence ?? 0) >= 0.75) {
        return "reject";
    }
    if (ITERATE_IMPACT_CHECK_NAMES.has(finding.checkName) || finding.severity === "high") {
        return "iterate";
    }
    return "advisory";
};
1059
/**
 * Returns the strongest impact among the findings: "reject" if any finding resolves
 * to it, otherwise "iterate" if any does, otherwise `fallback`.
 *
 * @param {object[]} findings - Findings to inspect.
 * @param {string} fallback - Impact returned when no finding is "reject" or "iterate".
 * @returns {string} "reject", "iterate", or `fallback`.
 */
const strongestImpact = (findings, fallback) => {
    let sawIterate = false;
    for (const finding of findings) {
        const impact = defaultImpactForFinding(finding);
        if (impact === "reject") {
            return "reject";
        }
        if (impact === "iterate") {
            sawIterate = true;
        }
    }
    return sawIterate ? "iterate" : fallback;
};
1068
/**
 * Builds the `allure-agent-expectation-result/v1` summary from the loaded expectations
 * and the findings raised for this run.
 *
 * Status/impact are decided by the first matching rule:
 *  1. any `expectations-invalid` finding            -> "unavailable"
 *  2. any `expectations-empty` / unsupported control -> "unsupported"
 *  3. a `no-tests-observed` finding                  -> "failed" / "reject"
 *  4. no runtime-matching controls configured        -> "not_requested" / "advisory"
 *  5. a blocking finding with impact "reject"        -> "failed" / "reject"
 *  6. a blocking finding with impact "iterate"       -> "failed" / "iterate"
 *  7. modeling completeness is "partial"             -> "partial" / "iterate"
 *  8. otherwise                                      -> "matched" / "accept"
 *
 * `expectations-weak-goal` findings are listed in `finding_ids` but never block.
 *
 * @param {{expectations?: object, findings: object[], observedTestCount: number, modelingSummary: object}} params
 * @returns {object} Expectation result record.
 */
const buildExpectationResult = (params) => {
    const { expectations, findings, observedTestCount, modelingSummary } = params;
    const expectationFindings = findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName));
    const withCheckName = (checkName) => expectationFindings.filter((finding) => finding.checkName === checkName);
    const countWithin = (checkIds) => expectationFindings.filter((finding) => checkIds.has(finding.checkName)).length;
    const recognized = recognizedControlCount(expectations);
    const runtimeMatching = runtimeMatchingControlCount(expectations);
    const invalidFindings = withCheckName("expectations-invalid");
    const emptyFindings = withCheckName("expectations-empty");
    const unsupportedFindings = withCheckName("expectations-unsupported-control");
    const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal");
    const isBlockedWith = (impact) => blockingFindings.some((finding) => defaultImpactForFinding(finding) === impact);
    const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0;
    let status;
    let impact;
    if (invalidFindings.length) {
        status = "unavailable";
        // strongestImpact only yields "reject", "iterate" or the fallback, so no
        // further mapping of its result is needed.
        impact = strongestImpact(invalidFindings, "reject");
    }
    else if (emptyFindings.length || unsupportedFindings.length) {
        status = "unsupported";
        impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate");
    }
    else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) {
        status = "failed";
        impact = "reject";
    }
    else if (runtimeMatching === 0) {
        status = "not_requested";
        impact = "advisory";
    }
    else if (isBlockedWith("reject")) {
        status = "failed";
        impact = "reject";
    }
    else if (isBlockedWith("iterate")) {
        status = "failed";
        impact = "iterate";
    }
    else if (modelingSummary.completeness === "partial") {
        status = "partial";
        impact = "iterate";
    }
    else {
        status = "matched";
        impact = "accept";
    }
    return {
        schema_version: "allure-agent-expectation-result/v1",
        status,
        impact,
        source: expectations
            ? {
                kind: expectations.sourceKind,
                // Only file-based expectations have a path to report.
                path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
            }
            : {
                kind: "none",
                path: null,
            },
        recognized_control_count: recognized,
        unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
        degraded_controls: [],
        summary: {
            expected_tests: expectedTests,
            observed_tests: observedTestCount,
            missing_expected: countWithin(MISSING_EXPECTED_CHECK_IDS),
            forbidden_observed: withCheckName("forbidden-label-observed").length,
            unexpected_observed: 0,
            evidence_mismatches: countWithin(EVIDENCE_MISMATCH_CHECK_IDS),
        },
        finding_ids: expectationFindings.map((finding) => finding.findingId),
    };
};
797
1143
  const sortFindings = (findings) => [...findings].sort((left, right) => {
798
1144
  const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
799
1145
  if (bySeverity !== 0) {
@@ -818,6 +1164,28 @@ const renderFindingEvidenceLinks = (params) => {
818
1164
  })
819
1165
  .join("\n");
820
1166
  };
1167
/**
 * Formats a structured `expected` / `observed` finding value as one line of text.
 *
 * - strings, numbers and booleans are stringified;
 * - arrays become a `", "`-joined list of their formatted items;
 * - plain objects become `"key: value"` pairs joined by `"; "`;
 * - anything else (null, undefined, functions, symbols, ...) is treated as absent.
 *
 * Every empty result (empty string, empty array, array or object with only absent
 * members) is reported as `undefined`, never `""`, so a caller's `?? fallback` still
 * applies when there is nothing to show.
 *
 * @param {unknown} value - Value to format.
 * @returns {string | undefined} Formatted text, or `undefined` when there is nothing to render.
 */
const formatFindingStructuredValue = (value) => {
    if (value === undefined || value === null) {
        return undefined;
    }
    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
        const text = String(value);
        return text.length > 0 ? text : undefined;
    }
    if (Array.isArray(value)) {
        const items = value
            .map((item) => formatFindingStructuredValue(item))
            .filter((item) => item !== undefined);
        return items.length ? items.join(", ") : undefined;
    }
    if (typeof value === "object") {
        const parts = Object.entries(value).flatMap(([key, item]) => {
            const formatted = formatFindingStructuredValue(item);
            return formatted !== undefined ? [`${key}: ${formatted}`] : [];
        });
        return parts.length ? parts.join("; ") : undefined;
    }
    return undefined;
};
821
1189
  const renderFindingsSection = (params) => {
822
1190
  const { title, findings, currentFilePath, outputDir } = params;
823
1191
  if (!findings.length) {
@@ -825,25 +1193,26 @@ const renderFindingsSection = (params) => {
825
1193
  }
826
1194
  const lines = [`## ${title}`, ""];
827
1195
  for (const finding of sortFindings(findings)) {
828
- lines.push(`### [${finding.severity.toUpperCase()}] ${escapeInlineMarkdown(finding.category)} / ${escapeInlineMarkdown(finding.checkName)}`);
829
- lines.push("");
830
- lines.push(`- Message: ${escapeInlineMarkdown(finding.message)}`);
831
- lines.push(`- Explanation: ${escapeInlineMarkdown(finding.explanation)}`);
832
- lines.push(`- Remediation: ${escapeInlineMarkdown(finding.remediationHint)}`);
833
- if (finding.expectedReference) {
834
- lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
835
- }
836
- if (finding.confidence !== undefined) {
837
- lines.push(`- Confidence: ${finding.confidence}`);
838
- }
839
- lines.push("- Evidence:");
840
- lines.push("");
841
- lines.push(renderFindingEvidenceLinks({
1196
+ const impact = defaultImpactForFinding(finding);
1197
+ const expected = formatFindingStructuredValue(finding.expected) ??
1198
+ (finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
1199
+ const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
1200
+ const evidenceLinks = renderFindingEvidenceLinks({
842
1201
  finding,
843
1202
  currentFilePath,
844
1203
  outputDir,
845
- }));
846
- lines.push("");
1204
+ });
1205
+ lines.push(`- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`);
1206
+ if (expected) {
1207
+ lines.push(` Expected: ${escapeInlineMarkdown(expected)}`);
1208
+ }
1209
+ if (observed) {
1210
+ lines.push(` Observed: ${escapeInlineMarkdown(observed)}`);
1211
+ }
1212
+ lines.push(` Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
1213
+ if (evidenceLinks !== "None") {
1214
+ lines.push(` Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
1215
+ }
847
1216
  }
848
1217
  return lines.join("\n").trimEnd();
849
1218
  };
@@ -858,6 +1227,25 @@ const renderExpectationSection = (entry) => {
858
1227
  ];
859
1228
  return lines.join("\n");
860
1229
  };
1230
/**
 * Renders the "Expectation Result" markdown section from the result computed by
 * `buildExpectationResult`, followed by links to the run and findings manifests.
 *
 * @param {object} params - Forwarded unchanged to `buildExpectationResult`.
 * @returns {string} Markdown text.
 */
const renderExpectationResultSection = (params) => {
    const { status, impact, recognized_control_count: recognizedControls, source, summary } = buildExpectationResult(params);
    const sourcePathSuffix = source.path ? ` (${source.path})` : "";
    const lines = [];
    lines.push("## Expectation Result");
    lines.push("");
    lines.push(`- Status: ${status}`);
    lines.push(`- Impact: ${impact}`);
    lines.push(`- Recognized Controls: ${recognizedControls}`);
    lines.push(`- Source: ${source.kind}${sourcePathSuffix}`);
    lines.push(`- Expected Tests: ${summary.expected_tests}`);
    lines.push(`- Observed Tests: ${summary.observed_tests}`);
    lines.push(`- Missing Expected: ${summary.missing_expected}`);
    lines.push(`- Forbidden Observed: ${summary.forbidden_observed}`);
    lines.push(`- Evidence Mismatches: ${summary.evidence_mismatches}`);
    lines.push(`- Run Manifest: [manifest/run.json](manifest/run.json)`);
    lines.push(`- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)`);
    return lines.join("\n");
};
861
1249
  const renderRerunGuidance = (findings) => {
862
1250
  const relevant = findings.filter(({ category }) => category === "evidence" || category === "smells" || category === "metadata");
863
1251
  if (!relevant.length) {
@@ -875,7 +1263,7 @@ const renderRerunGuidance = (findings) => {
875
1263
  if (relevant.some(({ checkName }) => checkName === "noop-dominated-steps")) {
876
1264
  lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
877
1265
  }
878
- lines.push("- Rerun only the relevant tests with the same expectations file so the next review is scoped and comparable.");
1266
+ lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
879
1267
  return lines.join("\n");
880
1268
  };
881
1269
  const renderTestFile = (params) => {
@@ -948,7 +1336,7 @@ const renderTestFile = (params) => {
948
1336
  return `${lines.join("\n").trimEnd()}\n`;
949
1337
  };
950
1338
  const renderIndex = (params) => {
951
- const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, } = params;
1339
+ const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, humanReport, } = params;
952
1340
  const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
953
1341
  const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
954
1342
  const remainingGlobalArtifacts = globalArtifacts.filter((artifact) => artifact.displayName !== "stdout.txt" && artifact.displayName !== "stderr.txt");
@@ -1009,20 +1397,35 @@ const renderIndex = (params) => {
1009
1397
  : "None");
1010
1398
  lines.push("");
1011
1399
  lines.push(renderModelingSummary(modelingSummary));
1400
+ const humanReportSection = renderHumanReportSection(humanReport);
1401
+ if (humanReportSection) {
1402
+ lines.push("");
1403
+ lines.push(humanReportSection);
1404
+ }
1012
1405
  if (expectations) {
1013
1406
  lines.push("");
1014
1407
  lines.push("## Expected Scope");
1015
1408
  lines.push("");
1016
1409
  lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
1017
1410
  lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
1018
- lines.push(`- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
1411
+ lines.push(expectations.sourceKind === "inline"
1412
+ ? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
1413
+ : `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
1019
1414
  lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
1020
1415
  lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
1416
+ lines.push(renderEvidenceExpectationSummary(expectations.evidence));
1021
1417
  if (expectations.notes.length) {
1022
1418
  lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
1023
1419
  }
1024
1420
  }
1025
1421
  lines.push("");
1422
+ lines.push(renderExpectationResultSection({
1423
+ expectations,
1424
+ findings,
1425
+ observedTestCount: tests.length,
1426
+ modelingSummary,
1427
+ }));
1428
+ lines.push("");
1026
1429
  lines.push("## Advisory Check Summary");
1027
1430
  lines.push("");
1028
1431
  lines.push(`- modeling completeness: ${modelingSummary.completeness}`);
@@ -1225,10 +1628,7 @@ const readMaterializedArtifactText = async (outputDir, artifact) => {
1225
1628
  return undefined;
1226
1629
  }
1227
1630
  };
1228
- const resolveOutputDir = (options) => {
1229
- const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
1230
- return outputDir ? resolve(outputDir) : undefined;
1231
- };
1631
/**
 * Resolves the configured output directory to an absolute path.
 *
 * @param {{outputDir?: string}} options - Plugin options.
 * @returns {string | undefined} Absolute path, or `undefined` when `outputDir` is not set.
 */
const resolveOutputDir = (options) => {
    const { outputDir } = options;
    if (!outputDir) {
        return undefined;
    }
    return resolve(outputDir);
};
1232
1632
  const cleanupManagedEntries = async (outputDir) => {
1233
1633
  await Promise.all(MANAGED_ENTRIES.map(async (entry) => {
1234
1634
  await rm(join(outputDir, entry), {
@@ -1267,15 +1667,97 @@ const createFindingFactory = () => {
1267
1667
  };
1268
1668
  };
1269
1669
  };
1270
- const parseExpectations = (rawContent) => {
1271
- const parsed = parse(rawContent);
1670
/**
 * Ensures the expectations input is a non-null, non-array object.
 *
 * @param {unknown} parsed - Candidate expectations value.
 * @throws {Error} "Expected an expectations object" when the value is falsy, not an object, or an array.
 */
const assertExpectationsObject = (parsed) => {
    const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    if (isRecord) {
        return;
    }
    throw new Error("Expected an expectations object");
};
1277
- const loadExpectations = async (outputDir, createFinding) => {
1278
- const configuredPath = env[AGENT_EXPECTATIONS_ENV];
1675
/**
 * Writes the expectations as pretty-printed JSON (2-space indent, trailing newline)
 * to `manifest/expected.json` under the output directory, creating the `manifest`
 * directory first if needed.
 *
 * @param {string} outputDir - Agent output directory.
 * @param {object} parsed - Expectations object to serialize.
 * @returns {Promise<string>} Path of the written file relative to `outputDir`.
 */
const writeExpectedManifest = async (outputDir, parsed) => {
    const relativePath = normalizeMarkdownPath("manifest/expected.json");
    const manifestDir = join(outputDir, "manifest");
    await mkdir(manifestDir, { recursive: true });
    const targetFile = join(outputDir, relativePath);
    const serialized = `${JSON.stringify(parsed, null, 2)}\n`;
    await writeFile(targetFile, serialized, "utf-8");
    return relativePath;
};
1681
/**
 * Builds the in-memory expectations record from a parsed expectations document.
 * `goal` and `task_id` are copied as-is; notes, selectors and evidence go through
 * their normalizers. The untouched document is kept under `raw`.
 *
 * @param {{parsed: object, relativePath: string, sourceKind: string, sourcePath?: string}} params
 * @returns {object} Loaded expectations record.
 */
const toLoadedExpectations = (params) => {
    const document = params.parsed;
    const loaded = {
        sourcePath: params.sourcePath,
        sourceKind: params.sourceKind,
        relativePath: params.relativePath,
        raw: document,
        goal: document.goal,
        taskId: document.task_id,
    };
    loaded.notes = normalizeNotes(document.notes);
    loaded.expected = normalizeSelectors(document.expected);
    loaded.forbidden = normalizeSelectors(document.forbidden);
    loaded.evidence = normalizeEvidenceExpectations(document.evidence);
    return loaded;
};
1696
+ const loadExpectations = async (outputDir, createFinding, options) => {
1697
+ const configuredPath = options.expectationsPath;
1698
+ const inlineExpectations = options.expectations;
1699
+ if (!configuredPath && !inlineExpectations) {
1700
+ return {
1701
+ expectations: undefined,
1702
+ findings: [],
1703
+ };
1704
+ }
1705
+ if (configuredPath && inlineExpectations) {
1706
+ return {
1707
+ expectations: undefined,
1708
+ findings: [
1709
+ createFinding({
1710
+ subject: "run",
1711
+ subjectType: "run",
1712
+ severity: "high",
1713
+ category: "bootstrap",
1714
+ impact: "reject",
1715
+ checkName: "expectations-invalid",
1716
+ title: "Expectation input is invalid",
1717
+ message: "Both file and inline agent expectations were provided.",
1718
+ explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
1719
+ evidencePaths: [],
1720
+ remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
1721
+ expectedReference: undefined,
1722
+ }),
1723
+ ],
1724
+ };
1725
+ }
1726
+ if (inlineExpectations) {
1727
+ try {
1728
+ assertExpectationsObject(inlineExpectations);
1729
+ const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
1730
+ return {
1731
+ expectations: toLoadedExpectations({
1732
+ parsed: inlineExpectations,
1733
+ relativePath,
1734
+ sourceKind: "inline",
1735
+ }),
1736
+ findings: [],
1737
+ };
1738
+ }
1739
+ catch (error) {
1740
+ return {
1741
+ expectations: undefined,
1742
+ findings: [
1743
+ createFinding({
1744
+ subject: "run",
1745
+ subjectType: "run",
1746
+ severity: "high",
1747
+ category: "bootstrap",
1748
+ impact: "reject",
1749
+ checkName: "expectations-invalid",
1750
+ title: "Expectation input is invalid",
1751
+ message: "Could not load inline agent expectations",
1752
+ explanation: `The inline expectations option could not be normalized: ${error.message}`,
1753
+ evidencePaths: [],
1754
+ remediationHint: "Provide a valid expectations object before rerunning.",
1755
+ expectedReference: undefined,
1756
+ }),
1757
+ ],
1758
+ };
1759
+ }
1760
+ }
1279
1761
  if (!configuredPath) {
1280
1762
  return {
1281
1763
  expectations: undefined,
@@ -1285,21 +1767,15 @@ const loadExpectations = async (outputDir, createFinding) => {
1285
1767
  const expectationsPath = resolve(configuredPath);
1286
1768
  try {
1287
1769
  const rawContent = await readFile(expectationsPath, "utf-8");
1288
- const parsed = parseExpectations(rawContent);
1289
- const relativePath = normalizeMarkdownPath("manifest/expected.json");
1290
- await mkdir(join(outputDir, "manifest"), { recursive: true });
1291
- await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
1770
+ const parsed = parseAgentExpectations(rawContent);
1771
+ const relativePath = await writeExpectedManifest(outputDir, parsed);
1292
1772
  return {
1293
- expectations: {
1294
- sourcePath: expectationsPath,
1773
+ expectations: toLoadedExpectations({
1774
+ parsed,
1295
1775
  relativePath,
1296
- raw: parsed,
1297
- goal: parsed.goal,
1298
- taskId: parsed.task_id,
1299
- notes: normalizeNotes(parsed.notes),
1300
- expected: normalizeSelectors(parsed.expected),
1301
- forbidden: normalizeSelectors(parsed.forbidden),
1302
- },
1776
+ sourceKind: "file",
1777
+ sourcePath: expectationsPath,
1778
+ }),
1303
1779
  findings: [],
1304
1780
  };
1305
1781
  }
@@ -1312,37 +1788,19 @@ const loadExpectations = async (outputDir, createFinding) => {
1312
1788
  subjectType: "run",
1313
1789
  severity: "high",
1314
1790
  category: "bootstrap",
1315
- checkName: "invalid-expectations-file",
1316
- message: `Could not load ALLURE_AGENT_EXPECTATIONS from ${expectationsPath}`,
1791
+ impact: "reject",
1792
+ checkName: "expectations-invalid",
1793
+ title: "Expectation input is invalid",
1794
+ message: `Could not load expectations from ${expectationsPath}`,
1317
1795
  explanation: `The expectations file could not be parsed as YAML or JSON: ${error.message}`,
1318
1796
  evidencePaths: [],
1319
- remediationHint: "Provide a readable YAML or JSON file in ALLURE_AGENT_EXPECTATIONS before rerunning.",
1797
+ remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
1320
1798
  expectedReference: undefined,
1321
1799
  }),
1322
1800
  ],
1323
1801
  };
1324
1802
  }
1325
1803
  };
1326
- const loadProjectGuide = async (outputDir) => {
1327
- const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd());
1328
- const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
1329
- try {
1330
- const content = await readFile(sourcePath, "utf-8");
1331
- const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md"));
1332
- await mkdir(join(outputDir, "project", "docs"), { recursive: true });
1333
- await writeFile(join(outputDir, relativePath), content, "utf-8");
1334
- return {
1335
- sourcePath,
1336
- relativePath,
1337
- };
1338
- }
1339
- catch (error) {
1340
- if (error.code === "ENOENT") {
1341
- return undefined;
1342
- }
1343
- throw error;
1344
- }
1345
- };
1346
1804
  const computeScopeEvaluation = (params) => {
1347
1805
  const { tr, environmentId, expectations } = params;
1348
1806
  if (!expectations) {
@@ -1418,22 +1876,54 @@ const collectTestEvidencePaths = (entry) => {
1418
1876
  }
1419
1877
  return uniqueValues(paths);
1420
1878
  };
1879
+ const getExpectationTargetEntries = (entries, expectations) => {
1880
+ if (!hasSelector(expectations.expected)) {
1881
+ return entries;
1882
+ }
1883
+ return entries.filter((entry) => entry.scope.scopeMatch === "match");
1884
+ };
1885
+ const currentAttemptStepSummary = (entry) => mergeStepSummaries([entry.attempts[0].stepSummary, entry.attempts[0].fixtureStepSummary]);
1886
+ const nonMissingArtifacts = (entry) => entry.allArtifacts.filter((artifact) => !artifact.missing);
1887
+ const formatAttachmentExpectation = (expectation) => [
1888
+ expectation.name ? `name=${expectation.name}` : undefined,
1889
+ expectation.contentType ? `content-type=${expectation.contentType}` : undefined,
1890
+ ]
1891
+ .filter(Boolean)
1892
+ .join(", ");
1893
+ const matchesAttachmentExpectation = (artifact, expectation) => {
1894
+ if (artifact.missing) {
1895
+ return false;
1896
+ }
1897
+ if (expectation.name && artifact.displayName !== expectation.name) {
1898
+ return false;
1899
+ }
1900
+ if (expectation.contentType && artifact.contentType !== expectation.contentType) {
1901
+ return false;
1902
+ }
1903
+ return true;
1904
+ };
1421
1905
  const buildRunAndTestFindings = (params) => {
1422
1906
  const { entries, expectations, globalArtifacts, modelingSummary, createFinding } = params;
1423
1907
  const runFindings = [];
1424
1908
  const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
1425
1909
  const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
1426
- if (entries.length === 0) {
1910
+ if (entries.length === 0 && expectations?.expected.testCount !== 0) {
1427
1911
  runFindings.push(createFinding({
1428
1912
  subject: "run",
1429
1913
  subjectType: "run",
1430
1914
  severity: "high",
1915
+ impact: "reject",
1431
1916
  category: "bootstrap",
1432
- checkName: "no-visible-tests",
1917
+ checkName: "no-tests-observed",
1918
+ title: "No logical tests were observed",
1433
1919
  message: "No visible test results were found in the run.",
1434
1920
  explanation: "The agent output was generated, but there were no visible logical test results to review.",
1435
- evidencePaths: [],
1436
- remediationHint: "Verify that Allure results are being generated and that the test command actually executed the intended tests.",
1921
+ evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
1922
+ remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
1923
+ expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
1924
+ observed: { test_count: 0 },
1925
+ action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
1926
+ confidence: 1,
1437
1927
  }));
1438
1928
  }
1439
1929
  if (!stdoutArtifact && !stderrArtifact) {
@@ -1446,7 +1936,7 @@ const buildRunAndTestFindings = (params) => {
1446
1936
  message: "The run does not include global stdout or stderr logs.",
1447
1937
  explanation: "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
1448
1938
  evidencePaths: [],
1449
- remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics, or use `ALLURE_AGENT_*` with `allure run` for lower-level control.",
1939
+ remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
1450
1940
  confidence: 0.9,
1451
1941
  }));
1452
1942
  }
@@ -1486,19 +1976,93 @@ const buildRunAndTestFindings = (params) => {
1486
1976
  const actualEnvironments = uniqueValues(entries.map(({ environmentId }) => environmentId));
1487
1977
  if (expectations) {
1488
1978
  const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
1979
+ const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
1980
+ const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
1981
+ if (recognizedControlCount(expectations) === 0) {
1982
+ runFindings.push(createFinding({
1983
+ subject: "run",
1984
+ subjectType: "run",
1985
+ severity: "high",
1986
+ impact: "iterate",
1987
+ category: "scope",
1988
+ checkName: "expectations-empty",
1989
+ title: "Expectation source did not contain recognized controls",
1990
+ message: "Expectation source was provided but no recognized M1 controls were parsed.",
1991
+ explanation: "The run can still be reviewed, but expectation precision was not requested.",
1992
+ evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1993
+ remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
1994
+ observed: { recognized_control_count: 0 },
1995
+ action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
1996
+ confidence: 1,
1997
+ }));
1998
+ }
1999
+ if ((hasRuntimeControls && !expectations.goal) ||
2000
+ ["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)) {
2001
+ runFindings.push(createFinding({
2002
+ subject: "run",
2003
+ subjectType: "run",
2004
+ severity: "info",
2005
+ impact: "advisory",
2006
+ category: "scope",
2007
+ checkName: "expectations-weak-goal",
2008
+ title: "Run goal is missing or too generic",
2009
+ message: expectations.goal
2010
+ ? `The run goal is too generic: ${expectations.goal}`
2011
+ : "Runtime expectations were provided without a goal.",
2012
+ explanation: "The goal is intent metadata and does not change the runtime evidence.",
2013
+ evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
2014
+ remediationHint: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
2015
+ expected: { goal: "specific validation claim" },
2016
+ observed: { goal: expectations.goal ?? null },
2017
+ action: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
2018
+ confidence: 0.9,
2019
+ }));
2020
+ }
2021
+ if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
2022
+ const severity = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
2023
+ const impact = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
2024
+ runFindings.push(createFinding({
2025
+ subject: "run",
2026
+ subjectType: "run",
2027
+ severity,
2028
+ impact,
2029
+ category: "scope",
2030
+ checkName: "expected-count-mismatch",
2031
+ title: "Observed logical test count did not match",
2032
+ message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
2033
+ explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
2034
+ evidencePaths: expectations.relativePath
2035
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2036
+ : ["manifest/tests.jsonl"],
2037
+ remediationHint: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
2038
+ expectedReference: "expected.test_count",
2039
+ expected: { test_count: expectations.expected.testCount },
2040
+ observed: { test_count: entries.length },
2041
+ action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
2042
+ confidence: 1,
2043
+ }));
2044
+ }
1489
2045
  expectations.expected.fullNames.forEach((fullName, index) => {
1490
2046
  if (!allFullNames.includes(fullName)) {
1491
2047
  runFindings.push(createFinding({
1492
2048
  subject: "run",
1493
2049
  subjectType: "run",
1494
2050
  severity: "high",
2051
+ impact: "reject",
1495
2052
  category: "scope",
1496
- checkName: "missing-expected-test",
1497
- message: `Expected test did not run: ${fullName}`,
1498
- explanation: "The expectations file explicitly listed this test, but it did not appear in the agentic output.",
1499
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1500
- remediationHint: "Check the test selection, environment, and feature branch scope before rerunning.",
2053
+ checkName: "expected-test-missing",
2054
+ title: "Expected test was not observed",
2055
+ message: "The expected test did not appear in the observed logical results.",
2056
+ explanation: `Expected test did not run: ${fullName}`,
2057
+ evidencePaths: expectations.relativePath
2058
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2059
+ : ["manifest/tests.jsonl"],
2060
+ remediationHint: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
1501
2061
  expectedReference: `expected.full_names[${index}]`,
2062
+ expected: { full_names: [fullName] },
2063
+ observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
2064
+ action: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
2065
+ confidence: 1,
1502
2066
  }));
1503
2067
  }
1504
2068
  });
@@ -1507,14 +2071,22 @@ const buildRunAndTestFindings = (params) => {
1507
2071
  runFindings.push(createFinding({
1508
2072
  subject: "run",
1509
2073
  subjectType: "run",
1510
- severity: "warning",
2074
+ severity: "high",
2075
+ impact: "reject",
1511
2076
  category: "scope",
1512
- checkName: "missing-expected-prefix",
1513
- message: `No executed test matched the expected prefix: ${prefix}`,
1514
- explanation: "The expectations file asked for tests within this name prefix, but none were recorded.",
1515
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1516
- remediationHint: "Check the expected selector or adjust the executed test target so the intended scope is covered.",
2077
+ checkName: "expected-prefix-missing",
2078
+ title: "Expected test prefix was not observed",
2079
+ message: `No observed test full name started with the expected prefix: ${prefix}`,
2080
+ explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
2081
+ evidencePaths: expectations.relativePath
2082
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2083
+ : ["manifest/tests.jsonl"],
2084
+ remediationHint: "Treat the run as wrong scope or missing coverage.",
1517
2085
  expectedReference: `expected.full_name_prefixes[${index}]`,
2086
+ expected: { full_name_prefixes: [prefix] },
2087
+ observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
2088
+ action: "Treat the run as wrong scope or missing coverage.",
2089
+ confidence: 1,
1518
2090
  }));
1519
2091
  }
1520
2092
  });
@@ -1523,14 +2095,22 @@ const buildRunAndTestFindings = (params) => {
1523
2095
  runFindings.push(createFinding({
1524
2096
  subject: "run",
1525
2097
  subjectType: "run",
1526
- severity: "warning",
2098
+ severity: "high",
2099
+ impact: "reject",
1527
2100
  category: "scope",
1528
- checkName: "missing-expected-environment",
2101
+ checkName: "expected-environment-missing",
2102
+ title: "Expected environment was not observed",
1529
2103
  message: `Expected environment did not appear in the run: ${environment}`,
1530
- explanation: "The expectations file scoped the run to this environment, but no logical test result matched it.",
1531
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1532
- remediationHint: "Check the environment selector or rerun the intended environment explicitly.",
2104
+ explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
2105
+ evidencePaths: expectations.relativePath
2106
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2107
+ : ["manifest/tests.jsonl"],
2108
+ remediationHint: "Rerun in the intended environment before making environment-specific claims.",
1533
2109
  expectedReference: `expected.environments[${index}]`,
2110
+ expected: { environments: [environment] },
2111
+ observed: { environments: actualEnvironments },
2112
+ action: "Rerun in the intended environment before making environment-specific claims.",
2113
+ confidence: 1,
1534
2114
  }));
1535
2115
  }
1536
2116
  });
@@ -1540,14 +2120,22 @@ const buildRunAndTestFindings = (params) => {
1540
2120
  runFindings.push(createFinding({
1541
2121
  subject: "run",
1542
2122
  subjectType: "run",
1543
- severity: "warning",
2123
+ severity: "high",
2124
+ impact: "reject",
1544
2125
  category: "scope",
1545
- checkName: "missing-expected-label-selector",
1546
- message: `No executed test matched ${formatLabelRequirement(labelName, values)}`,
1547
- explanation: "The expectations file defined a label selector for the intended scope, but no logical test result satisfied it.",
1548
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1549
- remediationHint: "Add the expected label metadata to the intended tests or adjust the expectations selector.",
2126
+ checkName: "expected-label-missing",
2127
+ title: "Expected label was not observed",
2128
+ message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
2129
+ explanation: "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
2130
+ evidencePaths: expectations.relativePath
2131
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2132
+ : ["manifest/tests.jsonl"],
2133
+ remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
1550
2134
  expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
2135
+ expected: { label_values: { [labelName]: values } },
2136
+ observed: { test_count: entries.length },
2137
+ action: "Fix metadata, selector, or run the correct labeled scope.",
2138
+ confidence: 1,
1551
2139
  }));
1552
2140
  }
1553
2141
  });
@@ -1570,28 +2158,53 @@ const buildRunAndTestFindings = (params) => {
1570
2158
  });
1571
2159
  }
1572
2160
  }
2161
+ const evidenceTargetKeys = expectations
2162
+ ? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
2163
+ : new Set();
1573
2164
  for (const entry of entries) {
1574
2165
  const currentAttempt = entry.attempts[0];
1575
2166
  const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
1576
2167
  const testEvidencePaths = collectTestEvidencePaths(entry);
1577
2168
  const allStepSummary = mergeStepSummaries(entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])));
2169
+ const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
2170
+ const expectedEvidence = expectations?.evidence;
2171
+ const currentStepSummary = currentAttemptStepSummary(entry);
2172
+ const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
2173
+ const currentAttachments = nonMissingArtifacts(entry);
1578
2174
  const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
1579
2175
  const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
1580
2176
  const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
1581
2177
  if (entry.scope.scopeMatch === "forbidden") {
2178
+ const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => reference.startsWith("forbidden.label_values"));
2179
+ const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
1582
2180
  entry.findings.push(createFinding({
1583
2181
  subject: entry.key,
1584
2182
  subjectType: "test",
1585
2183
  severity: "high",
2184
+ impact: "reject",
1586
2185
  category: "scope",
1587
- checkName: "forbidden-selector-match",
1588
- message: "This test matched a forbidden selector from the expectations file.",
1589
- explanation: "The logical test belongs to a scope that the expectations file explicitly marked as forbidden.",
2186
+ checkName,
2187
+ title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
2188
+ message: forbiddenLabelReference
2189
+ ? "This test has a label value that was explicitly forbidden."
2190
+ : "This test matched a forbidden selector from the expectations.",
2191
+ explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
1590
2192
  evidencePaths: expectations?.relativePath
1591
2193
  ? [entry.relativePath, expectations.relativePath]
1592
2194
  : [entry.relativePath],
1593
- remediationHint: "Tighten the test selection or update the expectations file before accepting the run.",
1594
- expectedReference: entry.scope.expectedReferences[0],
2195
+ remediationHint: forbiddenLabelReference
2196
+ ? "Treat as scope drift. Split or correct the run before using it as focused validation."
2197
+ : "Tighten the test selection or update the expectations before accepting the run.",
2198
+ expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
2199
+ expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
2200
+ observed: {
2201
+ full_name: entry.tr.fullName ?? entry.tr.name,
2202
+ labels: toLabelEntries(entry.tr.labels),
2203
+ },
2204
+ action: forbiddenLabelReference
2205
+ ? "Treat as scope drift. Split or correct the run before using it as focused validation."
2206
+ : "Tighten the test selection or update the expectations before accepting the run.",
2207
+ confidence: 1,
1595
2208
  }));
1596
2209
  }
1597
2210
  else if (entry.scope.scopeMatch === "unexpected") {
@@ -1602,11 +2215,11 @@ const buildRunAndTestFindings = (params) => {
1602
2215
  category: "scope",
1603
2216
  checkName: "unexpected-test",
1604
2217
  message: "This test ran outside the expected scope.",
1605
- explanation: "The expectations file defined positive scope selectors, but this logical test did not match any of them.",
2218
+ explanation: "The expectations defined positive scope selectors, but this logical test did not match any of them.",
1606
2219
  evidencePaths: expectations?.relativePath
1607
2220
  ? [entry.relativePath, expectations.relativePath]
1608
2221
  : [entry.relativePath],
1609
- remediationHint: "Rerun only the intended tests or broaden the expectations file if this test is part of the plan.",
2222
+ remediationHint: "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
1610
2223
  }));
1611
2224
  }
1612
2225
  if (entry.scope.metadataMismatches.length > 0) {
@@ -1640,6 +2253,117 @@ const buildRunAndTestFindings = (params) => {
1640
2253
  confidence: 0.85,
1641
2254
  }));
1642
2255
  }
2256
+ expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
2257
+ if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
2258
+ return;
2259
+ }
2260
+ entry.findings.push(createFinding({
2261
+ subject: entry.key,
2262
+ subjectType: "test",
2263
+ severity: "warning",
2264
+ impact: "iterate",
2265
+ category: "evidence",
2266
+ checkName: "expected-step-containing-missing",
2267
+ title: "Expected step text was not observed",
2268
+ message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
2269
+ explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
2270
+ evidencePaths: expectations?.relativePath
2271
+ ? [entry.relativePath, expectations.relativePath]
2272
+ : [entry.relativePath],
2273
+ remediationHint: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
2274
+ expectedReference: `evidence.step_name_contains[${index}]`,
2275
+ expected: { step_name_contains: [expectedText] },
2276
+ observed: { steps: currentStepSummary.totalSteps, matched: false },
2277
+ action: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
2278
+ confidence: 0.9,
2279
+ }));
2280
+ });
2281
+ if (expectedEvidenceApplies &&
2282
+ expectedEvidence?.minSteps !== undefined &&
2283
+ currentMeaningfulSteps < expectedEvidence.minSteps) {
2284
+ entry.findings.push(createFinding({
2285
+ subject: entry.key,
2286
+ subjectType: "test",
2287
+ severity: "warning",
2288
+ impact: "iterate",
2289
+ category: "evidence",
2290
+ checkName: "insufficient-expected-steps",
2291
+ title: "Expected step count was not met",
2292
+ message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
2293
+ explanation: "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
2294
+ evidencePaths: expectations?.relativePath
2295
+ ? [entry.relativePath, expectations.relativePath]
2296
+ : [entry.relativePath],
2297
+ remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
2298
+ expectedReference: "evidence.min_steps",
2299
+ expected: { min_steps: expectedEvidence.minSteps },
2300
+ observed: { meaningful_steps: currentMeaningfulSteps },
2301
+ action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
2302
+ confidence: 0.9,
2303
+ }));
2304
+ }
2305
+ if (expectedEvidenceApplies &&
2306
+ expectedEvidence?.minAttachments !== undefined &&
2307
+ currentAttachments.length < expectedEvidence.minAttachments) {
2308
+ entry.findings.push(createFinding({
2309
+ subject: entry.key,
2310
+ subjectType: "test",
2311
+ severity: "warning",
2312
+ impact: "iterate",
2313
+ category: "evidence",
2314
+ checkName: "insufficient-expected-attachments",
2315
+ title: "Expected attachment count was not met",
2316
+ message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
2317
+ explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
2318
+ evidencePaths: expectations?.relativePath
2319
+ ? [entry.relativePath, expectations.relativePath]
2320
+ : [entry.relativePath],
2321
+ remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
2322
+ expectedReference: "evidence.min_attachments",
2323
+ expected: { min_attachments: expectedEvidence.minAttachments },
2324
+ observed: { attachments: currentAttachments.length },
2325
+ action: "Attach real runtime artifacts only when they are needed for debugging or review.",
2326
+ confidence: 0.9,
2327
+ }));
2328
+ }
2329
+ expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
2330
+ if (!expectedEvidenceApplies) {
2331
+ return;
2332
+ }
2333
+ if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
2334
+ return;
2335
+ }
2336
+ entry.findings.push(createFinding({
2337
+ subject: entry.key,
2338
+ subjectType: "test",
2339
+ severity: "warning",
2340
+ impact: "iterate",
2341
+ category: "evidence",
2342
+ checkName: "missing-expected-attachment",
2343
+ title: "Expected attachment was not observed",
2344
+ message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
2345
+ explanation: "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
2346
+ evidencePaths: expectations?.relativePath
2347
+ ? [entry.relativePath, expectations.relativePath]
2348
+ : [entry.relativePath],
2349
+ remediationHint: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
2350
+ expectedReference: `evidence.attachments[${index}]`,
2351
+ expected: {
2352
+ attachment: {
2353
+ ...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
2354
+ ...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
2355
+ },
2356
+ },
2357
+ observed: {
2358
+ attachments: currentAttachments.map((attachment) => ({
2359
+ name: attachment.displayName,
2360
+ content_type: attachment.contentType ?? null,
2361
+ })),
2362
+ },
2363
+ action: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
2364
+ confidence: 0.95,
2365
+ }));
2366
+ });
1643
2367
  if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
1644
2368
  entry.findings.push(createFinding({
1645
2369
  subject: entry.key,
@@ -1928,11 +2652,17 @@ const appendJsonlLine = async (path, item) => {
1928
2652
  await appendFile(path, `${JSON.stringify(item)}\n`, "utf-8");
1929
2653
  };
1930
2654
  const toRunManifest = (params) => {
1931
- const { context, command, generatedAt, phase, expectations, projectGuide, snapshot } = params;
2655
+ const { context, command, agentContext, generatedAt, phase, expectations, snapshot, humanReport } = params;
1932
2656
  const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
1933
2657
  const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
1934
2658
  const originalExitCode = snapshot.globalExitCode?.original ?? null;
1935
2659
  const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
2660
+ const expectationResult = buildExpectationResult({
2661
+ expectations,
2662
+ findings: snapshot.combinedAllFindings,
2663
+ observedTestCount: snapshot.entries.length,
2664
+ modelingSummary: snapshot.modelingSummary,
2665
+ });
1936
2666
  return {
1937
2667
  schema_version: AGENT_SCHEMA_VERSION,
1938
2668
  report_uuid: context.reportUuid,
@@ -1966,27 +2696,31 @@ const toRunManifest = (params) => {
1966
2696
  findings_manifest: "manifest/findings.jsonl",
1967
2697
  test_events_manifest: "manifest/test-events.jsonl",
1968
2698
  expected_manifest: expectations?.relativePath ?? null,
1969
- project_guide: projectGuide?.relativePath ?? null,
2699
+ human_report_manifest: humanReport ? "manifest/human-report.json" : null,
1970
2700
  process_logs: {
1971
2701
  stdout: stdoutArtifact?.relativePath ?? null,
1972
2702
  stderr: stderrArtifact?.relativePath ?? null,
1973
2703
  },
1974
2704
  },
2705
+ human_report: humanReport ?? null,
1975
2706
  expectations_present: Boolean(expectations),
2707
+ expectations: expectations ? toExpectationModel(expectations) : null,
2708
+ expectation_result: expectationResult,
1976
2709
  check_summary: buildCheckSummary(snapshot.combinedAllFindings),
1977
2710
  agent_context: {
1978
- agent_name: env[AGENT_NAME_ENV] ?? null,
1979
- loop_id: env[AGENT_LOOP_ID_ENV] ?? null,
1980
- task_id: env[AGENT_TASK_ID_ENV] ?? expectations?.taskId ?? null,
1981
- conversation_id: env[AGENT_CONVERSATION_ID_ENV] ?? null,
2711
+ agent_name: agentContext.agentName ?? null,
2712
+ loop_id: agentContext.loopId ?? null,
2713
+ task_id: agentContext.taskId ?? expectations?.taskId ?? null,
2714
+ conversation_id: agentContext.conversationId ?? null,
1982
2715
  },
1983
2716
  };
1984
2717
  };
1985
2718
  const writeSnapshotFiles = async (params) => {
1986
2719
  const { runtime, snapshot, phase } = params;
1987
- const { outputDir, context, command, generatedAt, expectations, projectGuide } = runtime;
2720
+ const { outputDir, context, command, generatedAt, expectations } = runtime;
1988
2721
  const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
1989
2722
  const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
2723
+ const humanReport = await resolveHumanReportStatus(runtime.humanReport);
1990
2724
  for (const stalePath of runtime.currentTestPaths) {
1991
2725
  if (!nextTestPaths.has(stalePath)) {
1992
2726
  await rm(stalePath, { force: true });
@@ -2010,12 +2744,16 @@ const writeSnapshotFiles = async (params) => {
2010
2744
  writeJson(join(outputDir, "manifest", "run.json"), toRunManifest({
2011
2745
  context,
2012
2746
  command,
2747
+ agentContext: runtime.agentContext,
2013
2748
  generatedAt,
2014
2749
  phase,
2015
2750
  expectations,
2016
- projectGuide,
2017
2751
  snapshot,
2752
+ humanReport,
2018
2753
  })),
2754
+ ...(humanReport
2755
+ ? [writeJson(join(outputDir, "manifest", "human-report.json"), humanReport)]
2756
+ : [rm(join(outputDir, "manifest", "human-report.json"), { force: true })]),
2019
2757
  writeJsonlSnapshot(join(outputDir, "manifest", "tests.jsonl"), snapshot.entries.map(toTestsManifestLine)),
2020
2758
  writeJsonlSnapshot(join(outputDir, "manifest", "findings.jsonl"), snapshot.combinedAllFindings.map(toFindingManifestLine)),
2021
2759
  writeTextAtomic(join(outputDir, "index.md"), renderIndex({
@@ -2034,8 +2772,9 @@ const writeSnapshotFiles = async (params) => {
2034
2772
  globalExitCode: snapshot.globalExitCode,
2035
2773
  qualityGateResults: snapshot.qualityGateResults,
2036
2774
  findings: snapshot.combinedAllFindings,
2775
+ humanReport,
2037
2776
  })),
2038
- writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(projectGuide?.relativePath)),
2777
+ writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
2039
2778
  ]);
2040
2779
  };
2041
2780
  const createBootstrapSnapshot = () => ({
@@ -2079,7 +2818,7 @@ const createBootstrapSnapshot = () => ({
2079
2818
  combinedAllFindings: [],
2080
2819
  });
2081
2820
  const writeBootstrapFiles = async (runtime) => {
2082
- await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(runtime.projectGuide?.relativePath));
2821
+ await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
2083
2822
  await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
2084
2823
  await writeSnapshotFiles({
2085
2824
  runtime,
@@ -2104,19 +2843,54 @@ const toTestsManifestLine = (entry) => ({
2104
2843
  markdown_path: entry.relativePath,
2105
2844
  assets_dir: entry.relativeAssetDir,
2106
2845
  });
2107
- const toFindingManifestLine = (finding) => ({
2108
- finding_id: finding.findingId,
2109
- subject: finding.subject,
2110
- severity: finding.severity,
2111
- category: finding.category,
2112
- check_name: finding.checkName,
2113
- message: finding.message,
2114
- explanation: finding.explanation,
2115
- evidence_paths: finding.evidencePaths,
2116
- remediation_hint: finding.remediationHint,
2117
- expected_reference: finding.expectedReference,
2118
- confidence: finding.confidence,
2846
+ const toFindingSubject = (finding) => ({
2847
+ type: finding.subjectType,
2848
+ ...(finding.subjectType === "test" ? { id: finding.subject, path: finding.subject } : {}),
2119
2849
  });
2850
+ const toFindingManifestLine = (finding) => {
2851
+ const impact = defaultImpactForFinding(finding);
2852
+ const confidence = finding.confidence ?? 1;
2853
+ return {
2854
+ schema_version: "allure-agent-finding/v2",
2855
+ check_id: finding.checkName,
2856
+ instance_id: finding.findingId,
2857
+ severity: finding.severity,
2858
+ impact,
2859
+ confidence,
2860
+ category: finding.category,
2861
+ title: finding.title ?? finding.message,
2862
+ message: finding.message,
2863
+ subject: toFindingSubject(finding),
2864
+ expected: finding.expected ?? (finding.expectedReference ? { reference: finding.expectedReference } : {}),
2865
+ observed: finding.observed ?? { detail: finding.explanation },
2866
+ evidence: {
2867
+ paths: finding.evidencePaths,
2868
+ },
2869
+ action: finding.action ?? finding.remediationHint,
2870
+ ...(finding.source ? { source: finding.source } : {}),
2871
+ ...(finding.limits ? { limits: finding.limits } : {}),
2872
+ ...(finding.affected ? { affected: finding.affected } : {}),
2873
+ ...(finding.moreCount !== undefined ? { more_count: finding.moreCount } : {}),
2874
+ legacy: {
2875
+ finding_id: finding.findingId,
2876
+ subject: finding.subject,
2877
+ subject_type: finding.subjectType,
2878
+ check_name: finding.checkName,
2879
+ explanation: finding.explanation,
2880
+ evidence_paths: finding.evidencePaths,
2881
+ remediation_hint: finding.remediationHint,
2882
+ expected_reference: finding.expectedReference,
2883
+ },
2884
+ finding_id: finding.findingId,
2885
+ subject_ref: finding.subject,
2886
+ subject_type: finding.subjectType,
2887
+ check_name: finding.checkName,
2888
+ explanation: finding.explanation,
2889
+ evidence_paths: finding.evidencePaths,
2890
+ remediation_hint: finding.remediationHint,
2891
+ expected_reference: finding.expectedReference,
2892
+ };
2893
+ };
2120
2894
  const queueRuntimeTask = (runtime, task) => {
2121
2895
  runtime.queue = runtime.queue
2122
2896
  .catch(() => undefined)
@@ -2238,18 +3012,23 @@ const createRuntimeState = async (params) => {
2238
3012
  await cleanupManagedEntries(outputDir);
2239
3013
  const generatedAt = new Date().toISOString();
2240
3014
  const createFinding = createFindingFactory();
2241
- const expectationLoadResult = await loadExpectations(outputDir, createFinding);
2242
- const projectGuide = await loadProjectGuide(outputDir);
3015
+ const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
2243
3016
  const runtime = {
2244
3017
  outputDir,
2245
3018
  context,
2246
3019
  store,
2247
3020
  generatedAt,
2248
- command: env[AGENT_COMMAND_ENV],
3021
+ command: options.command,
3022
+ agentContext: {
3023
+ agentName: options.agentName,
3024
+ loopId: options.loopId,
3025
+ taskId: options.taskId,
3026
+ conversationId: options.conversationId,
3027
+ },
3028
+ humanReport: options.humanReport,
2249
3029
  createFinding,
2250
3030
  expectations: expectationLoadResult.expectations,
2251
3031
  expectationLoadFindings: expectationLoadResult.findings,
2252
- projectGuide,
2253
3032
  unsubscribers: [],
2254
3033
  queue: Promise.resolve(),
2255
3034
  seenLogicalKeys: new Set(),