@allurereport/plugin-agent 3.9.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.js CHANGED
@@ -12,20 +12,12 @@ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (
12
12
  var _AgentPlugin_runtime;
13
13
  import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
14
14
  import { basename, dirname, extname, join, relative, resolve } from "node:path";
15
- import process, { env } from "node:process";
15
+ import process from "node:process";
16
16
  import { formatDuration, isAttachment, isStep, } from "@allurereport/core-api";
17
- import { parse } from "yaml";
18
17
  import { renderAgentsGuide } from "./guidance.js";
19
- const AGENT_OUTPUT_ENV = "ALLURE_AGENT_OUTPUT";
20
- const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
21
- const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
22
- const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
23
- const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
24
- const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
25
- const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
26
- const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
18
+ import { parseAgentExpectations } from "./model.js";
27
19
  const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
28
- const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest", "project"];
20
+ const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"];
29
21
  const STATUS_ORDER = {
30
22
  failed: 0,
31
23
  broken: 1,
@@ -126,16 +118,34 @@ const normalizeLabelValues = (value) => {
126
118
  return values.length ? [[name, values]] : [];
127
119
  }));
128
120
  };
121
/**
 * Accepts only integer numbers greater than or equal to zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself when it qualifies, otherwise `undefined`.
 */
const normalizeNonNegativeInteger = (value) => {
    const isWholeNumber = typeof value === "number" && Number.isInteger(value);
    if (!isWholeNumber || value < 0) {
        return undefined;
    }
    return value;
};
122
/**
 * Accepts only integer numbers strictly greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself when it qualifies, otherwise `undefined`.
 */
const normalizePositiveInteger = (value) => {
    const isWholeNumber = typeof value === "number" && Number.isInteger(value);
    if (!isWholeNumber || value <= 0) {
        return undefined;
    }
    return value;
};
129
123
  const normalizeSelectors = (input) => ({
130
124
  environments: normalizeStringArray(input?.environments),
131
125
  fullNames: normalizeStringArray(input?.full_names),
132
126
  fullNamePrefixes: normalizeStringArray(input?.full_name_prefixes),
133
127
  labelValues: normalizeLabelValues(input?.label_values),
128
+ testCount: normalizeNonNegativeInteger(input?.test_count),
134
129
  });
135
130
  const hasSelector = (selectors) => selectors.environments.length > 0 ||
136
131
  selectors.fullNames.length > 0 ||
137
132
  selectors.fullNamePrefixes.length > 0 ||
138
133
  Object.keys(selectors.labelValues).length > 0;
134
/**
 * Normalizes the raw `evidence` section of an expectations document.
 *
 * Attachment entries that are not objects, or that carry neither a non-empty
 * string `name` nor a non-empty string `content_type`, are dropped.
 *
 * @param {object | undefined | null} input - Raw evidence section.
 * @returns {{minSteps: (number|undefined), minAttachments: (number|undefined), stepNameContains: *, attachments: Array<{name?: string, contentType?: string}>}}
 */
const normalizeEvidenceExpectations = (input) => {
    const nonEmptyString = (candidate) => {
        return typeof candidate === "string" && candidate.length > 0 ? candidate : undefined;
    };
    // Returns a zero- or one-element array so it can be used with flatMap.
    const toAttachmentExpectation = (attachment) => {
        if (!attachment || typeof attachment !== "object") {
            return [];
        }
        const name = nonEmptyString(attachment.name);
        const contentType = nonEmptyString(attachment.content_type);
        if (!name && !contentType) {
            return [];
        }
        const normalized = {};
        if (name) {
            normalized.name = name;
        }
        if (contentType) {
            normalized.contentType = contentType;
        }
        return [normalized];
    };
    const rawAttachments = Array.isArray(input?.attachments) ? input.attachments : [];
    return {
        minSteps: normalizePositiveInteger(input?.min_steps),
        minAttachments: normalizePositiveInteger(input?.min_attachments),
        stepNameContains: normalizeStringArray(input?.step_name_contains),
        attachments: rawAttachments.flatMap((attachment) => toAttachmentExpectation(attachment)),
    };
};
139
149
  const normalizeNotes = (value) => {
140
150
  if (typeof value === "string") {
141
151
  return value.length > 0 ? [value] : [];
@@ -273,6 +283,27 @@ const mergeStepSummaries = (items) => items.reduce((acc, item) => ({
273
283
  attachmentRefs: 0,
274
284
  assertionLikeSteps: 0,
275
285
  });
286
/**
 * Flattens a step tree into `{ name, path }` records in pre-order.
 * Nodes for which `isStep` is false are skipped, and so is anything nested
 * below them.
 *
 * @param {Array} steps - Step nodes at the current level.
 * @param {string[]} [path=[]] - Names of the ancestor steps.
 * @returns {Array<{name: string, path: string[]}>} One record per step, parent before children.
 */
const collectStepNames = (steps, path = []) => steps.flatMap((node) => {
    if (!isStep(node)) {
        return [];
    }
    const nextPath = [...path, node.name];
    const own = { name: node.name, path: nextPath };
    return node.steps.length ? [own, ...collectStepNames(node.steps, nextPath)] : [own];
});
300
/**
 * Checks whether any step of the entry's first attempt has a name containing
 * the expected text, comparing both sides after `normalizeStepText`.
 *
 * @param {object} entry - Test entry; `entry.attempts[0].tr.steps` is read.
 * @param {string} expectedText - Text to look for.
 * @returns {boolean} `false` when the normalized expected text is empty or no step matches.
 */
const testStepContainsText = (entry, expectedText) => {
    const needle = normalizeStepText(expectedText);
    if (needle) {
        const attemptSteps = entry.attempts[0].tr.steps;
        for (const step of collectStepNames(attemptSteps)) {
            if (normalizeStepText(step.name).includes(needle)) {
                return true;
            }
        }
    }
    return false;
};
276
307
  const buildAttemptSignature = (attempt) => JSON.stringify({
277
308
  status: attempt.tr.status,
278
309
  errorMessage: attempt.tr.error?.message,
@@ -357,6 +388,7 @@ const subtractStatusCounts = (left, right) => ({
357
388
  });
358
389
  const summarizeStatusCounts = (counts) => `${counts.total} total (${counts.failed} failed, ${counts.broken} broken, ${counts.unknown} unknown, ${counts.skipped} skipped, ${counts.passed} passed)`;
359
390
  const normalizeLogLine = (value) => value.replace(/\s+/g, " ").trim();
391
/**
 * Normalizes step text for case-insensitive comparison: collapses whitespace
 * runs to a single space, trims, and lower-cases.
 *
 * Lower-casing uses the locale-independent `toLowerCase()` so that matching
 * gives the same answer regardless of the host locale (locale-aware
 * lower-casing maps "I" to a dotless "ı" under Turkish/Azeri locales, which
 * would make "TITLE" fail to match "title").
 *
 * @param {string} value - Step name or expected text.
 * @returns {string} Normalized text.
 */
const normalizeStepText = (value) => value.replace(/\s+/g, " ").trim().toLowerCase();
360
392
  const normalizeWarningLine = (value) => normalizeLogLine(value).replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: ");
361
393
  const buildCountedValues = (values) => {
362
394
  const counts = new Map();
@@ -752,10 +784,13 @@ const renderModelingSummary = (modeling) => {
752
784
  return lines.join("\n");
753
785
  };
754
786
  const renderSelectorSummary = (title, selectors) => {
755
- if (!hasSelector(selectors)) {
787
+ if (!hasSelector(selectors) && selectors.testCount === undefined) {
756
788
  return `- ${title}: None`;
757
789
  }
758
790
  const parts = [];
791
+ if (selectors.testCount !== undefined) {
792
+ parts.push(`test count: ${selectors.testCount}`);
793
+ }
759
794
  if (selectors.environments.length) {
760
795
  parts.push(`environments: ${selectors.environments.join(", ")}`);
761
796
  }
@@ -771,6 +806,29 @@ const renderSelectorSummary = (title, selectors) => {
771
806
  }
772
807
  return `- ${title}: ${parts.join(" | ")}`;
773
808
  };
809
/**
 * Renders the normalized evidence expectations as a single markdown bullet.
 * Segments appear in a fixed order (min steps, min attachments, step text,
 * attachment matchers) joined by " | "; with no segment the bullet reads "None".
 *
 * @param {{minSteps?: number, minAttachments?: number, stepNameContains: string[], attachments: Array<{name?: string, contentType?: string}>}} evidence
 * @returns {string} Markdown list item.
 */
const renderEvidenceExpectationSummary = (evidence) => {
    const { minSteps, minAttachments, stepNameContains, attachments } = evidence;
    const describeAttachment = ({ name, contentType }) => {
        const fields = [];
        if (name) {
            fields.push(`name=${name}`);
        }
        if (contentType) {
            fields.push(`content-type=${contentType}`);
        }
        return fields.join(", ");
    };
    const segments = [
        ...(minSteps !== undefined ? [`meaningful steps per test: >= ${minSteps}`] : []),
        ...(minAttachments !== undefined ? [`attachments per test: >= ${minAttachments}`] : []),
        ...(stepNameContains.length ? [`step contains: ${stepNameContains.join("; ")}`] : []),
        ...(attachments.length ? [`attachments: ${attachments.map(describeAttachment).join("; ")}`] : []),
    ];
    const summary = segments.length ? segments.join(" | ") : "None";
    return `- Evidence expectations: ${summary}`;
};
774
832
  const buildCheckSummary = (findings) => {
775
833
  const countsBySeverity = {
776
834
  high: 0,
@@ -794,6 +852,240 @@ const buildCheckSummary = (findings) => {
794
852
  countsByCategory,
795
853
  };
796
854
  };
855
/** Check names counted as "missing expected" in the expectation result summary. */
const MISSING_EXPECTED_CHECK_IDS = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
]);
/** Check names counted as "evidence mismatches" in the expectation result summary. */
const EVIDENCE_MISMATCH_CHECK_IDS = new Set([
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
]);
/**
 * Every check name that belongs to expectation evaluation. The two subsets
 * above are spliced in at the positions they occupy in the full list, so the
 * members and their insertion order are unchanged.
 */
const EXPECTATION_CHECK_IDS = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expectations-weak-goal",
    ...MISSING_EXPECTED_CHECK_IDS,
    "expected-count-mismatch",
    ...EVIDENCE_MISMATCH_CHECK_IDS,
    "forbidden-label-observed",
    "no-tests-observed",
]);
884
/**
 * Counts the label values across all label names.
 *
 * @param {Object<string, Array>} labelValues - Map of label name to its values.
 * @returns {number} Sum of the lengths of all value lists.
 */
const countLabelValues = (labelValues) => {
    let total = 0;
    for (const values of Object.values(labelValues)) {
        total += values.length;
    }
    return total;
};
885
/**
 * Counts every control recognized in the loaded expectations: goal, task id,
 * expected test count, each expected selector value, each forbidden label
 * value and each evidence requirement.
 *
 * @param {object | undefined} expectations - Loaded expectations, if any.
 * @returns {number} Number of recognized controls; 0 when there are no expectations.
 */
const recognizedControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    const { goal, taskId, expected, forbidden, evidence } = expectations;
    const tallies = [
        goal ? 1 : 0,
        taskId ? 1 : 0,
        expected.testCount !== undefined ? 1 : 0,
        expected.environments.length,
        expected.fullNames.length,
        expected.fullNamePrefixes.length,
        countLabelValues(expected.labelValues),
        countLabelValues(forbidden.labelValues),
        evidence.minSteps !== undefined ? 1 : 0,
        evidence.minAttachments !== undefined ? 1 : 0,
        evidence.stepNameContains.length,
        evidence.attachments.length,
    ];
    return tallies.reduce((sum, tally) => sum + tally, 0);
};
902
/**
 * Counts the controls that are matched against observed results: the same
 * tally as `recognizedControlCount` minus goal and task id.
 *
 * @param {object | undefined} expectations - Loaded expectations, if any.
 * @returns {number} Number of runtime-matching controls; 0 when there are no expectations.
 */
const runtimeMatchingControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    const { expected, forbidden, evidence } = expectations;
    const tallies = [
        expected.testCount !== undefined ? 1 : 0,
        expected.environments.length,
        expected.fullNames.length,
        expected.fullNamePrefixes.length,
        countLabelValues(expected.labelValues),
        countLabelValues(forbidden.labelValues),
        evidence.minSteps !== undefined ? 1 : 0,
        evidence.minAttachments !== undefined ? 1 : 0,
        evidence.stepNameContains.length,
        evidence.attachments.length,
    ];
    return tallies.reduce((sum, tally) => sum + tally, 0);
};
917
/**
 * Converts loaded (camelCase) expectations back into the snake_case
 * expectations document shape, omitting every empty or unset field and every
 * section that ends up without keys.
 *
 * @param {object} expectations - Loaded expectations (goal, taskId, notes, expected, forbidden, evidence).
 * @returns {object} Document with optional `goal`, `task_id`, `expected`, `forbidden`, `evidence`, `notes`.
 */
const toExpectationModel = (expectations) => {
    const { goal, taskId, notes, expected: want, forbidden: deny, evidence: proof } = expectations;
    const hasKeys = (record) => Object.keys(record).length > 0;
    // Spreading a falsy value contributes no keys, so `cond && { ... }` acts as a conditional field.
    const expected = {
        ...(want.testCount !== undefined && { test_count: want.testCount }),
        ...(want.environments.length > 0 && { environments: want.environments }),
        ...(want.fullNames.length > 0 && { full_names: want.fullNames }),
        ...(want.fullNamePrefixes.length > 0 && { full_name_prefixes: want.fullNamePrefixes }),
        ...(hasKeys(want.labelValues) && { label_values: want.labelValues }),
    };
    const forbidden = {
        ...(hasKeys(deny.labelValues) && { label_values: deny.labelValues }),
    };
    const evidence = {
        ...(proof.minSteps !== undefined && { min_steps: proof.minSteps }),
        ...(proof.minAttachments !== undefined && { min_attachments: proof.minAttachments }),
        ...(proof.stepNameContains.length > 0 && { step_name_contains: proof.stepNameContains }),
        ...(proof.attachments.length > 0 && {
            attachments: proof.attachments.map(({ name, contentType }) => ({
                ...(name ? { name } : {}),
                ...(contentType ? { content_type: contentType } : {}),
            })),
        }),
    };
    return {
        ...(goal ? { goal } : {}),
        ...(taskId ? { task_id: taskId } : {}),
        ...(hasKeys(expected) && { expected }),
        ...(hasKeys(forbidden) && { forbidden }),
        ...(hasKeys(evidence) && { evidence }),
        ...(notes.length > 0 && { notes }),
    };
};
963
/**
 * Resolves the impact of a finding. An explicit `finding.impact` wins;
 * otherwise the impact is derived from the check name, then from severity.
 *
 * @param {{impact?: string, checkName: string, confidence?: number, severity?: string}} finding
 * @returns {string} The explicit impact, or "reject" / "iterate" / "advisory".
 */
const defaultImpactForFinding = (finding) => {
    if (finding.impact) {
        return finding.impact;
    }
    switch (finding.checkName) {
        case "expected-test-missing":
        case "expected-prefix-missing":
        case "expected-label-missing":
        case "expected-environment-missing":
        case "forbidden-label-observed":
        case "no-tests-observed":
            return "reject";
        case "noop-dominated-steps":
            // Rejects only at confidence >= 0.75; otherwise severity decides below.
            if ((finding.confidence ?? 0) >= 0.75) {
                return "reject";
            }
            break;
        case "expectations-invalid":
        case "expectations-empty":
        case "expectations-unsupported-control":
        case "expected-count-mismatch":
        case "expected-step-containing-missing":
        case "insufficient-expected-steps":
        case "insufficient-expected-attachments":
        case "missing-expected-attachment":
        case "runner-failures-outside-logical-results":
        case "metadata-mismatch":
        case "history-id-collision":
        case "failed-without-useful-steps":
        case "failed-without-attachments":
        case "nontrivial-run-with-empty-trace":
        case "retries-without-new-evidence":
        case "passed-without-observable-evidence":
            return "iterate";
        default:
            break;
    }
    return finding.severity === "high" ? "iterate" : "advisory";
};
1005
/**
 * Picks the strongest impact among findings: "reject" beats "iterate", and
 * when neither is present the supplied fallback is returned.
 *
 * @param {Array<object>} findings - Findings to inspect.
 * @param {string} fallback - Impact to use when no finding rejects or iterates.
 * @returns {string} "reject", "iterate", or `fallback`.
 */
const strongestImpact = (findings, fallback) => {
    const impacts = new Set(findings.map((finding) => defaultImpactForFinding(finding)));
    if (impacts.has("reject")) {
        return "reject";
    }
    return impacts.has("iterate") ? "iterate" : fallback;
};
1014
/**
 * Builds the `allure-agent-expectation-result/v1` record for a run.
 *
 * Only findings whose check name is in `EXPECTATION_CHECK_IDS` take part.
 * The verdict is decided by the first matching rule:
 *   1. any "expectations-invalid" finding          -> status "unavailable"
 *   2. any empty / unsupported-control finding     -> status "unsupported"
 *   3. a "no-tests-observed" finding               -> "failed" / "reject"
 *   4. no runtime-matching controls                -> "not_requested" / "advisory"
 *   5. a blocking finding that rejects             -> "failed" / "reject"
 *   6. a blocking finding that iterates            -> "failed" / "iterate"
 *   7. modeling completeness is "partial"          -> "partial" / "iterate"
 *   8. otherwise                                   -> "matched" / "accept"
 *
 * @param {object} params
 * @param {object | undefined} params.expectations - Loaded expectations, if any.
 * @param {Array<object>} params.findings - All findings of the run.
 * @param {number} params.observedTestCount - Number of observed tests.
 * @param {{completeness: string}} params.modelingSummary - Modeling summary of the run.
 * @returns {object} Expectation result with status, impact, source, counters and finding ids.
 */
const buildExpectationResult = (params) => {
    const { expectations, findings, observedTestCount, modelingSummary } = params;
    const expectationFindings = findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName));
    const findingsNamed = (checkName) => expectationFindings.filter((finding) => finding.checkName === checkName);
    const countIn = (checkIds) => expectationFindings.filter((finding) => checkIds.has(finding.checkName)).length;
    const recognized = recognizedControlCount(expectations);
    const runtimeMatching = runtimeMatchingControlCount(expectations);
    const invalidFindings = findingsNamed("expectations-invalid");
    const emptyFindings = findingsNamed("expectations-empty");
    const unsupportedFindings = findingsNamed("expectations-unsupported-control");
    // "expectations-weak-goal" findings are excluded from the verdict rules below.
    const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal");
    const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0;
    let status;
    let impact;
    if (invalidFindings.length) {
        status = "unavailable";
        // strongestImpact yields "reject", "iterate" or the fallback, so with a
        // "reject" fallback the result is never weaker than "iterate".
        impact = strongestImpact(invalidFindings, "reject");
    }
    else if (emptyFindings.length || unsupportedFindings.length) {
        status = "unsupported";
        // With an "iterate" fallback the result is already "reject" or "iterate".
        impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate");
    }
    else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) {
        status = "failed";
        impact = "reject";
    }
    else if (runtimeMatching === 0) {
        status = "not_requested";
        impact = "advisory";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "reject")) {
        status = "failed";
        impact = "reject";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "iterate")) {
        status = "failed";
        impact = "iterate";
    }
    else if (modelingSummary.completeness === "partial") {
        status = "partial";
        impact = "iterate";
    }
    else {
        status = "matched";
        impact = "accept";
    }
    return {
        schema_version: "allure-agent-expectation-result/v1",
        status,
        impact,
        source: expectations
            ? {
                kind: expectations.sourceKind,
                // A path is only reported for file-based expectations.
                path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
            }
            : {
                kind: "none",
                path: null,
            },
        recognized_control_count: recognized,
        unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
        degraded_controls: [],
        summary: {
            expected_tests: expectedTests,
            observed_tests: observedTestCount,
            missing_expected: countIn(MISSING_EXPECTED_CHECK_IDS),
            forbidden_observed: findingsNamed("forbidden-label-observed").length,
            unexpected_observed: 0,
            evidence_mismatches: countIn(EVIDENCE_MISMATCH_CHECK_IDS),
        },
        finding_ids: expectationFindings.map((finding) => finding.findingId),
    };
};
797
1089
  const sortFindings = (findings) => [...findings].sort((left, right) => {
798
1090
  const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
799
1091
  if (bySeverity !== 0) {
@@ -818,6 +1110,28 @@ const renderFindingEvidenceLinks = (params) => {
818
1110
  })
819
1111
  .join("\n");
820
1112
  };
1113
/**
 * Formats a structured `expected` / `observed` finding value as one line of text.
 *
 * - `undefined`, `null`, empty strings, functions and symbols give `undefined`;
 * - non-empty strings, numbers and booleans are stringified;
 * - arrays become their formatted items joined by ", ";
 * - objects become "key: value" pairs joined by "; ".
 *
 * Anything with no printable content (empty string, empty array, array of
 * empty items, object without printable entries) returns `undefined` rather
 * than `""`, so a caller's `?? fallback` is used when there is nothing to show.
 *
 * @param {unknown} value - Value to format.
 * @returns {string | undefined} Formatted text, or `undefined` when there is nothing printable.
 */
const formatFindingStructuredValue = (value) => {
    if (value === undefined || value === null) {
        return undefined;
    }
    if (typeof value === "string") {
        return value.length > 0 ? value : undefined;
    }
    if (typeof value === "number" || typeof value === "boolean") {
        return String(value);
    }
    if (Array.isArray(value)) {
        const items = value
            .map((item) => formatFindingStructuredValue(item))
            .filter((item) => item !== undefined);
        return items.length ? items.join(", ") : undefined;
    }
    if (typeof value === "object") {
        const parts = Object.entries(value).flatMap(([key, item]) => {
            const formatted = formatFindingStructuredValue(item);
            return formatted ? [`${key}: ${formatted}`] : [];
        });
        return parts.length ? parts.join("; ") : undefined;
    }
    return undefined;
};
821
1135
  const renderFindingsSection = (params) => {
822
1136
  const { title, findings, currentFilePath, outputDir } = params;
823
1137
  if (!findings.length) {
@@ -825,25 +1139,26 @@ const renderFindingsSection = (params) => {
825
1139
  }
826
1140
  const lines = [`## ${title}`, ""];
827
1141
  for (const finding of sortFindings(findings)) {
828
- lines.push(`### [${finding.severity.toUpperCase()}] ${escapeInlineMarkdown(finding.category)} / ${escapeInlineMarkdown(finding.checkName)}`);
829
- lines.push("");
830
- lines.push(`- Message: ${escapeInlineMarkdown(finding.message)}`);
831
- lines.push(`- Explanation: ${escapeInlineMarkdown(finding.explanation)}`);
832
- lines.push(`- Remediation: ${escapeInlineMarkdown(finding.remediationHint)}`);
833
- if (finding.expectedReference) {
834
- lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
835
- }
836
- if (finding.confidence !== undefined) {
837
- lines.push(`- Confidence: ${finding.confidence}`);
838
- }
839
- lines.push("- Evidence:");
840
- lines.push("");
841
- lines.push(renderFindingEvidenceLinks({
1142
+ const impact = defaultImpactForFinding(finding);
1143
+ const expected = formatFindingStructuredValue(finding.expected) ??
1144
+ (finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
1145
+ const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
1146
+ const evidenceLinks = renderFindingEvidenceLinks({
842
1147
  finding,
843
1148
  currentFilePath,
844
1149
  outputDir,
845
- }));
846
- lines.push("");
1150
+ });
1151
+ lines.push(`- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`);
1152
+ if (expected) {
1153
+ lines.push(` Expected: ${escapeInlineMarkdown(expected)}`);
1154
+ }
1155
+ if (observed) {
1156
+ lines.push(` Observed: ${escapeInlineMarkdown(observed)}`);
1157
+ }
1158
+ lines.push(` Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
1159
+ if (evidenceLinks !== "None") {
1160
+ lines.push(` Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
1161
+ }
847
1162
  }
848
1163
  return lines.join("\n").trimEnd();
849
1164
  };
@@ -858,6 +1173,25 @@ const renderExpectationSection = (entry) => {
858
1173
  ];
859
1174
  return lines.join("\n");
860
1175
  };
1176
/**
 * Renders the "Expectation Result" markdown section from the record produced
 * by `buildExpectationResult(params)`.
 *
 * @param {object} params - Passed through unchanged to `buildExpectationResult`.
 * @returns {string} Markdown heading followed by one bullet per field.
 */
const renderExpectationResultSection = (params) => {
    const { status, impact, source, summary, recognized_control_count: recognizedControls } = buildExpectationResult(params);
    // The source path is appended in parentheses only when one is reported.
    const sourceLabel = source.path ? `${source.kind} (${source.path})` : `${source.kind}`;
    const bullets = [
        ["Status", status],
        ["Impact", impact],
        ["Recognized Controls", recognizedControls],
        ["Source", sourceLabel],
        ["Expected Tests", summary.expected_tests],
        ["Observed Tests", summary.observed_tests],
        ["Missing Expected", summary.missing_expected],
        ["Forbidden Observed", summary.forbidden_observed],
        ["Evidence Mismatches", summary.evidence_mismatches],
        ["Run Manifest", "[manifest/run.json](manifest/run.json)"],
        ["Findings Manifest", "[manifest/findings.jsonl](manifest/findings.jsonl)"],
    ];
    const lines = ["## Expectation Result", ""];
    for (const [label, value] of bullets) {
        lines.push(`- ${label}: ${value}`);
    }
    return lines.join("\n");
};
861
1195
  const renderRerunGuidance = (findings) => {
862
1196
  const relevant = findings.filter(({ category }) => category === "evidence" || category === "smells" || category === "metadata");
863
1197
  if (!relevant.length) {
@@ -875,7 +1209,7 @@ const renderRerunGuidance = (findings) => {
875
1209
  if (relevant.some(({ checkName }) => checkName === "noop-dominated-steps")) {
876
1210
  lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
877
1211
  }
878
- lines.push("- Rerun only the relevant tests with the same expectations file so the next review is scoped and comparable.");
1212
+ lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
879
1213
  return lines.join("\n");
880
1214
  };
881
1215
  const renderTestFile = (params) => {
@@ -1015,14 +1349,24 @@ const renderIndex = (params) => {
1015
1349
  lines.push("");
1016
1350
  lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
1017
1351
  lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
1018
- lines.push(`- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
1352
+ lines.push(expectations.sourceKind === "inline"
1353
+ ? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
1354
+ : `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
1019
1355
  lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
1020
1356
  lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
1357
+ lines.push(renderEvidenceExpectationSummary(expectations.evidence));
1021
1358
  if (expectations.notes.length) {
1022
1359
  lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
1023
1360
  }
1024
1361
  }
1025
1362
  lines.push("");
1363
+ lines.push(renderExpectationResultSection({
1364
+ expectations,
1365
+ findings,
1366
+ observedTestCount: tests.length,
1367
+ modelingSummary,
1368
+ }));
1369
+ lines.push("");
1026
1370
  lines.push("## Advisory Check Summary");
1027
1371
  lines.push("");
1028
1372
  lines.push(`- modeling completeness: ${modelingSummary.completeness}`);
@@ -1225,10 +1569,7 @@ const readMaterializedArtifactText = async (outputDir, artifact) => {
1225
1569
  return undefined;
1226
1570
  }
1227
1571
  };
1228
- const resolveOutputDir = (options) => {
1229
- const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
1230
- return outputDir ? resolve(outputDir) : undefined;
1231
- };
1572
/**
 * Resolves the configured output directory to an absolute path.
 *
 * @param {{outputDir?: string}} options - Plugin options.
 * @returns {string | undefined} Absolute path, or `undefined` when `outputDir` is unset or empty.
 */
const resolveOutputDir = (options) => {
    const { outputDir } = options;
    if (!outputDir) {
        return undefined;
    }
    return resolve(outputDir);
};
1232
1573
  const cleanupManagedEntries = async (outputDir) => {
1233
1574
  await Promise.all(MANAGED_ENTRIES.map(async (entry) => {
1234
1575
  await rm(join(outputDir, entry), {
@@ -1267,15 +1608,97 @@ const createFindingFactory = () => {
1267
1608
  };
1268
1609
  };
1269
1610
  };
1270
- const parseExpectations = (rawContent) => {
1271
- const parsed = parse(rawContent);
1611
/**
 * Guards that parsed expectations are a non-null, non-array object.
 *
 * @param {unknown} parsed - Parsed expectations input.
 * @returns {void}
 * @throws {Error} "Expected an expectations object" for any other value.
 */
const assertExpectationsObject = (parsed) => {
    const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    if (isRecord) {
        return;
    }
    throw new Error("Expected an expectations object");
};
1277
- const loadExpectations = async (outputDir, createFinding) => {
1278
- const configuredPath = env[AGENT_EXPECTATIONS_ENV];
1616
/**
 * Writes the parsed expectations to `manifest/expected.json` under the output
 * directory as 2-space indented JSON with a trailing newline, creating the
 * `manifest` directory first.
 *
 * @param {string} outputDir - Agent output directory.
 * @param {object} parsed - Expectations document to serialize.
 * @returns {Promise<string>} The manifest path relative to `outputDir`.
 */
const writeExpectedManifest = async (outputDir, parsed) => {
    const relativePath = normalizeMarkdownPath("manifest/expected.json");
    const manifestDir = join(outputDir, "manifest");
    await mkdir(manifestDir, { recursive: true });
    // Serialize only after the directory exists, keeping the original side-effect order.
    const targetPath = join(outputDir, relativePath);
    const serialized = `${JSON.stringify(parsed, null, 2)}\n`;
    await writeFile(targetPath, serialized, "utf-8");
    return relativePath;
};
1622
/**
 * Assembles the loaded-expectations record from a parsed document: the source
 * metadata, the raw document, and the normalized notes, selectors and
 * evidence requirements.
 *
 * @param {object} params
 * @param {object} params.parsed - Parsed expectations document.
 * @param {string} params.relativePath - Path of the written manifest, relative to the output directory.
 * @param {string} params.sourceKind - Origin of the expectations (callers in view pass "inline" or "file").
 * @param {string} [params.sourcePath] - Path of the source file, when there is one.
 * @returns {object} Loaded expectations.
 */
const toLoadedExpectations = (params) => {
    const { parsed: raw, relativePath, sourceKind, sourcePath } = params;
    const { goal, task_id: taskId } = raw;
    return {
        sourcePath,
        sourceKind,
        relativePath,
        raw,
        goal,
        taskId,
        notes: normalizeNotes(raw.notes),
        expected: normalizeSelectors(raw.expected),
        forbidden: normalizeSelectors(raw.forbidden),
        evidence: normalizeEvidenceExpectations(raw.evidence),
    };
};
1637
+ const loadExpectations = async (outputDir, createFinding, options) => {
1638
+ const configuredPath = options.expectationsPath;
1639
+ const inlineExpectations = options.expectations;
1640
+ if (!configuredPath && !inlineExpectations) {
1641
+ return {
1642
+ expectations: undefined,
1643
+ findings: [],
1644
+ };
1645
+ }
1646
+ if (configuredPath && inlineExpectations) {
1647
+ return {
1648
+ expectations: undefined,
1649
+ findings: [
1650
+ createFinding({
1651
+ subject: "run",
1652
+ subjectType: "run",
1653
+ severity: "high",
1654
+ category: "bootstrap",
1655
+ impact: "reject",
1656
+ checkName: "expectations-invalid",
1657
+ title: "Expectation input is invalid",
1658
+ message: "Both file and inline agent expectations were provided.",
1659
+ explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
1660
+ evidencePaths: [],
1661
+ remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
1662
+ expectedReference: undefined,
1663
+ }),
1664
+ ],
1665
+ };
1666
+ }
1667
+ if (inlineExpectations) {
1668
+ try {
1669
+ assertExpectationsObject(inlineExpectations);
1670
+ const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
1671
+ return {
1672
+ expectations: toLoadedExpectations({
1673
+ parsed: inlineExpectations,
1674
+ relativePath,
1675
+ sourceKind: "inline",
1676
+ }),
1677
+ findings: [],
1678
+ };
1679
+ }
1680
+ catch (error) {
1681
+ return {
1682
+ expectations: undefined,
1683
+ findings: [
1684
+ createFinding({
1685
+ subject: "run",
1686
+ subjectType: "run",
1687
+ severity: "high",
1688
+ category: "bootstrap",
1689
+ impact: "reject",
1690
+ checkName: "expectations-invalid",
1691
+ title: "Expectation input is invalid",
1692
+ message: "Could not load inline agent expectations",
1693
+ explanation: `The inline expectations option could not be normalized: ${error.message}`,
1694
+ evidencePaths: [],
1695
+ remediationHint: "Provide a valid expectations object before rerunning.",
1696
+ expectedReference: undefined,
1697
+ }),
1698
+ ],
1699
+ };
1700
+ }
1701
+ }
1279
1702
  if (!configuredPath) {
1280
1703
  return {
1281
1704
  expectations: undefined,
@@ -1285,21 +1708,15 @@ const loadExpectations = async (outputDir, createFinding) => {
1285
1708
  const expectationsPath = resolve(configuredPath);
1286
1709
  try {
1287
1710
  const rawContent = await readFile(expectationsPath, "utf-8");
1288
- const parsed = parseExpectations(rawContent);
1289
- const relativePath = normalizeMarkdownPath("manifest/expected.json");
1290
- await mkdir(join(outputDir, "manifest"), { recursive: true });
1291
- await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
1711
+ const parsed = parseAgentExpectations(rawContent);
1712
+ const relativePath = await writeExpectedManifest(outputDir, parsed);
1292
1713
  return {
1293
- expectations: {
1294
- sourcePath: expectationsPath,
1714
+ expectations: toLoadedExpectations({
1715
+ parsed,
1295
1716
  relativePath,
1296
- raw: parsed,
1297
- goal: parsed.goal,
1298
- taskId: parsed.task_id,
1299
- notes: normalizeNotes(parsed.notes),
1300
- expected: normalizeSelectors(parsed.expected),
1301
- forbidden: normalizeSelectors(parsed.forbidden),
1302
- },
1717
+ sourceKind: "file",
1718
+ sourcePath: expectationsPath,
1719
+ }),
1303
1720
  findings: [],
1304
1721
  };
1305
1722
  }
@@ -1312,37 +1729,19 @@ const loadExpectations = async (outputDir, createFinding) => {
1312
1729
  subjectType: "run",
1313
1730
  severity: "high",
1314
1731
  category: "bootstrap",
1315
- checkName: "invalid-expectations-file",
1316
- message: `Could not load ALLURE_AGENT_EXPECTATIONS from ${expectationsPath}`,
1732
+ impact: "reject",
1733
+ checkName: "expectations-invalid",
1734
+ title: "Expectation input is invalid",
1735
+ message: `Could not load expectations from ${expectationsPath}`,
1317
1736
  explanation: `The expectations file could not be parsed as YAML or JSON: ${error.message}`,
1318
1737
  evidencePaths: [],
1319
- remediationHint: "Provide a readable YAML or JSON file in ALLURE_AGENT_EXPECTATIONS before rerunning.",
1738
+ remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
1320
1739
  expectedReference: undefined,
1321
1740
  }),
1322
1741
  ],
1323
1742
  };
1324
1743
  }
1325
1744
  };
1326
- const loadProjectGuide = async (outputDir) => {
1327
- const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd());
1328
- const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
1329
- try {
1330
- const content = await readFile(sourcePath, "utf-8");
1331
- const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md"));
1332
- await mkdir(join(outputDir, "project", "docs"), { recursive: true });
1333
- await writeFile(join(outputDir, relativePath), content, "utf-8");
1334
- return {
1335
- sourcePath,
1336
- relativePath,
1337
- };
1338
- }
1339
- catch (error) {
1340
- if (error.code === "ENOENT") {
1341
- return undefined;
1342
- }
1343
- throw error;
1344
- }
1345
- };
1346
1745
  const computeScopeEvaluation = (params) => {
1347
1746
  const { tr, environmentId, expectations } = params;
1348
1747
  if (!expectations) {
@@ -1418,22 +1817,54 @@ const collectTestEvidencePaths = (entry) => {
1418
1817
  }
1419
1818
  return uniqueValues(paths);
1420
1819
  };
1820
+ const getExpectationTargetEntries = (entries, expectations) => {
1821
+ if (!hasSelector(expectations.expected)) {
1822
+ return entries;
1823
+ }
1824
+ return entries.filter((entry) => entry.scope.scopeMatch === "match");
1825
+ };
1826
+ const currentAttemptStepSummary = (entry) => mergeStepSummaries([entry.attempts[0].stepSummary, entry.attempts[0].fixtureStepSummary]);
1827
+ const nonMissingArtifacts = (entry) => entry.allArtifacts.filter((artifact) => !artifact.missing);
1828
+ const formatAttachmentExpectation = (expectation) => [
1829
+ expectation.name ? `name=${expectation.name}` : undefined,
1830
+ expectation.contentType ? `content-type=${expectation.contentType}` : undefined,
1831
+ ]
1832
+ .filter(Boolean)
1833
+ .join(", ");
1834
+ const matchesAttachmentExpectation = (artifact, expectation) => {
1835
+ if (artifact.missing) {
1836
+ return false;
1837
+ }
1838
+ if (expectation.name && artifact.displayName !== expectation.name) {
1839
+ return false;
1840
+ }
1841
+ if (expectation.contentType && artifact.contentType !== expectation.contentType) {
1842
+ return false;
1843
+ }
1844
+ return true;
1845
+ };
1421
1846
  const buildRunAndTestFindings = (params) => {
1422
1847
  const { entries, expectations, globalArtifacts, modelingSummary, createFinding } = params;
1423
1848
  const runFindings = [];
1424
1849
  const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
1425
1850
  const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
1426
- if (entries.length === 0) {
1851
+ if (entries.length === 0 && expectations?.expected.testCount !== 0) {
1427
1852
  runFindings.push(createFinding({
1428
1853
  subject: "run",
1429
1854
  subjectType: "run",
1430
1855
  severity: "high",
1856
+ impact: "reject",
1431
1857
  category: "bootstrap",
1432
- checkName: "no-visible-tests",
1858
+ checkName: "no-tests-observed",
1859
+ title: "No logical tests were observed",
1433
1860
  message: "No visible test results were found in the run.",
1434
1861
  explanation: "The agent output was generated, but there were no visible logical test results to review.",
1435
- evidencePaths: [],
1436
- remediationHint: "Verify that Allure results are being generated and that the test command actually executed the intended tests.",
1862
+ evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
1863
+ remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
1864
+ expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
1865
+ observed: { test_count: 0 },
1866
+ action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
1867
+ confidence: 1,
1437
1868
  }));
1438
1869
  }
1439
1870
  if (!stdoutArtifact && !stderrArtifact) {
@@ -1446,7 +1877,7 @@ const buildRunAndTestFindings = (params) => {
1446
1877
  message: "The run does not include global stdout or stderr logs.",
1447
1878
  explanation: "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
1448
1879
  evidencePaths: [],
1449
- remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics, or use `ALLURE_AGENT_*` with `allure run` for lower-level control.",
1880
+ remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
1450
1881
  confidence: 0.9,
1451
1882
  }));
1452
1883
  }
@@ -1486,19 +1917,93 @@ const buildRunAndTestFindings = (params) => {
1486
1917
  const actualEnvironments = uniqueValues(entries.map(({ environmentId }) => environmentId));
1487
1918
  if (expectations) {
1488
1919
  const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
1920
+ const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
1921
+ const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
1922
+ if (recognizedControlCount(expectations) === 0) {
1923
+ runFindings.push(createFinding({
1924
+ subject: "run",
1925
+ subjectType: "run",
1926
+ severity: "high",
1927
+ impact: "iterate",
1928
+ category: "scope",
1929
+ checkName: "expectations-empty",
1930
+ title: "Expectation source did not contain recognized controls",
1931
+ message: "Expectation source was provided but no recognized M1 controls were parsed.",
1932
+ explanation: "The run can still be reviewed, but expectation precision was not requested.",
1933
+ evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1934
+ remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
1935
+ observed: { recognized_control_count: 0 },
1936
+ action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
1937
+ confidence: 1,
1938
+ }));
1939
+ }
1940
+ if ((hasRuntimeControls && !expectations.goal) ||
1941
+ ["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)) {
1942
+ runFindings.push(createFinding({
1943
+ subject: "run",
1944
+ subjectType: "run",
1945
+ severity: "info",
1946
+ impact: "advisory",
1947
+ category: "scope",
1948
+ checkName: "expectations-weak-goal",
1949
+ title: "Run goal is missing or too generic",
1950
+ message: expectations.goal
1951
+ ? `The run goal is too generic: ${expectations.goal}`
1952
+ : "Runtime expectations were provided without a goal.",
1953
+ explanation: "The goal is intent metadata and does not change the runtime evidence.",
1954
+ evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1955
+ remediationHint: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
1956
+ expected: { goal: "specific validation claim" },
1957
+ observed: { goal: expectations.goal ?? null },
1958
+ action: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
1959
+ confidence: 0.9,
1960
+ }));
1961
+ }
1962
+ if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
1963
+ const severity = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
1964
+ const impact = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
1965
+ runFindings.push(createFinding({
1966
+ subject: "run",
1967
+ subjectType: "run",
1968
+ severity,
1969
+ impact,
1970
+ category: "scope",
1971
+ checkName: "expected-count-mismatch",
1972
+ title: "Observed logical test count did not match",
1973
+ message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
1974
+ explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
1975
+ evidencePaths: expectations.relativePath
1976
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
1977
+ : ["manifest/tests.jsonl"],
1978
+ remediationHint: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
1979
+ expectedReference: "expected.test_count",
1980
+ expected: { test_count: expectations.expected.testCount },
1981
+ observed: { test_count: entries.length },
1982
+ action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
1983
+ confidence: 1,
1984
+ }));
1985
+ }
1489
1986
  expectations.expected.fullNames.forEach((fullName, index) => {
1490
1987
  if (!allFullNames.includes(fullName)) {
1491
1988
  runFindings.push(createFinding({
1492
1989
  subject: "run",
1493
1990
  subjectType: "run",
1494
1991
  severity: "high",
1992
+ impact: "reject",
1495
1993
  category: "scope",
1496
- checkName: "missing-expected-test",
1497
- message: `Expected test did not run: ${fullName}`,
1498
- explanation: "The expectations file explicitly listed this test, but it did not appear in the agentic output.",
1499
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1500
- remediationHint: "Check the test selection, environment, and feature branch scope before rerunning.",
1994
+ checkName: "expected-test-missing",
1995
+ title: "Expected test was not observed",
1996
+ message: "The expected test did not appear in the observed logical results.",
1997
+ explanation: `Expected test did not run: ${fullName}`,
1998
+ evidencePaths: expectations.relativePath
1999
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2000
+ : ["manifest/tests.jsonl"],
2001
+ remediationHint: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
1501
2002
  expectedReference: `expected.full_names[${index}]`,
2003
+ expected: { full_names: [fullName] },
2004
+ observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
2005
+ action: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
2006
+ confidence: 1,
1502
2007
  }));
1503
2008
  }
1504
2009
  });
@@ -1507,14 +2012,22 @@ const buildRunAndTestFindings = (params) => {
1507
2012
  runFindings.push(createFinding({
1508
2013
  subject: "run",
1509
2014
  subjectType: "run",
1510
- severity: "warning",
2015
+ severity: "high",
2016
+ impact: "reject",
1511
2017
  category: "scope",
1512
- checkName: "missing-expected-prefix",
1513
- message: `No executed test matched the expected prefix: ${prefix}`,
1514
- explanation: "The expectations file asked for tests within this name prefix, but none were recorded.",
1515
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1516
- remediationHint: "Check the expected selector or adjust the executed test target so the intended scope is covered.",
2018
+ checkName: "expected-prefix-missing",
2019
+ title: "Expected test prefix was not observed",
2020
+ message: `No observed test full name started with the expected prefix: ${prefix}`,
2021
+ explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
2022
+ evidencePaths: expectations.relativePath
2023
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2024
+ : ["manifest/tests.jsonl"],
2025
+ remediationHint: "Treat the run as wrong scope or missing coverage.",
1517
2026
  expectedReference: `expected.full_name_prefixes[${index}]`,
2027
+ expected: { full_name_prefixes: [prefix] },
2028
+ observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
2029
+ action: "Treat the run as wrong scope or missing coverage.",
2030
+ confidence: 1,
1518
2031
  }));
1519
2032
  }
1520
2033
  });
@@ -1523,14 +2036,22 @@ const buildRunAndTestFindings = (params) => {
1523
2036
  runFindings.push(createFinding({
1524
2037
  subject: "run",
1525
2038
  subjectType: "run",
1526
- severity: "warning",
2039
+ severity: "high",
2040
+ impact: "reject",
1527
2041
  category: "scope",
1528
- checkName: "missing-expected-environment",
2042
+ checkName: "expected-environment-missing",
2043
+ title: "Expected environment was not observed",
1529
2044
  message: `Expected environment did not appear in the run: ${environment}`,
1530
- explanation: "The expectations file scoped the run to this environment, but no logical test result matched it.",
1531
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1532
- remediationHint: "Check the environment selector or rerun the intended environment explicitly.",
2045
+ explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
2046
+ evidencePaths: expectations.relativePath
2047
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2048
+ : ["manifest/tests.jsonl"],
2049
+ remediationHint: "Rerun in the intended environment before making environment-specific claims.",
1533
2050
  expectedReference: `expected.environments[${index}]`,
2051
+ expected: { environments: [environment] },
2052
+ observed: { environments: actualEnvironments },
2053
+ action: "Rerun in the intended environment before making environment-specific claims.",
2054
+ confidence: 1,
1534
2055
  }));
1535
2056
  }
1536
2057
  });
@@ -1540,14 +2061,22 @@ const buildRunAndTestFindings = (params) => {
1540
2061
  runFindings.push(createFinding({
1541
2062
  subject: "run",
1542
2063
  subjectType: "run",
1543
- severity: "warning",
2064
+ severity: "high",
2065
+ impact: "reject",
1544
2066
  category: "scope",
1545
- checkName: "missing-expected-label-selector",
1546
- message: `No executed test matched ${formatLabelRequirement(labelName, values)}`,
1547
- explanation: "The expectations file defined a label selector for the intended scope, but no logical test result satisfied it.",
1548
- evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
1549
- remediationHint: "Add the expected label metadata to the intended tests or adjust the expectations selector.",
2067
+ checkName: "expected-label-missing",
2068
+ title: "Expected label was not observed",
2069
+ message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
2070
+ explanation: "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
2071
+ evidencePaths: expectations.relativePath
2072
+ ? [expectations.relativePath, "manifest/tests.jsonl"]
2073
+ : ["manifest/tests.jsonl"],
2074
+ remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
1550
2075
  expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
2076
+ expected: { label_values: { [labelName]: values } },
2077
+ observed: { test_count: entries.length },
2078
+ action: "Fix metadata, selector, or run the correct labeled scope.",
2079
+ confidence: 1,
1551
2080
  }));
1552
2081
  }
1553
2082
  });
@@ -1570,28 +2099,53 @@ const buildRunAndTestFindings = (params) => {
1570
2099
  });
1571
2100
  }
1572
2101
  }
2102
+ const evidenceTargetKeys = expectations
2103
+ ? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
2104
+ : new Set();
1573
2105
  for (const entry of entries) {
1574
2106
  const currentAttempt = entry.attempts[0];
1575
2107
  const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
1576
2108
  const testEvidencePaths = collectTestEvidencePaths(entry);
1577
2109
  const allStepSummary = mergeStepSummaries(entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])));
2110
+ const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
2111
+ const expectedEvidence = expectations?.evidence;
2112
+ const currentStepSummary = currentAttemptStepSummary(entry);
2113
+ const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
2114
+ const currentAttachments = nonMissingArtifacts(entry);
1578
2115
  const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
1579
2116
  const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
1580
2117
  const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
1581
2118
  if (entry.scope.scopeMatch === "forbidden") {
2119
+ const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => reference.startsWith("forbidden.label_values"));
2120
+ const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
1582
2121
  entry.findings.push(createFinding({
1583
2122
  subject: entry.key,
1584
2123
  subjectType: "test",
1585
2124
  severity: "high",
2125
+ impact: "reject",
1586
2126
  category: "scope",
1587
- checkName: "forbidden-selector-match",
1588
- message: "This test matched a forbidden selector from the expectations file.",
1589
- explanation: "The logical test belongs to a scope that the expectations file explicitly marked as forbidden.",
2127
+ checkName,
2128
+ title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
2129
+ message: forbiddenLabelReference
2130
+ ? "This test has a label value that was explicitly forbidden."
2131
+ : "This test matched a forbidden selector from the expectations.",
2132
+ explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
1590
2133
  evidencePaths: expectations?.relativePath
1591
2134
  ? [entry.relativePath, expectations.relativePath]
1592
2135
  : [entry.relativePath],
1593
- remediationHint: "Tighten the test selection or update the expectations file before accepting the run.",
1594
- expectedReference: entry.scope.expectedReferences[0],
2136
+ remediationHint: forbiddenLabelReference
2137
+ ? "Treat as scope drift. Split or correct the run before using it as focused validation."
2138
+ : "Tighten the test selection or update the expectations before accepting the run.",
2139
+ expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
2140
+ expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
2141
+ observed: {
2142
+ full_name: entry.tr.fullName ?? entry.tr.name,
2143
+ labels: toLabelEntries(entry.tr.labels),
2144
+ },
2145
+ action: forbiddenLabelReference
2146
+ ? "Treat as scope drift. Split or correct the run before using it as focused validation."
2147
+ : "Tighten the test selection or update the expectations before accepting the run.",
2148
+ confidence: 1,
1595
2149
  }));
1596
2150
  }
1597
2151
  else if (entry.scope.scopeMatch === "unexpected") {
@@ -1602,11 +2156,11 @@ const buildRunAndTestFindings = (params) => {
1602
2156
  category: "scope",
1603
2157
  checkName: "unexpected-test",
1604
2158
  message: "This test ran outside the expected scope.",
1605
- explanation: "The expectations file defined positive scope selectors, but this logical test did not match any of them.",
2159
+ explanation: "The expectations defined positive scope selectors, but this logical test did not match any of them.",
1606
2160
  evidencePaths: expectations?.relativePath
1607
2161
  ? [entry.relativePath, expectations.relativePath]
1608
2162
  : [entry.relativePath],
1609
- remediationHint: "Rerun only the intended tests or broaden the expectations file if this test is part of the plan.",
2163
+ remediationHint: "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
1610
2164
  }));
1611
2165
  }
1612
2166
  if (entry.scope.metadataMismatches.length > 0) {
@@ -1640,6 +2194,117 @@ const buildRunAndTestFindings = (params) => {
1640
2194
  confidence: 0.85,
1641
2195
  }));
1642
2196
  }
2197
+ expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
2198
+ if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
2199
+ return;
2200
+ }
2201
+ entry.findings.push(createFinding({
2202
+ subject: entry.key,
2203
+ subjectType: "test",
2204
+ severity: "warning",
2205
+ impact: "iterate",
2206
+ category: "evidence",
2207
+ checkName: "expected-step-containing-missing",
2208
+ title: "Expected step text was not observed",
2209
+ message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
2210
+ explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
2211
+ evidencePaths: expectations?.relativePath
2212
+ ? [entry.relativePath, expectations.relativePath]
2213
+ : [entry.relativePath],
2214
+ remediationHint: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
2215
+ expectedReference: `evidence.step_name_contains[${index}]`,
2216
+ expected: { step_name_contains: [expectedText] },
2217
+ observed: { steps: currentStepSummary.totalSteps, matched: false },
2218
+ action: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
2219
+ confidence: 0.9,
2220
+ }));
2221
+ });
2222
+ if (expectedEvidenceApplies &&
2223
+ expectedEvidence?.minSteps !== undefined &&
2224
+ currentMeaningfulSteps < expectedEvidence.minSteps) {
2225
+ entry.findings.push(createFinding({
2226
+ subject: entry.key,
2227
+ subjectType: "test",
2228
+ severity: "warning",
2229
+ impact: "iterate",
2230
+ category: "evidence",
2231
+ checkName: "insufficient-expected-steps",
2232
+ title: "Expected step count was not met",
2233
+ message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
2234
+ explanation: "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
2235
+ evidencePaths: expectations?.relativePath
2236
+ ? [entry.relativePath, expectations.relativePath]
2237
+ : [entry.relativePath],
2238
+ remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
2239
+ expectedReference: "evidence.min_steps",
2240
+ expected: { min_steps: expectedEvidence.minSteps },
2241
+ observed: { meaningful_steps: currentMeaningfulSteps },
2242
+ action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
2243
+ confidence: 0.9,
2244
+ }));
2245
+ }
2246
+ if (expectedEvidenceApplies &&
2247
+ expectedEvidence?.minAttachments !== undefined &&
2248
+ currentAttachments.length < expectedEvidence.minAttachments) {
2249
+ entry.findings.push(createFinding({
2250
+ subject: entry.key,
2251
+ subjectType: "test",
2252
+ severity: "warning",
2253
+ impact: "iterate",
2254
+ category: "evidence",
2255
+ checkName: "insufficient-expected-attachments",
2256
+ title: "Expected attachment count was not met",
2257
+ message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
2258
+ explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
2259
+ evidencePaths: expectations?.relativePath
2260
+ ? [entry.relativePath, expectations.relativePath]
2261
+ : [entry.relativePath],
2262
+ remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
2263
+ expectedReference: "evidence.min_attachments",
2264
+ expected: { min_attachments: expectedEvidence.minAttachments },
2265
+ observed: { attachments: currentAttachments.length },
2266
+ action: "Attach real runtime artifacts only when they are needed for debugging or review.",
2267
+ confidence: 0.9,
2268
+ }));
2269
+ }
2270
+ expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
2271
+ if (!expectedEvidenceApplies) {
2272
+ return;
2273
+ }
2274
+ if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
2275
+ return;
2276
+ }
2277
+ entry.findings.push(createFinding({
2278
+ subject: entry.key,
2279
+ subjectType: "test",
2280
+ severity: "warning",
2281
+ impact: "iterate",
2282
+ category: "evidence",
2283
+ checkName: "missing-expected-attachment",
2284
+ title: "Expected attachment was not observed",
2285
+ message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
2286
+ explanation: "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
2287
+ evidencePaths: expectations?.relativePath
2288
+ ? [entry.relativePath, expectations.relativePath]
2289
+ : [entry.relativePath],
2290
+ remediationHint: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
2291
+ expectedReference: `evidence.attachments[${index}]`,
2292
+ expected: {
2293
+ attachment: {
2294
+ ...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
2295
+ ...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
2296
+ },
2297
+ },
2298
+ observed: {
2299
+ attachments: currentAttachments.map((attachment) => ({
2300
+ name: attachment.displayName,
2301
+ content_type: attachment.contentType ?? null,
2302
+ })),
2303
+ },
2304
+ action: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
2305
+ confidence: 0.95,
2306
+ }));
2307
+ });
1643
2308
  if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
1644
2309
  entry.findings.push(createFinding({
1645
2310
  subject: entry.key,
@@ -1928,11 +2593,17 @@ const appendJsonlLine = async (path, item) => {
1928
2593
  await appendFile(path, `${JSON.stringify(item)}\n`, "utf-8");
1929
2594
  };
1930
2595
  const toRunManifest = (params) => {
1931
- const { context, command, generatedAt, phase, expectations, projectGuide, snapshot } = params;
2596
+ const { context, command, agentContext, generatedAt, phase, expectations, snapshot } = params;
1932
2597
  const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
1933
2598
  const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
1934
2599
  const originalExitCode = snapshot.globalExitCode?.original ?? null;
1935
2600
  const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
2601
+ const expectationResult = buildExpectationResult({
2602
+ expectations,
2603
+ findings: snapshot.combinedAllFindings,
2604
+ observedTestCount: snapshot.entries.length,
2605
+ modelingSummary: snapshot.modelingSummary,
2606
+ });
1936
2607
  return {
1937
2608
  schema_version: AGENT_SCHEMA_VERSION,
1938
2609
  report_uuid: context.reportUuid,
@@ -1966,25 +2637,26 @@ const toRunManifest = (params) => {
1966
2637
  findings_manifest: "manifest/findings.jsonl",
1967
2638
  test_events_manifest: "manifest/test-events.jsonl",
1968
2639
  expected_manifest: expectations?.relativePath ?? null,
1969
- project_guide: projectGuide?.relativePath ?? null,
1970
2640
  process_logs: {
1971
2641
  stdout: stdoutArtifact?.relativePath ?? null,
1972
2642
  stderr: stderrArtifact?.relativePath ?? null,
1973
2643
  },
1974
2644
  },
1975
2645
  expectations_present: Boolean(expectations),
2646
+ expectations: expectations ? toExpectationModel(expectations) : null,
2647
+ expectation_result: expectationResult,
1976
2648
  check_summary: buildCheckSummary(snapshot.combinedAllFindings),
1977
2649
  agent_context: {
1978
- agent_name: env[AGENT_NAME_ENV] ?? null,
1979
- loop_id: env[AGENT_LOOP_ID_ENV] ?? null,
1980
- task_id: env[AGENT_TASK_ID_ENV] ?? expectations?.taskId ?? null,
1981
- conversation_id: env[AGENT_CONVERSATION_ID_ENV] ?? null,
2650
+ agent_name: agentContext.agentName ?? null,
2651
+ loop_id: agentContext.loopId ?? null,
2652
+ task_id: agentContext.taskId ?? expectations?.taskId ?? null,
2653
+ conversation_id: agentContext.conversationId ?? null,
1982
2654
  },
1983
2655
  };
1984
2656
  };
1985
2657
  const writeSnapshotFiles = async (params) => {
1986
2658
  const { runtime, snapshot, phase } = params;
1987
- const { outputDir, context, command, generatedAt, expectations, projectGuide } = runtime;
2659
+ const { outputDir, context, command, generatedAt, expectations } = runtime;
1988
2660
  const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
1989
2661
  const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
1990
2662
  for (const stalePath of runtime.currentTestPaths) {
@@ -2010,10 +2682,10 @@ const writeSnapshotFiles = async (params) => {
2010
2682
  writeJson(join(outputDir, "manifest", "run.json"), toRunManifest({
2011
2683
  context,
2012
2684
  command,
2685
+ agentContext: runtime.agentContext,
2013
2686
  generatedAt,
2014
2687
  phase,
2015
2688
  expectations,
2016
- projectGuide,
2017
2689
  snapshot,
2018
2690
  })),
2019
2691
  writeJsonlSnapshot(join(outputDir, "manifest", "tests.jsonl"), snapshot.entries.map(toTestsManifestLine)),
@@ -2035,7 +2707,7 @@ const writeSnapshotFiles = async (params) => {
2035
2707
  qualityGateResults: snapshot.qualityGateResults,
2036
2708
  findings: snapshot.combinedAllFindings,
2037
2709
  })),
2038
- writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(projectGuide?.relativePath)),
2710
+ writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
2039
2711
  ]);
2040
2712
  };
2041
2713
  const createBootstrapSnapshot = () => ({
@@ -2079,7 +2751,7 @@ const createBootstrapSnapshot = () => ({
2079
2751
  combinedAllFindings: [],
2080
2752
  });
2081
2753
  const writeBootstrapFiles = async (runtime) => {
2082
- await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(runtime.projectGuide?.relativePath));
2754
+ await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
2083
2755
  await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
2084
2756
  await writeSnapshotFiles({
2085
2757
  runtime,
@@ -2104,19 +2776,54 @@ const toTestsManifestLine = (entry) => ({
2104
2776
  markdown_path: entry.relativePath,
2105
2777
  assets_dir: entry.relativeAssetDir,
2106
2778
  });
2107
- const toFindingManifestLine = (finding) => ({
2108
- finding_id: finding.findingId,
2109
- subject: finding.subject,
2110
- severity: finding.severity,
2111
- category: finding.category,
2112
- check_name: finding.checkName,
2113
- message: finding.message,
2114
- explanation: finding.explanation,
2115
- evidence_paths: finding.evidencePaths,
2116
- remediation_hint: finding.remediationHint,
2117
- expected_reference: finding.expectedReference,
2118
- confidence: finding.confidence,
2779
+ const toFindingSubject = (finding) => ({
2780
+ type: finding.subjectType,
2781
+ ...(finding.subjectType === "test" ? { id: finding.subject, path: finding.subject } : {}),
2119
2782
  });
2783
+ const toFindingManifestLine = (finding) => {
2784
+ const impact = defaultImpactForFinding(finding);
2785
+ const confidence = finding.confidence ?? 1;
2786
+ return {
2787
+ schema_version: "allure-agent-finding/v2",
2788
+ check_id: finding.checkName,
2789
+ instance_id: finding.findingId,
2790
+ severity: finding.severity,
2791
+ impact,
2792
+ confidence,
2793
+ category: finding.category,
2794
+ title: finding.title ?? finding.message,
2795
+ message: finding.message,
2796
+ subject: toFindingSubject(finding),
2797
+ expected: finding.expected ?? (finding.expectedReference ? { reference: finding.expectedReference } : {}),
2798
+ observed: finding.observed ?? { detail: finding.explanation },
2799
+ evidence: {
2800
+ paths: finding.evidencePaths,
2801
+ },
2802
+ action: finding.action ?? finding.remediationHint,
2803
+ ...(finding.source ? { source: finding.source } : {}),
2804
+ ...(finding.limits ? { limits: finding.limits } : {}),
2805
+ ...(finding.affected ? { affected: finding.affected } : {}),
2806
+ ...(finding.moreCount !== undefined ? { more_count: finding.moreCount } : {}),
2807
+ legacy: {
2808
+ finding_id: finding.findingId,
2809
+ subject: finding.subject,
2810
+ subject_type: finding.subjectType,
2811
+ check_name: finding.checkName,
2812
+ explanation: finding.explanation,
2813
+ evidence_paths: finding.evidencePaths,
2814
+ remediation_hint: finding.remediationHint,
2815
+ expected_reference: finding.expectedReference,
2816
+ },
2817
+ finding_id: finding.findingId,
2818
+ subject_ref: finding.subject,
2819
+ subject_type: finding.subjectType,
2820
+ check_name: finding.checkName,
2821
+ explanation: finding.explanation,
2822
+ evidence_paths: finding.evidencePaths,
2823
+ remediation_hint: finding.remediationHint,
2824
+ expected_reference: finding.expectedReference,
2825
+ };
2826
+ };
2120
2827
  const queueRuntimeTask = (runtime, task) => {
2121
2828
  runtime.queue = runtime.queue
2122
2829
  .catch(() => undefined)
@@ -2238,18 +2945,22 @@ const createRuntimeState = async (params) => {
2238
2945
  await cleanupManagedEntries(outputDir);
2239
2946
  const generatedAt = new Date().toISOString();
2240
2947
  const createFinding = createFindingFactory();
2241
- const expectationLoadResult = await loadExpectations(outputDir, createFinding);
2242
- const projectGuide = await loadProjectGuide(outputDir);
2948
+ const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
2243
2949
  const runtime = {
2244
2950
  outputDir,
2245
2951
  context,
2246
2952
  store,
2247
2953
  generatedAt,
2248
- command: env[AGENT_COMMAND_ENV],
2954
+ command: options.command,
2955
+ agentContext: {
2956
+ agentName: options.agentName,
2957
+ loopId: options.loopId,
2958
+ taskId: options.taskId,
2959
+ conversationId: options.conversationId,
2960
+ },
2249
2961
  createFinding,
2250
2962
  expectations: expectationLoadResult.expectations,
2251
2963
  expectationLoadFindings: expectationLoadResult.findings,
2252
- projectGuide,
2253
2964
  unsubscribers: [],
2254
2965
  queue: Promise.resolve(),
2255
2966
  seenLogicalKeys: new Set(),