@allurereport/plugin-agent 3.10.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -77
- package/dist/capabilities.d.ts +127 -0
- package/dist/capabilities.js +266 -0
- package/dist/errors.d.ts +9 -0
- package/dist/errors.js +15 -0
- package/dist/guidance.d.ts +4 -5
- package/dist/guidance.js +223 -60
- package/dist/harness.d.ts +72 -4
- package/dist/harness.js +49 -17
- package/dist/index.d.ts +9 -1
- package/dist/index.js +9 -0
- package/dist/inline-expectations.d.ts +23 -0
- package/dist/inline-expectations.js +186 -0
- package/dist/invalid-output.d.ts +58 -0
- package/dist/invalid-output.js +238 -0
- package/dist/model.d.ts +59 -0
- package/dist/model.js +8 -1
- package/dist/paths.d.ts +3 -0
- package/dist/paths.js +10 -0
- package/dist/plugin.js +916 -137
- package/dist/query.d.ts +195 -0
- package/dist/query.js +177 -0
- package/dist/selection.d.ts +42 -0
- package/dist/selection.js +141 -0
- package/dist/state.d.ts +56 -0
- package/dist/state.js +277 -0
- package/dist/utils.d.ts +17 -0
- package/dist/utils.js +171 -0
- package/package.json +6 -6
package/dist/plugin.js
CHANGED
|
@@ -12,20 +12,12 @@ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (
|
|
|
12
12
|
var _AgentPlugin_runtime;
|
|
13
13
|
import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
|
|
14
14
|
import { basename, dirname, extname, join, relative, resolve } from "node:path";
|
|
15
|
-
import process
|
|
15
|
+
import process from "node:process";
|
|
16
16
|
import { formatDuration, isAttachment, isStep, } from "@allurereport/core-api";
|
|
17
|
-
import { parse } from "yaml";
|
|
18
17
|
import { renderAgentsGuide } from "./guidance.js";
|
|
19
|
-
|
|
20
|
-
const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
|
|
21
|
-
const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
|
|
22
|
-
const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
|
|
23
|
-
const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
|
|
24
|
-
const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
|
|
25
|
-
const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
|
|
26
|
-
const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
|
|
18
|
+
import { parseAgentExpectations } from "./model.js";
|
|
27
19
|
const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
|
|
28
|
-
const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"
|
|
20
|
+
const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"];
|
|
29
21
|
const STATUS_ORDER = {
|
|
30
22
|
failed: 0,
|
|
31
23
|
broken: 1,
|
|
@@ -126,16 +118,34 @@ const normalizeLabelValues = (value) => {
|
|
|
126
118
|
return values.length ? [[name, values]] : [];
|
|
127
119
|
}));
|
|
128
120
|
};
|
|
121
|
+
/**
 * Accepts a value only when it is a whole number greater than or equal to zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself when it qualifies, otherwise `undefined`.
 */
const normalizeNonNegativeInteger = (value) => {
    if (typeof value !== "number" || !Number.isInteger(value)) {
        return undefined;
    }
    return value < 0 ? undefined : value;
};
|
|
122
|
+
/**
 * Accepts a value only when it is a whole number strictly greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The value itself when it qualifies, otherwise `undefined`.
 */
const normalizePositiveInteger = (value) => {
    const isWholeNumber = typeof value === "number" && Number.isInteger(value);
    return isWholeNumber && value > 0 ? value : undefined;
};
|
|
129
123
|
/**
 * Converts a raw (snake_case) selector section into the internal camelCase shape.
 *
 * @param {object | undefined | null} input - Raw selector section; may be absent.
 * @returns {{environments: *, fullNames: *, fullNamePrefixes: *, labelValues: *, testCount: (number | undefined)}}
 *   Normalized selectors. `testCount` is `undefined` unless `test_count` is a non-negative integer.
 */
const normalizeSelectors = (input) => {
    const environments = normalizeStringArray(input?.environments);
    const fullNames = normalizeStringArray(input?.full_names);
    const fullNamePrefixes = normalizeStringArray(input?.full_name_prefixes);
    const labelValues = normalizeLabelValues(input?.label_values);
    const testCount = normalizeNonNegativeInteger(input?.test_count);
    return { environments, fullNames, fullNamePrefixes, labelValues, testCount };
};
|
|
135
130
|
/**
 * Tells whether a normalized selector set names at least one environment,
 * full name, full-name prefix, or label. `testCount` is not considered here.
 *
 * @param {{environments: Array, fullNames: Array, fullNamePrefixes: Array, labelValues: object}} selectors
 * @returns {boolean} `true` when any of the selector collections is non-empty.
 */
const hasSelector = (selectors) => {
    if (selectors.environments.length > 0) {
        return true;
    }
    if (selectors.fullNames.length > 0) {
        return true;
    }
    if (selectors.fullNamePrefixes.length > 0) {
        return true;
    }
    return Object.keys(selectors.labelValues).length > 0;
};
|
|
134
|
+
/**
 * Converts the raw `evidence` section of an expectations document into its
 * internal camelCase shape.
 *
 * Attachment entries that are not objects, or that carry neither a non-empty
 * string `name` nor a non-empty string `content_type`, are dropped.
 *
 * @param {object | undefined | null} input - Raw evidence section; may be absent.
 * @returns {{minSteps: (number | undefined), minAttachments: (number | undefined), stepNameContains: *, attachments: Array<{name?: string, contentType?: string}>}}
 */
const normalizeEvidenceExpectations = (input) => {
    const nonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
    // Returns a zero- or one-element array so it can be used directly with flatMap.
    const toAttachmentExpectation = (attachment) => {
        if (!attachment || typeof attachment !== "object") {
            return [];
        }
        const expectation = {};
        if (nonEmptyString(attachment.name)) {
            expectation.name = attachment.name;
        }
        if (nonEmptyString(attachment.content_type)) {
            expectation.contentType = attachment.content_type;
        }
        return Object.keys(expectation).length > 0 ? [expectation] : [];
    };
    const listAttachments = (source) => (Array.isArray(source?.attachments) ? source.attachments : []);
    return {
        minSteps: normalizePositiveInteger(input?.min_steps),
        minAttachments: normalizePositiveInteger(input?.min_attachments),
        stepNameContains: normalizeStringArray(input?.step_name_contains),
        attachments: listAttachments(input).flatMap((attachment) => toAttachmentExpectation(attachment)),
    };
};
|
|
139
149
|
const normalizeNotes = (value) => {
|
|
140
150
|
if (typeof value === "string") {
|
|
141
151
|
return value.length > 0 ? [value] : [];
|
|
@@ -273,6 +283,27 @@ const mergeStepSummaries = (items) => items.reduce((acc, item) => ({
|
|
|
273
283
|
attachmentRefs: 0,
|
|
274
284
|
assertionLikeSteps: 0,
|
|
275
285
|
});
|
|
286
|
+
/**
 * Flattens a step tree into a pre-order list of step names with their ancestry.
 *
 * Nodes that fail `isStep` are skipped together with anything nested under them.
 * Results are appended to one shared accumulator instead of spreading each
 * recursive result into `push`, which avoids re-copying at every nesting level
 * and the engine's argument-count limit on very large step lists.
 *
 * @param {Array} steps - Nodes to walk (steps and non-step items mixed).
 * @param {string[]} [path=[]] - Names of the ancestors of `steps`.
 * @returns {Array<{name: string, path: string[]}>} One entry per step; `path` ends with the step's own name.
 */
const collectStepNames = (steps, path = []) => {
    const names = [];
    const visit = (nodes, parentPath) => {
        for (const node of nodes) {
            if (!isStep(node)) {
                continue;
            }
            const nextPath = [...parentPath, node.name];
            names.push({ name: node.name, path: nextPath });
            if (node.steps.length) {
                visit(node.steps, nextPath);
            }
        }
    };
    visit(steps, path);
    return names;
};
|
|
300
|
+
/**
 * Checks whether any step of the entry's first attempt has a name containing
 * the expected text. Both sides are passed through `normalizeStepText` before
 * the substring comparison.
 *
 * @param {object} entry - Test entry; only `entry.attempts[0].tr.steps` is read.
 * @param {string} expectedText - Text to look for.
 * @returns {boolean} `false` when the normalized expected text is empty or no step matches.
 */
const testStepContainsText = (entry, expectedText) => {
    const needle = normalizeStepText(expectedText);
    if (needle) {
        for (const step of collectStepNames(entry.attempts[0].tr.steps)) {
            if (normalizeStepText(step.name).includes(needle)) {
                return true;
            }
        }
    }
    return false;
};
|
|
276
307
|
const buildAttemptSignature = (attempt) => JSON.stringify({
|
|
277
308
|
status: attempt.tr.status,
|
|
278
309
|
errorMessage: attempt.tr.error?.message,
|
|
@@ -357,6 +388,7 @@ const subtractStatusCounts = (left, right) => ({
|
|
|
357
388
|
});
|
|
358
389
|
/**
 * Renders status counters as a one-line summary such as
 * "5 total (1 failed, 0 broken, 1 unknown, 1 skipped, 2 passed)".
 *
 * @param {{total: number, failed: number, broken: number, unknown: number, skipped: number, passed: number}} counts
 * @returns {string} The formatted summary.
 */
const summarizeStatusCounts = (counts) => {
    const headline = `${counts.total} total`;
    const breakdown = ["failed", "broken", "unknown", "skipped", "passed"]
        .map((status) => `${counts[status]} ${status}`)
        .join(", ");
    return `${headline} (${breakdown})`;
};
|
|
359
390
|
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string} value - Raw log line.
 * @returns {string} The compacted line.
 */
const normalizeLogLine = (value) => {
    const singleSpaced = value.replace(/\s+/g, " ");
    return singleSpaced.trim();
};
|
|
391
|
+
/**
 * Canonicalizes step text for case-insensitive substring matching: whitespace
 * runs become a single space, both ends are trimmed, and the result is lower-cased.
 *
 * Uses the locale-independent `toLowerCase()` so the outcome of a match does
 * not depend on the host's default locale (locale-aware lowering maps "I" to a
 * dotless "ı" under Turkish, for example).
 *
 * @param {string} value - Step name or expected fragment.
 * @returns {string} The canonical form.
 */
const normalizeStepText = (value) => value.replace(/\s+/g, " ").trim().toLowerCase();
|
|
360
392
|
/**
 * Compacts a warning line with `normalizeLogLine` and rewrites a leading
 * "(node:<pid>) Warning:" prefix to plain "Warning: ", so the process id no
 * longer appears in the line.
 *
 * @param {string} value - Raw warning line.
 * @returns {string} The normalized warning line.
 */
const normalizeWarningLine = (value) => {
    const compacted = normalizeLogLine(value);
    return compacted.replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: ");
};
|
|
361
393
|
const buildCountedValues = (values) => {
|
|
362
394
|
const counts = new Map();
|
|
@@ -751,11 +783,68 @@ const renderModelingSummary = (modeling) => {
|
|
|
751
783
|
: "None");
|
|
752
784
|
return lines.join("\n");
|
|
753
785
|
};
|
|
786
|
+
/**
 * Copies a human-report status object: the top level is shallow-copied, and each
 * entry of `reports` (and of `errors`, when truthy) is shallow-copied too.
 *
 * @param {{reports: object[], errors?: object[]}} status - Status to copy.
 * @returns {object} The copy; the `reports` / `errors` arrays and their entries are new objects.
 */
const cloneHumanReportStatus = (status) => {
    const copy = {
        ...status,
        reports: status.reports.map((report) => ({ ...report })),
    };
    if (status.errors) {
        copy.errors = status.errors.map((error) => ({ ...error }));
    }
    return copy;
};
|
|
791
|
+
/**
 * Obtains a human-report status from a provider and returns a copy of it.
 *
 * @param {object | Function | undefined} provider - Either the status itself or a
 *   (possibly async) function producing it.
 * @returns {Promise<object | undefined>} A copy made with `cloneHumanReportStatus`,
 *   or `undefined` when the provider or the status it yields is falsy.
 */
const resolveHumanReportStatus = async (provider) => {
    if (!provider) {
        return undefined;
    }
    let resolved = provider;
    if (typeof provider === "function") {
        resolved = await provider();
    }
    if (!resolved) {
        return undefined;
    }
    return cloneHumanReportStatus(resolved);
};
|
|
798
|
+
/**
 * Renders the "## Human Report" markdown section.
 *
 * Always lists status, mode, result count and threshold. Path, reason and error
 * are added when truthy. A "### Reports" list appears only when there is more
 * than one report, and a "### Report Errors" list only when `errors` is non-empty.
 *
 * @param {object | undefined} humanReport - Human-report status; may be absent.
 * @returns {string | undefined} The section text, or `undefined` when no status is given.
 */
const renderHumanReportSection = (humanReport) => {
    if (!humanReport) {
        return undefined;
    }
    const toLink = (target) => `[${escapeInlineMarkdown(target)}](${normalizeMarkdownPath(target)})`;
    const output = [
        "## Human Report",
        "",
        `- Status: ${humanReport.status}`,
        `- Mode: ${humanReport.mode}`,
        `- Result Count: ${humanReport.result_count ?? "unknown"}`,
        `- Threshold: ${humanReport.threshold}`,
    ];
    if (humanReport.path) {
        output.push(`- Path: ${toLink(humanReport.path)}`);
    }
    if (humanReport.reason) {
        output.push(`- Reason: ${escapeInlineMarkdown(humanReport.reason)}`);
    }
    if (humanReport.error) {
        output.push(`- Error: ${escapeInlineMarkdown(humanReport.error)}`);
    }
    if (humanReport.reports.length > 1) {
        const reportItems = humanReport.reports.map((report) => `- ${escapeInlineMarkdown(report.plugin_id)}: ${toLink(report.path)}`);
        output.push("", "### Reports", "", reportItems.join("\n"));
    }
    if (humanReport.errors?.length) {
        const errorItems = humanReport.errors.map((error) => {
            // Errors without a plugin id are listed by message only.
            const text = error.plugin_id ? `${error.plugin_id}: ${error.message}` : `${error.message}`;
            return `- ${escapeInlineMarkdown(text)}`;
        });
        output.push("", "### Report Errors", "", errorItems.join("\n"));
    }
    return output.join("\n");
};
|
|
754
840
|
const renderSelectorSummary = (title, selectors) => {
|
|
755
|
-
if (!hasSelector(selectors)) {
|
|
841
|
+
if (!hasSelector(selectors) && selectors.testCount === undefined) {
|
|
756
842
|
return `- ${title}: None`;
|
|
757
843
|
}
|
|
758
844
|
const parts = [];
|
|
845
|
+
if (selectors.testCount !== undefined) {
|
|
846
|
+
parts.push(`test count: ${selectors.testCount}`);
|
|
847
|
+
}
|
|
759
848
|
if (selectors.environments.length) {
|
|
760
849
|
parts.push(`environments: ${selectors.environments.join(", ")}`);
|
|
761
850
|
}
|
|
@@ -771,6 +860,29 @@ const renderSelectorSummary = (title, selectors) => {
|
|
|
771
860
|
}
|
|
772
861
|
return `- ${title}: ${parts.join(" | ")}`;
|
|
773
862
|
};
|
|
863
|
+
/**
 * Renders the normalized evidence expectations as a single markdown bullet.
 *
 * @param {{minSteps?: number, minAttachments?: number, stepNameContains: string[], attachments: Array<{name?: string, contentType?: string}>}} evidence
 * @returns {string} "- Evidence expectations: ..." with the parts joined by " | ", or "None" when nothing is expected.
 */
const renderEvidenceExpectationSummary = (evidence) => {
    const describeAttachment = (attachment) => {
        const fields = [];
        if (attachment.name) {
            fields.push(`name=${attachment.name}`);
        }
        if (attachment.contentType) {
            fields.push(`content-type=${attachment.contentType}`);
        }
        return fields.join(", ");
    };
    const segments = [];
    if (evidence.minSteps !== undefined) {
        segments.push(`meaningful steps per test: >= ${evidence.minSteps}`);
    }
    if (evidence.minAttachments !== undefined) {
        segments.push(`attachments per test: >= ${evidence.minAttachments}`);
    }
    if (evidence.stepNameContains.length) {
        segments.push(`step contains: ${evidence.stepNameContains.join("; ")}`);
    }
    if (evidence.attachments.length) {
        const described = evidence.attachments.map((attachment) => describeAttachment(attachment));
        segments.push(`attachments: ${described.join("; ")}`);
    }
    const summary = segments.length ? segments.join(" | ") : "None";
    return `- Evidence expectations: ${summary}`;
};
|
|
774
886
|
const buildCheckSummary = (findings) => {
|
|
775
887
|
const countsBySeverity = {
|
|
776
888
|
high: 0,
|
|
@@ -794,6 +906,240 @@ const buildCheckSummary = (findings) => {
|
|
|
794
906
|
countsByCategory,
|
|
795
907
|
};
|
|
796
908
|
};
|
|
909
|
+
// Check names reporting that an expected selector matched nothing.
const MISSING_EXPECTED_CHECK_IDS = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
]);
// Check names reporting that requested step/attachment evidence was not met.
const EVIDENCE_MISMATCH_CHECK_IDS = new Set([
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
]);
// Every check name that counts as an expectation finding. The two subsets are
// spliced in at the positions they held in the flat list, so iteration order
// of this set is unchanged.
const EXPECTATION_CHECK_IDS = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expectations-weak-goal",
    ...MISSING_EXPECTED_CHECK_IDS,
    "expected-count-mismatch",
    ...EVIDENCE_MISMATCH_CHECK_IDS,
    "forbidden-label-observed",
    "no-tests-observed",
]);
|
|
938
|
+
/**
 * Sums the number of values across all labels.
 *
 * @param {Record<string, Array>} labelValues - Label name to list of values.
 * @returns {number} Total number of values.
 */
const countLabelValues = (labelValues) => Object.values(labelValues).reduce((total, values) => total + values.length, 0);
/**
 * Counts the controls that are matched against observed results: the test
 * count, each expected environment / full name / prefix / label value, each
 * forbidden label value, and each evidence requirement.
 *
 * @param {object | undefined} expectations - Loaded expectations; may be absent.
 * @returns {number} Number of such controls, `0` when no expectations are given.
 */
const runtimeMatchingControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    const { expected, forbidden, evidence } = expectations;
    return ((expected.testCount !== undefined ? 1 : 0) +
        expected.environments.length +
        expected.fullNames.length +
        expected.fullNamePrefixes.length +
        countLabelValues(expected.labelValues) +
        countLabelValues(forbidden.labelValues) +
        (evidence.minSteps !== undefined ? 1 : 0) +
        (evidence.minAttachments !== undefined ? 1 : 0) +
        evidence.stepNameContains.length +
        evidence.attachments.length);
};
/**
 * Counts every recognized control: the runtime-matching controls plus one each
 * for a truthy `goal` and a truthy `taskId`.
 *
 * Delegates to `runtimeMatchingControlCount` so the two totals cannot drift apart.
 *
 * @param {object | undefined} expectations - Loaded expectations; may be absent.
 * @returns {number} Number of recognized controls, `0` when no expectations are given.
 */
const recognizedControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    return (expectations.goal ? 1 : 0) + (expectations.taskId ? 1 : 0) + runtimeMatchingControlCount(expectations);
};
|
|
971
|
+
/**
 * Converts loaded (camelCase) expectations back into the snake_case document
 * shape. Empty collections, undefined numbers, falsy goal / task id and
 * sections with no keys are left out of the result.
 *
 * @param {object} expectations - Loaded expectations with `expected`, `forbidden`, `evidence` and `notes`.
 * @returns {object} Model with optional `goal`, `task_id`, `expected`, `forbidden`, `evidence`, `notes` keys.
 */
const toExpectationModel = (expectations) => {
    const { expected: wanted, forbidden: banned, evidence: proof } = expectations;
    const hasKeys = (record) => Object.keys(record).length > 0;
    const expected = {
        ...(wanted.testCount !== undefined ? { test_count: wanted.testCount } : {}),
        ...(wanted.environments.length ? { environments: wanted.environments } : {}),
        ...(wanted.fullNames.length ? { full_names: wanted.fullNames } : {}),
        ...(wanted.fullNamePrefixes.length ? { full_name_prefixes: wanted.fullNamePrefixes } : {}),
        ...(hasKeys(wanted.labelValues) ? { label_values: wanted.labelValues } : {}),
    };
    const forbidden = {
        ...(hasKeys(banned.labelValues) ? { label_values: banned.labelValues } : {}),
    };
    const toAttachmentModel = (attachment) => ({
        ...(attachment.name ? { name: attachment.name } : {}),
        ...(attachment.contentType ? { content_type: attachment.contentType } : {}),
    });
    const evidence = {
        ...(proof.minSteps !== undefined ? { min_steps: proof.minSteps } : {}),
        ...(proof.minAttachments !== undefined ? { min_attachments: proof.minAttachments } : {}),
        ...(proof.stepNameContains.length ? { step_name_contains: proof.stepNameContains } : {}),
        ...(proof.attachments.length
            ? { attachments: proof.attachments.map((attachment) => toAttachmentModel(attachment)) }
            : {}),
    };
    const model = {};
    if (expectations.goal) {
        model.goal = expectations.goal;
    }
    if (expectations.taskId) {
        model.task_id = expectations.taskId;
    }
    if (hasKeys(expected)) {
        model.expected = expected;
    }
    if (hasKeys(forbidden)) {
        model.forbidden = forbidden;
    }
    if (hasKeys(evidence)) {
        model.evidence = evidence;
    }
    if (expectations.notes.length) {
        model.notes = expectations.notes;
    }
    return model;
};
|
|
1017
|
+
/**
 * Determines the impact of a finding.
 *
 * An explicit truthy `finding.impact` wins. Otherwise the check name decides:
 * missing-expected, forbidden-label and no-tests checks reject; a
 * "noop-dominated-steps" finding rejects at confidence >= 0.75; the listed
 * expectation, evidence and modeling checks iterate. Any other finding iterates
 * when its severity is "high" and is advisory otherwise.
 *
 * @param {{impact?: string, checkName: string, confidence?: number, severity?: string}} finding
 * @returns {string} The explicit impact, or "reject" / "iterate" / "advisory".
 */
const defaultImpactForFinding = (finding) => {
    if (finding.impact) {
        return finding.impact;
    }
    switch (finding.checkName) {
        case "expected-test-missing":
        case "expected-prefix-missing":
        case "expected-label-missing":
        case "expected-environment-missing":
        case "forbidden-label-observed":
        case "no-tests-observed":
            return "reject";
        case "noop-dominated-steps":
            if ((finding.confidence ?? 0) >= 0.75) {
                return "reject";
            }
            // Below the threshold this check falls through to the severity rule.
            break;
        case "expectations-invalid":
        case "expectations-empty":
        case "expectations-unsupported-control":
        case "expected-count-mismatch":
        case "expected-step-containing-missing":
        case "insufficient-expected-steps":
        case "insufficient-expected-attachments":
        case "missing-expected-attachment":
        case "runner-failures-outside-logical-results":
        case "metadata-mismatch":
        case "history-id-collision":
        case "failed-without-useful-steps":
        case "failed-without-attachments":
        case "nontrivial-run-with-empty-trace":
        case "retries-without-new-evidence":
        case "passed-without-observable-evidence":
            return "iterate";
        default:
            break;
    }
    return finding.severity === "high" ? "iterate" : "advisory";
};
|
|
1059
|
+
/**
 * Returns the strongest impact among the findings: "reject" beats "iterate",
 * and `fallback` is returned when neither is present.
 *
 * Each finding is classified once in a single pass, stopping at the first
 * "reject", instead of scanning the list twice.
 *
 * @param {object[]} findings - Findings to inspect.
 * @param {string} fallback - Value returned when no finding rejects or iterates.
 * @returns {string} "reject", "iterate", or `fallback`.
 */
const strongestImpact = (findings, fallback) => {
    let sawIterate = false;
    for (const finding of findings) {
        const impact = defaultImpactForFinding(finding);
        if (impact === "reject") {
            return "reject";
        }
        if (impact === "iterate") {
            sawIterate = true;
        }
    }
    return sawIterate ? "iterate" : fallback;
};
|
|
1068
|
+
/**
 * Builds the "allure-agent-expectation-result/v1" summary from the loaded
 * expectations and the findings produced for the run.
 *
 * Status and impact are decided in this order:
 *   1. any "expectations-invalid" finding          -> "unavailable"
 *   2. any empty / unsupported-control finding     -> "unsupported"
 *   3. a "no-tests-observed" finding               -> "failed" / "reject"
 *   4. no runtime-matching controls requested      -> "not_requested" / "advisory"
 *   5. a blocking finding that rejects or iterates -> "failed"
 *   6. partial modeling completeness               -> "partial" / "iterate"
 *   7. otherwise                                   -> "matched" / "accept"
 *
 * @param {{expectations: (object | undefined), findings: object[], observedTestCount: number, modelingSummary: {completeness: string}}} params
 * @returns {object} The expectation result record.
 */
const buildExpectationResult = (params) => {
    const { expectations, findings, observedTestCount, modelingSummary } = params;
    const expectationFindings = findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName));
    const findingsFor = (checkName) => expectationFindings.filter((finding) => finding.checkName === checkName);
    const countWithin = (checkIds) => expectationFindings.filter((finding) => checkIds.has(finding.checkName)).length;
    const recognized = recognizedControlCount(expectations);
    const runtimeMatching = runtimeMatchingControlCount(expectations);
    const invalidFindings = findingsFor("expectations-invalid");
    const emptyFindings = findingsFor("expectations-empty");
    const unsupportedFindings = findingsFor("expectations-unsupported-control");
    // A weak goal is reported but never blocks the result.
    const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal");
    const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0;
    let status;
    let impact;
    if (invalidFindings.length) {
        status = "unavailable";
        // strongestImpact yields only "reject", "iterate" or its fallback, so with
        // a "reject" fallback no extra guard against "advisory" is needed.
        impact = strongestImpact(invalidFindings, "reject");
    }
    else if (emptyFindings.length || unsupportedFindings.length) {
        status = "unsupported";
        // With an "iterate" fallback the result is already "reject" or "iterate".
        impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate");
    }
    else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) {
        status = "failed";
        impact = "reject";
    }
    else if (runtimeMatching === 0) {
        status = "not_requested";
        impact = "advisory";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "reject")) {
        status = "failed";
        impact = "reject";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "iterate")) {
        status = "failed";
        impact = "iterate";
    }
    else if (modelingSummary.completeness === "partial") {
        status = "partial";
        impact = "iterate";
    }
    else {
        status = "matched";
        impact = "accept";
    }
    return {
        schema_version: "allure-agent-expectation-result/v1",
        status,
        impact,
        source: expectations
            ? {
                kind: expectations.sourceKind,
                path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
            }
            : {
                kind: "none",
                path: null,
            },
        recognized_control_count: recognized,
        unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
        degraded_controls: [],
        summary: {
            expected_tests: expectedTests,
            observed_tests: observedTestCount,
            missing_expected: countWithin(MISSING_EXPECTED_CHECK_IDS),
            forbidden_observed: findingsFor("forbidden-label-observed").length,
            unexpected_observed: 0,
            evidence_mismatches: countWithin(EVIDENCE_MISMATCH_CHECK_IDS),
        },
        finding_ids: expectationFindings.map((finding) => finding.findingId),
    };
};
|
|
797
1143
|
const sortFindings = (findings) => [...findings].sort((left, right) => {
|
|
798
1144
|
const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
|
|
799
1145
|
if (bySeverity !== 0) {
|
|
@@ -818,6 +1164,28 @@ const renderFindingEvidenceLinks = (params) => {
|
|
|
818
1164
|
})
|
|
819
1165
|
.join("\n");
|
|
820
1166
|
};
|
|
1167
|
+
/**
 * Formats a structured `expected` / `observed` value of a finding as one line of text.
 *
 * - `null` / `undefined` give `undefined`.
 * - Strings, numbers and booleans are stringified.
 * - Arrays are formatted item by item and joined with ", ".
 * - Objects become "key: value" pairs joined with "; ".
 *
 * Items and properties that format to nothing are dropped. An array or object
 * with nothing left gives `undefined`, not an empty string, so that callers'
 * `??` fallbacks take effect for both container kinds alike.
 *
 * @param {unknown} value - Value to format.
 * @returns {string | undefined} The formatted text, or `undefined` when there is nothing to show.
 */
const formatFindingStructuredValue = (value) => {
    if (value === undefined || value === null) {
        return undefined;
    }
    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
        return String(value);
    }
    if (Array.isArray(value)) {
        const items = value
            .map((item) => formatFindingStructuredValue(item))
            .filter(Boolean);
        return items.length ? items.join(", ") : undefined;
    }
    if (typeof value === "object") {
        const parts = Object.entries(value).flatMap(([key, item]) => {
            const formatted = formatFindingStructuredValue(item);
            return formatted ? [`${key}: ${formatted}`] : [];
        });
        return parts.length ? parts.join("; ") : undefined;
    }
    // Functions, symbols and bigints are not rendered.
    return undefined;
};
|
|
821
1189
|
const renderFindingsSection = (params) => {
|
|
822
1190
|
const { title, findings, currentFilePath, outputDir } = params;
|
|
823
1191
|
if (!findings.length) {
|
|
@@ -825,25 +1193,26 @@ const renderFindingsSection = (params) => {
|
|
|
825
1193
|
}
|
|
826
1194
|
const lines = [`## ${title}`, ""];
|
|
827
1195
|
for (const finding of sortFindings(findings)) {
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
if (finding.expectedReference) {
|
|
834
|
-
lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
|
|
835
|
-
}
|
|
836
|
-
if (finding.confidence !== undefined) {
|
|
837
|
-
lines.push(`- Confidence: ${finding.confidence}`);
|
|
838
|
-
}
|
|
839
|
-
lines.push("- Evidence:");
|
|
840
|
-
lines.push("");
|
|
841
|
-
lines.push(renderFindingEvidenceLinks({
|
|
1196
|
+
const impact = defaultImpactForFinding(finding);
|
|
1197
|
+
const expected = formatFindingStructuredValue(finding.expected) ??
|
|
1198
|
+
(finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
|
|
1199
|
+
const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
|
|
1200
|
+
const evidenceLinks = renderFindingEvidenceLinks({
|
|
842
1201
|
finding,
|
|
843
1202
|
currentFilePath,
|
|
844
1203
|
outputDir,
|
|
845
|
-
})
|
|
846
|
-
lines.push(
|
|
1204
|
+
});
|
|
1205
|
+
lines.push(`- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`);
|
|
1206
|
+
if (expected) {
|
|
1207
|
+
lines.push(` Expected: ${escapeInlineMarkdown(expected)}`);
|
|
1208
|
+
}
|
|
1209
|
+
if (observed) {
|
|
1210
|
+
lines.push(` Observed: ${escapeInlineMarkdown(observed)}`);
|
|
1211
|
+
}
|
|
1212
|
+
lines.push(` Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
|
|
1213
|
+
if (evidenceLinks !== "None") {
|
|
1214
|
+
lines.push(` Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
|
|
1215
|
+
}
|
|
847
1216
|
}
|
|
848
1217
|
return lines.join("\n").trimEnd();
|
|
849
1218
|
};
|
|
@@ -858,6 +1227,25 @@ const renderExpectationSection = (entry) => {
|
|
|
858
1227
|
];
|
|
859
1228
|
return lines.join("\n");
|
|
860
1229
|
};
|
|
1230
|
+
/**
 * Renders the "## Expectation Result" markdown section from the result computed
 * by `buildExpectationResult`, followed by links to the run and findings manifests.
 *
 * @param {object} params - Passed unchanged to `buildExpectationResult`.
 * @returns {string} The section text.
 */
const renderExpectationResultSection = (params) => {
    const { status, impact, recognized_control_count: recognizedControls, source, summary, } = buildExpectationResult(params);
    // The source path is appended in parentheses only when there is one.
    const pathSuffix = source.path ? ` (${source.path})` : "";
    const bullets = [
        `- Status: ${status}`,
        `- Impact: ${impact}`,
        `- Recognized Controls: ${recognizedControls}`,
        `- Source: ${source.kind}${pathSuffix}`,
        `- Expected Tests: ${summary.expected_tests}`,
        `- Observed Tests: ${summary.observed_tests}`,
        `- Missing Expected: ${summary.missing_expected}`,
        `- Forbidden Observed: ${summary.forbidden_observed}`,
        `- Evidence Mismatches: ${summary.evidence_mismatches}`,
        "- Run Manifest: [manifest/run.json](manifest/run.json)",
        "- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)",
    ];
    return ["## Expectation Result", "", ...bullets].join("\n");
};
|
|
861
1249
|
const renderRerunGuidance = (findings) => {
|
|
862
1250
|
const relevant = findings.filter(({ category }) => category === "evidence" || category === "smells" || category === "metadata");
|
|
863
1251
|
if (!relevant.length) {
|
|
@@ -875,7 +1263,7 @@ const renderRerunGuidance = (findings) => {
|
|
|
875
1263
|
if (relevant.some(({ checkName }) => checkName === "noop-dominated-steps")) {
|
|
876
1264
|
lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
|
|
877
1265
|
}
|
|
878
|
-
lines.push("- Rerun only the relevant tests with the same expectations
|
|
1266
|
+
lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
|
|
879
1267
|
return lines.join("\n");
|
|
880
1268
|
};
|
|
881
1269
|
const renderTestFile = (params) => {
|
|
@@ -948,7 +1336,7 @@ const renderTestFile = (params) => {
|
|
|
948
1336
|
return `${lines.join("\n").trimEnd()}\n`;
|
|
949
1337
|
};
|
|
950
1338
|
const renderIndex = (params) => {
|
|
951
|
-
const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, } = params;
|
|
1339
|
+
const { context, command, generatedAt, phase, stats, durationSummary, environmentSummary, modelingSummary, expectations, tests, globalArtifacts, globalErrors, globalExitCode, qualityGateResults, findings, humanReport, } = params;
|
|
952
1340
|
const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
|
|
953
1341
|
const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
|
|
954
1342
|
const remainingGlobalArtifacts = globalArtifacts.filter((artifact) => artifact.displayName !== "stdout.txt" && artifact.displayName !== "stderr.txt");
|
|
@@ -1009,20 +1397,35 @@ const renderIndex = (params) => {
|
|
|
1009
1397
|
: "None");
|
|
1010
1398
|
lines.push("");
|
|
1011
1399
|
lines.push(renderModelingSummary(modelingSummary));
|
|
1400
|
+
const humanReportSection = renderHumanReportSection(humanReport);
|
|
1401
|
+
if (humanReportSection) {
|
|
1402
|
+
lines.push("");
|
|
1403
|
+
lines.push(humanReportSection);
|
|
1404
|
+
}
|
|
1012
1405
|
if (expectations) {
|
|
1013
1406
|
lines.push("");
|
|
1014
1407
|
lines.push("## Expected Scope");
|
|
1015
1408
|
lines.push("");
|
|
1016
1409
|
lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
|
|
1017
1410
|
lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
|
|
1018
|
-
lines.push(
|
|
1411
|
+
lines.push(expectations.sourceKind === "inline"
|
|
1412
|
+
? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
|
|
1413
|
+
: `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
|
|
1019
1414
|
lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
|
|
1020
1415
|
lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
|
|
1416
|
+
lines.push(renderEvidenceExpectationSummary(expectations.evidence));
|
|
1021
1417
|
if (expectations.notes.length) {
|
|
1022
1418
|
lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
|
|
1023
1419
|
}
|
|
1024
1420
|
}
|
|
1025
1421
|
lines.push("");
|
|
1422
|
+
lines.push(renderExpectationResultSection({
|
|
1423
|
+
expectations,
|
|
1424
|
+
findings,
|
|
1425
|
+
observedTestCount: tests.length,
|
|
1426
|
+
modelingSummary,
|
|
1427
|
+
}));
|
|
1428
|
+
lines.push("");
|
|
1026
1429
|
lines.push("## Advisory Check Summary");
|
|
1027
1430
|
lines.push("");
|
|
1028
1431
|
lines.push(`- modeling completeness: ${modelingSummary.completeness}`);
|
|
@@ -1225,10 +1628,7 @@ const readMaterializedArtifactText = async (outputDir, artifact) => {
|
|
|
1225
1628
|
return undefined;
|
|
1226
1629
|
}
|
|
1227
1630
|
};
|
|
1228
|
-
const resolveOutputDir = (options) =>
|
|
1229
|
-
const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
|
|
1230
|
-
return outputDir ? resolve(outputDir) : undefined;
|
|
1231
|
-
};
|
|
1631
|
+
/**
 * Resolves the configured output directory to an absolute path.
 *
 * @param {{outputDir?: string}} options - Plugin options.
 * @returns {string | undefined} The resolved path, or `undefined` when `outputDir` is falsy.
 */
const resolveOutputDir = (options) => {
    const { outputDir } = options;
    if (!outputDir) {
        return undefined;
    }
    return resolve(outputDir);
};
|
|
1232
1632
|
const cleanupManagedEntries = async (outputDir) => {
|
|
1233
1633
|
await Promise.all(MANAGED_ENTRIES.map(async (entry) => {
|
|
1234
1634
|
await rm(join(outputDir, entry), {
|
|
@@ -1267,15 +1667,97 @@ const createFindingFactory = () => {
|
|
|
1267
1667
|
};
|
|
1268
1668
|
};
|
|
1269
1669
|
};
|
|
1270
|
-
const
|
|
1271
|
-
const parsed = parse(rawContent);
|
|
1670
|
+
/**
 * Guard that accepts only a non-array object as an expectations payload.
 *
 * The function returns nothing on success; callers rely solely on the throw.
 *
 * @param {unknown} parsed - candidate expectations value.
 * @throws {Error} with message "Expected an expectations object" when `parsed`
 *   is falsy, is not of type "object", or is an array.
 */
const assertExpectationsObject = (parsed) => {
    // `typeof null === "object"`, so the falsy check must come first.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error("Expected an expectations object");
    }
};
|
|
1277
|
-
const
|
|
1278
|
-
const
|
|
1675
|
+
/**
 * Persist the expectations object as `manifest/expected.json` under the output directory.
 *
 * @param {string} outputDir - directory that receives the `manifest` folder.
 * @param {object} parsed - expectations object; serialized with `JSON.stringify`.
 * @returns {Promise<string>} the manifest path relative to `outputDir`, as produced by
 *   `normalizeMarkdownPath("manifest/expected.json")`.
 */
const writeExpectedManifest = async (outputDir, parsed) => {
    const relativePath = normalizeMarkdownPath("manifest/expected.json");
    // The manifest directory may not exist yet; `recursive` makes this a no-op when it does.
    await mkdir(join(outputDir, "manifest"), { recursive: true });
    // Two-space indented JSON followed by a trailing newline.
    await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
    return relativePath;
};
|
|
1681
|
+
/**
 * Build the in-memory expectations record from a parsed expectations object.
 *
 * @param {object} params
 * @param {object} params.parsed - parsed expectations; `goal`, `task_id`, `notes`,
 *   `expected`, `forbidden` and `evidence` are read from it.
 * @param {string} params.relativePath - path of the written manifest copy.
 * @param {string} params.sourceKind - origin tag supplied by the caller
 *   (the visible callers pass "inline" or "file").
 * @param {string} [params.sourcePath] - original file path; the inline caller omits it,
 *   in which case the field is `undefined`.
 * @returns {object} record holding the source fields, the untouched object as `raw`,
 *   `goal`, `taskId` (from `task_id`), and the normalized `notes`, `expected`,
 *   `forbidden` and `evidence` sections.
 */
const toLoadedExpectations = (params) => {
    const { parsed, relativePath, sourceKind, sourcePath } = params;
    return {
        sourcePath,
        sourceKind,
        relativePath,
        raw: parsed,
        goal: parsed.goal,
        // The input uses snake_case; the loaded model uses camelCase.
        taskId: parsed.task_id,
        notes: normalizeNotes(parsed.notes),
        expected: normalizeSelectors(parsed.expected),
        forbidden: normalizeSelectors(parsed.forbidden),
        evidence: normalizeEvidenceExpectations(parsed.evidence),
    };
};
|
|
1696
|
+
const loadExpectations = async (outputDir, createFinding, options) => {
|
|
1697
|
+
const configuredPath = options.expectationsPath;
|
|
1698
|
+
const inlineExpectations = options.expectations;
|
|
1699
|
+
if (!configuredPath && !inlineExpectations) {
|
|
1700
|
+
return {
|
|
1701
|
+
expectations: undefined,
|
|
1702
|
+
findings: [],
|
|
1703
|
+
};
|
|
1704
|
+
}
|
|
1705
|
+
if (configuredPath && inlineExpectations) {
|
|
1706
|
+
return {
|
|
1707
|
+
expectations: undefined,
|
|
1708
|
+
findings: [
|
|
1709
|
+
createFinding({
|
|
1710
|
+
subject: "run",
|
|
1711
|
+
subjectType: "run",
|
|
1712
|
+
severity: "high",
|
|
1713
|
+
category: "bootstrap",
|
|
1714
|
+
impact: "reject",
|
|
1715
|
+
checkName: "expectations-invalid",
|
|
1716
|
+
title: "Expectation input is invalid",
|
|
1717
|
+
message: "Both file and inline agent expectations were provided.",
|
|
1718
|
+
explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
|
|
1719
|
+
evidencePaths: [],
|
|
1720
|
+
remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
|
|
1721
|
+
expectedReference: undefined,
|
|
1722
|
+
}),
|
|
1723
|
+
],
|
|
1724
|
+
};
|
|
1725
|
+
}
|
|
1726
|
+
if (inlineExpectations) {
|
|
1727
|
+
try {
|
|
1728
|
+
assertExpectationsObject(inlineExpectations);
|
|
1729
|
+
const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
|
|
1730
|
+
return {
|
|
1731
|
+
expectations: toLoadedExpectations({
|
|
1732
|
+
parsed: inlineExpectations,
|
|
1733
|
+
relativePath,
|
|
1734
|
+
sourceKind: "inline",
|
|
1735
|
+
}),
|
|
1736
|
+
findings: [],
|
|
1737
|
+
};
|
|
1738
|
+
}
|
|
1739
|
+
catch (error) {
|
|
1740
|
+
return {
|
|
1741
|
+
expectations: undefined,
|
|
1742
|
+
findings: [
|
|
1743
|
+
createFinding({
|
|
1744
|
+
subject: "run",
|
|
1745
|
+
subjectType: "run",
|
|
1746
|
+
severity: "high",
|
|
1747
|
+
category: "bootstrap",
|
|
1748
|
+
impact: "reject",
|
|
1749
|
+
checkName: "expectations-invalid",
|
|
1750
|
+
title: "Expectation input is invalid",
|
|
1751
|
+
message: "Could not load inline agent expectations",
|
|
1752
|
+
explanation: `The inline expectations option could not be normalized: ${error.message}`,
|
|
1753
|
+
evidencePaths: [],
|
|
1754
|
+
remediationHint: "Provide a valid expectations object before rerunning.",
|
|
1755
|
+
expectedReference: undefined,
|
|
1756
|
+
}),
|
|
1757
|
+
],
|
|
1758
|
+
};
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1279
1761
|
if (!configuredPath) {
|
|
1280
1762
|
return {
|
|
1281
1763
|
expectations: undefined,
|
|
@@ -1285,21 +1767,15 @@ const loadExpectations = async (outputDir, createFinding) => {
|
|
|
1285
1767
|
const expectationsPath = resolve(configuredPath);
|
|
1286
1768
|
try {
|
|
1287
1769
|
const rawContent = await readFile(expectationsPath, "utf-8");
|
|
1288
|
-
const parsed =
|
|
1289
|
-
const relativePath =
|
|
1290
|
-
await mkdir(join(outputDir, "manifest"), { recursive: true });
|
|
1291
|
-
await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
|
|
1770
|
+
const parsed = parseAgentExpectations(rawContent);
|
|
1771
|
+
const relativePath = await writeExpectedManifest(outputDir, parsed);
|
|
1292
1772
|
return {
|
|
1293
|
-
expectations: {
|
|
1294
|
-
|
|
1773
|
+
expectations: toLoadedExpectations({
|
|
1774
|
+
parsed,
|
|
1295
1775
|
relativePath,
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
notes: normalizeNotes(parsed.notes),
|
|
1300
|
-
expected: normalizeSelectors(parsed.expected),
|
|
1301
|
-
forbidden: normalizeSelectors(parsed.forbidden),
|
|
1302
|
-
},
|
|
1776
|
+
sourceKind: "file",
|
|
1777
|
+
sourcePath: expectationsPath,
|
|
1778
|
+
}),
|
|
1303
1779
|
findings: [],
|
|
1304
1780
|
};
|
|
1305
1781
|
}
|
|
@@ -1312,37 +1788,19 @@ const loadExpectations = async (outputDir, createFinding) => {
|
|
|
1312
1788
|
subjectType: "run",
|
|
1313
1789
|
severity: "high",
|
|
1314
1790
|
category: "bootstrap",
|
|
1315
|
-
|
|
1316
|
-
|
|
1791
|
+
impact: "reject",
|
|
1792
|
+
checkName: "expectations-invalid",
|
|
1793
|
+
title: "Expectation input is invalid",
|
|
1794
|
+
message: `Could not load expectations from ${expectationsPath}`,
|
|
1317
1795
|
explanation: `The expectations file could not be parsed as YAML or JSON: ${error.message}`,
|
|
1318
1796
|
evidencePaths: [],
|
|
1319
|
-
remediationHint: "Provide a readable YAML or JSON file
|
|
1797
|
+
remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
|
|
1320
1798
|
expectedReference: undefined,
|
|
1321
1799
|
}),
|
|
1322
1800
|
],
|
|
1323
1801
|
};
|
|
1324
1802
|
}
|
|
1325
1803
|
};
|
|
1326
|
-
const loadProjectGuide = async (outputDir) => {
|
|
1327
|
-
const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd());
|
|
1328
|
-
const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
|
|
1329
|
-
try {
|
|
1330
|
-
const content = await readFile(sourcePath, "utf-8");
|
|
1331
|
-
const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md"));
|
|
1332
|
-
await mkdir(join(outputDir, "project", "docs"), { recursive: true });
|
|
1333
|
-
await writeFile(join(outputDir, relativePath), content, "utf-8");
|
|
1334
|
-
return {
|
|
1335
|
-
sourcePath,
|
|
1336
|
-
relativePath,
|
|
1337
|
-
};
|
|
1338
|
-
}
|
|
1339
|
-
catch (error) {
|
|
1340
|
-
if (error.code === "ENOENT") {
|
|
1341
|
-
return undefined;
|
|
1342
|
-
}
|
|
1343
|
-
throw error;
|
|
1344
|
-
}
|
|
1345
|
-
};
|
|
1346
1804
|
const computeScopeEvaluation = (params) => {
|
|
1347
1805
|
const { tr, environmentId, expectations } = params;
|
|
1348
1806
|
if (!expectations) {
|
|
@@ -1418,22 +1876,54 @@ const collectTestEvidencePaths = (entry) => {
|
|
|
1418
1876
|
}
|
|
1419
1877
|
return uniqueValues(paths);
|
|
1420
1878
|
};
|
|
1879
|
+
/**
 * Select the test entries that evidence expectations should be evaluated against.
 *
 * @param {Array<object>} entries - logical test entries; each exposes `scope.scopeMatch`.
 * @param {object} expectations - loaded expectations; only `expected` is read.
 * @returns {Array<object>} the `entries` array itself when `hasSelector(expectations.expected)`
 *   is falsy, otherwise a new array containing only entries whose
 *   `scope.scopeMatch` is exactly "match".
 */
const getExpectationTargetEntries = (entries, expectations) => {
    // Without positive selectors there is nothing to narrow by, so every entry is a target.
    if (!hasSelector(expectations.expected)) {
        return entries;
    }
    return entries.filter((entry) => entry.scope.scopeMatch === "match");
};
|
|
1885
|
+
/**
 * Merge the test-step and fixture-step summaries of the entry's first attempt.
 *
 * @param {object} entry - logical test entry; `entry.attempts[0]` must exist.
 * @returns {object} result of `mergeStepSummaries` over
 *   `[stepSummary, fixtureStepSummary]` of `entry.attempts[0]`.
 */
const currentAttemptStepSummary = (entry) => {
    const currentAttempt = entry.attempts[0];
    const summaries = [currentAttempt.stepSummary, currentAttempt.fixtureStepSummary];
    return mergeStepSummaries(summaries);
};
|
|
1886
|
+
/**
 * List the entry's artifacts that are not flagged as missing.
 *
 * @param {{ allArtifacts: Array<{ missing?: boolean }> }} entry - logical test entry.
 * @returns {Array<object>} new array with every artifact whose `missing` is falsy,
 *   in original order.
 */
const nonMissingArtifacts = (entry) => {
    const isPresent = (artifact) => !artifact.missing;
    return entry.allArtifacts.filter(isPresent);
};
|
|
1887
|
+
/**
 * Render an attachment expectation as a short human-readable filter string.
 *
 * @param {{ name?: string, contentType?: string }} expectation - attachment filter.
 * @returns {string} `name=<name>` and/or `content-type=<contentType>` joined by ", ";
 *   falsy fields are omitted, so an expectation with neither field yields "".
 */
const formatAttachmentExpectation = (expectation) => [
    expectation.name ? `name=${expectation.name}` : undefined,
    expectation.contentType ? `content-type=${expectation.contentType}` : undefined,
]
    // Drop the placeholders left by absent fields before joining.
    .filter(Boolean)
    .join(", ");
|
|
1893
|
+
/**
 * Check whether an artifact satisfies an attachment expectation.
 *
 * @param {{ missing?: boolean, displayName?: string, contentType?: string }} artifact
 *   - artifact to test.
 * @param {{ name?: string, contentType?: string }} expectation - filter; a falsy field
 *   places no constraint on the artifact.
 * @returns {boolean} `false` when the artifact is flagged missing or when a provided
 *   filter field differs (strict equality) from the artifact's `displayName` /
 *   `contentType`; `true` otherwise.
 */
const matchesAttachmentExpectation = (artifact, expectation) => {
    // A missing artifact never counts, whatever the filter says.
    if (artifact.missing) {
        return false;
    }
    if (expectation.name && artifact.displayName !== expectation.name) {
        return false;
    }
    if (expectation.contentType && artifact.contentType !== expectation.contentType) {
        return false;
    }
    return true;
};
|
|
1421
1905
|
const buildRunAndTestFindings = (params) => {
|
|
1422
1906
|
const { entries, expectations, globalArtifacts, modelingSummary, createFinding } = params;
|
|
1423
1907
|
const runFindings = [];
|
|
1424
1908
|
const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
|
|
1425
1909
|
const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
|
|
1426
|
-
if (entries.length === 0) {
|
|
1910
|
+
if (entries.length === 0 && expectations?.expected.testCount !== 0) {
|
|
1427
1911
|
runFindings.push(createFinding({
|
|
1428
1912
|
subject: "run",
|
|
1429
1913
|
subjectType: "run",
|
|
1430
1914
|
severity: "high",
|
|
1915
|
+
impact: "reject",
|
|
1431
1916
|
category: "bootstrap",
|
|
1432
|
-
checkName: "no-
|
|
1917
|
+
checkName: "no-tests-observed",
|
|
1918
|
+
title: "No logical tests were observed",
|
|
1433
1919
|
message: "No visible test results were found in the run.",
|
|
1434
1920
|
explanation: "The agent output was generated, but there were no visible logical test results to review.",
|
|
1435
|
-
evidencePaths: [],
|
|
1436
|
-
remediationHint: "
|
|
1921
|
+
evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
|
|
1922
|
+
remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
|
|
1923
|
+
expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
|
|
1924
|
+
observed: { test_count: 0 },
|
|
1925
|
+
action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
|
|
1926
|
+
confidence: 1,
|
|
1437
1927
|
}));
|
|
1438
1928
|
}
|
|
1439
1929
|
if (!stdoutArtifact && !stderrArtifact) {
|
|
@@ -1446,7 +1936,7 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1446
1936
|
message: "The run does not include global stdout or stderr logs.",
|
|
1447
1937
|
explanation: "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
|
|
1448
1938
|
evidencePaths: [],
|
|
1449
|
-
remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics
|
|
1939
|
+
remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
|
|
1450
1940
|
confidence: 0.9,
|
|
1451
1941
|
}));
|
|
1452
1942
|
}
|
|
@@ -1486,19 +1976,93 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1486
1976
|
const actualEnvironments = uniqueValues(entries.map(({ environmentId }) => environmentId));
|
|
1487
1977
|
if (expectations) {
|
|
1488
1978
|
const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
|
|
1979
|
+
const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
|
|
1980
|
+
const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
|
|
1981
|
+
if (recognizedControlCount(expectations) === 0) {
|
|
1982
|
+
runFindings.push(createFinding({
|
|
1983
|
+
subject: "run",
|
|
1984
|
+
subjectType: "run",
|
|
1985
|
+
severity: "high",
|
|
1986
|
+
impact: "iterate",
|
|
1987
|
+
category: "scope",
|
|
1988
|
+
checkName: "expectations-empty",
|
|
1989
|
+
title: "Expectation source did not contain recognized controls",
|
|
1990
|
+
message: "Expectation source was provided but no recognized M1 controls were parsed.",
|
|
1991
|
+
explanation: "The run can still be reviewed, but expectation precision was not requested.",
|
|
1992
|
+
evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
|
|
1993
|
+
remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
|
|
1994
|
+
observed: { recognized_control_count: 0 },
|
|
1995
|
+
action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
|
|
1996
|
+
confidence: 1,
|
|
1997
|
+
}));
|
|
1998
|
+
}
|
|
1999
|
+
if ((hasRuntimeControls && !expectations.goal) ||
|
|
2000
|
+
["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)) {
|
|
2001
|
+
runFindings.push(createFinding({
|
|
2002
|
+
subject: "run",
|
|
2003
|
+
subjectType: "run",
|
|
2004
|
+
severity: "info",
|
|
2005
|
+
impact: "advisory",
|
|
2006
|
+
category: "scope",
|
|
2007
|
+
checkName: "expectations-weak-goal",
|
|
2008
|
+
title: "Run goal is missing or too generic",
|
|
2009
|
+
message: expectations.goal
|
|
2010
|
+
? `The run goal is too generic: ${expectations.goal}`
|
|
2011
|
+
: "Runtime expectations were provided without a goal.",
|
|
2012
|
+
explanation: "The goal is intent metadata and does not change the runtime evidence.",
|
|
2013
|
+
evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
|
|
2014
|
+
remediationHint: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
|
|
2015
|
+
expected: { goal: "specific validation claim" },
|
|
2016
|
+
observed: { goal: expectations.goal ?? null },
|
|
2017
|
+
action: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
|
|
2018
|
+
confidence: 0.9,
|
|
2019
|
+
}));
|
|
2020
|
+
}
|
|
2021
|
+
if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
|
|
2022
|
+
const severity = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
|
|
2023
|
+
const impact = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
|
|
2024
|
+
runFindings.push(createFinding({
|
|
2025
|
+
subject: "run",
|
|
2026
|
+
subjectType: "run",
|
|
2027
|
+
severity,
|
|
2028
|
+
impact,
|
|
2029
|
+
category: "scope",
|
|
2030
|
+
checkName: "expected-count-mismatch",
|
|
2031
|
+
title: "Observed logical test count did not match",
|
|
2032
|
+
message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
|
|
2033
|
+
explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
|
|
2034
|
+
evidencePaths: expectations.relativePath
|
|
2035
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2036
|
+
: ["manifest/tests.jsonl"],
|
|
2037
|
+
remediationHint: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
|
|
2038
|
+
expectedReference: "expected.test_count",
|
|
2039
|
+
expected: { test_count: expectations.expected.testCount },
|
|
2040
|
+
observed: { test_count: entries.length },
|
|
2041
|
+
action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
|
|
2042
|
+
confidence: 1,
|
|
2043
|
+
}));
|
|
2044
|
+
}
|
|
1489
2045
|
expectations.expected.fullNames.forEach((fullName, index) => {
|
|
1490
2046
|
if (!allFullNames.includes(fullName)) {
|
|
1491
2047
|
runFindings.push(createFinding({
|
|
1492
2048
|
subject: "run",
|
|
1493
2049
|
subjectType: "run",
|
|
1494
2050
|
severity: "high",
|
|
2051
|
+
impact: "reject",
|
|
1495
2052
|
category: "scope",
|
|
1496
|
-
checkName: "
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
2053
|
+
checkName: "expected-test-missing",
|
|
2054
|
+
title: "Expected test was not observed",
|
|
2055
|
+
message: "The expected test did not appear in the observed logical results.",
|
|
2056
|
+
explanation: `Expected test did not run: ${fullName}`,
|
|
2057
|
+
evidencePaths: expectations.relativePath
|
|
2058
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2059
|
+
: ["manifest/tests.jsonl"],
|
|
2060
|
+
remediationHint: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
|
|
1501
2061
|
expectedReference: `expected.full_names[${index}]`,
|
|
2062
|
+
expected: { full_names: [fullName] },
|
|
2063
|
+
observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
|
|
2064
|
+
action: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
|
|
2065
|
+
confidence: 1,
|
|
1502
2066
|
}));
|
|
1503
2067
|
}
|
|
1504
2068
|
});
|
|
@@ -1507,14 +2071,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1507
2071
|
runFindings.push(createFinding({
|
|
1508
2072
|
subject: "run",
|
|
1509
2073
|
subjectType: "run",
|
|
1510
|
-
severity: "
|
|
2074
|
+
severity: "high",
|
|
2075
|
+
impact: "reject",
|
|
1511
2076
|
category: "scope",
|
|
1512
|
-
checkName: "
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
2077
|
+
checkName: "expected-prefix-missing",
|
|
2078
|
+
title: "Expected test prefix was not observed",
|
|
2079
|
+
message: `No observed test full name started with the expected prefix: ${prefix}`,
|
|
2080
|
+
explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
|
|
2081
|
+
evidencePaths: expectations.relativePath
|
|
2082
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2083
|
+
: ["manifest/tests.jsonl"],
|
|
2084
|
+
remediationHint: "Treat the run as wrong scope or missing coverage.",
|
|
1517
2085
|
expectedReference: `expected.full_name_prefixes[${index}]`,
|
|
2086
|
+
expected: { full_name_prefixes: [prefix] },
|
|
2087
|
+
observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
|
|
2088
|
+
action: "Treat the run as wrong scope or missing coverage.",
|
|
2089
|
+
confidence: 1,
|
|
1518
2090
|
}));
|
|
1519
2091
|
}
|
|
1520
2092
|
});
|
|
@@ -1523,14 +2095,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1523
2095
|
runFindings.push(createFinding({
|
|
1524
2096
|
subject: "run",
|
|
1525
2097
|
subjectType: "run",
|
|
1526
|
-
severity: "
|
|
2098
|
+
severity: "high",
|
|
2099
|
+
impact: "reject",
|
|
1527
2100
|
category: "scope",
|
|
1528
|
-
checkName: "
|
|
2101
|
+
checkName: "expected-environment-missing",
|
|
2102
|
+
title: "Expected environment was not observed",
|
|
1529
2103
|
message: `Expected environment did not appear in the run: ${environment}`,
|
|
1530
|
-
explanation: "The expectations
|
|
1531
|
-
evidencePaths: expectations.relativePath
|
|
1532
|
-
|
|
2104
|
+
explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
|
|
2105
|
+
evidencePaths: expectations.relativePath
|
|
2106
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2107
|
+
: ["manifest/tests.jsonl"],
|
|
2108
|
+
remediationHint: "Rerun in the intended environment before making environment-specific claims.",
|
|
1533
2109
|
expectedReference: `expected.environments[${index}]`,
|
|
2110
|
+
expected: { environments: [environment] },
|
|
2111
|
+
observed: { environments: actualEnvironments },
|
|
2112
|
+
action: "Rerun in the intended environment before making environment-specific claims.",
|
|
2113
|
+
confidence: 1,
|
|
1534
2114
|
}));
|
|
1535
2115
|
}
|
|
1536
2116
|
});
|
|
@@ -1540,14 +2120,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1540
2120
|
runFindings.push(createFinding({
|
|
1541
2121
|
subject: "run",
|
|
1542
2122
|
subjectType: "run",
|
|
1543
|
-
severity: "
|
|
2123
|
+
severity: "high",
|
|
2124
|
+
impact: "reject",
|
|
1544
2125
|
category: "scope",
|
|
1545
|
-
checkName: "
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
2126
|
+
checkName: "expected-label-missing",
|
|
2127
|
+
title: "Expected label was not observed",
|
|
2128
|
+
message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
|
|
2129
|
+
explanation: "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
|
|
2130
|
+
evidencePaths: expectations.relativePath
|
|
2131
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2132
|
+
: ["manifest/tests.jsonl"],
|
|
2133
|
+
remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
|
|
1550
2134
|
expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
|
|
2135
|
+
expected: { label_values: { [labelName]: values } },
|
|
2136
|
+
observed: { test_count: entries.length },
|
|
2137
|
+
action: "Fix metadata, selector, or run the correct labeled scope.",
|
|
2138
|
+
confidence: 1,
|
|
1551
2139
|
}));
|
|
1552
2140
|
}
|
|
1553
2141
|
});
|
|
@@ -1570,28 +2158,53 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1570
2158
|
});
|
|
1571
2159
|
}
|
|
1572
2160
|
}
|
|
2161
|
+
const evidenceTargetKeys = expectations
|
|
2162
|
+
? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
|
|
2163
|
+
: new Set();
|
|
1573
2164
|
for (const entry of entries) {
|
|
1574
2165
|
const currentAttempt = entry.attempts[0];
|
|
1575
2166
|
const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
|
|
1576
2167
|
const testEvidencePaths = collectTestEvidencePaths(entry);
|
|
1577
2168
|
const allStepSummary = mergeStepSummaries(entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])));
|
|
2169
|
+
const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
|
|
2170
|
+
const expectedEvidence = expectations?.evidence;
|
|
2171
|
+
const currentStepSummary = currentAttemptStepSummary(entry);
|
|
2172
|
+
const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
|
|
2173
|
+
const currentAttachments = nonMissingArtifacts(entry);
|
|
1578
2174
|
const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
|
|
1579
2175
|
const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
|
|
1580
2176
|
const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
|
|
1581
2177
|
if (entry.scope.scopeMatch === "forbidden") {
|
|
2178
|
+
const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => reference.startsWith("forbidden.label_values"));
|
|
2179
|
+
const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
|
|
1582
2180
|
entry.findings.push(createFinding({
|
|
1583
2181
|
subject: entry.key,
|
|
1584
2182
|
subjectType: "test",
|
|
1585
2183
|
severity: "high",
|
|
2184
|
+
impact: "reject",
|
|
1586
2185
|
category: "scope",
|
|
1587
|
-
checkName
|
|
1588
|
-
|
|
1589
|
-
|
|
2186
|
+
checkName,
|
|
2187
|
+
title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
|
|
2188
|
+
message: forbiddenLabelReference
|
|
2189
|
+
? "This test has a label value that was explicitly forbidden."
|
|
2190
|
+
: "This test matched a forbidden selector from the expectations.",
|
|
2191
|
+
explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
|
|
1590
2192
|
evidencePaths: expectations?.relativePath
|
|
1591
2193
|
? [entry.relativePath, expectations.relativePath]
|
|
1592
2194
|
: [entry.relativePath],
|
|
1593
|
-
remediationHint:
|
|
1594
|
-
|
|
2195
|
+
remediationHint: forbiddenLabelReference
|
|
2196
|
+
? "Treat as scope drift. Split or correct the run before using it as focused validation."
|
|
2197
|
+
: "Tighten the test selection or update the expectations before accepting the run.",
|
|
2198
|
+
expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
|
|
2199
|
+
expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
|
|
2200
|
+
observed: {
|
|
2201
|
+
full_name: entry.tr.fullName ?? entry.tr.name,
|
|
2202
|
+
labels: toLabelEntries(entry.tr.labels),
|
|
2203
|
+
},
|
|
2204
|
+
action: forbiddenLabelReference
|
|
2205
|
+
? "Treat as scope drift. Split or correct the run before using it as focused validation."
|
|
2206
|
+
: "Tighten the test selection or update the expectations before accepting the run.",
|
|
2207
|
+
confidence: 1,
|
|
1595
2208
|
}));
|
|
1596
2209
|
}
|
|
1597
2210
|
else if (entry.scope.scopeMatch === "unexpected") {
|
|
@@ -1602,11 +2215,11 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1602
2215
|
category: "scope",
|
|
1603
2216
|
checkName: "unexpected-test",
|
|
1604
2217
|
message: "This test ran outside the expected scope.",
|
|
1605
|
-
explanation: "The expectations
|
|
2218
|
+
explanation: "The expectations defined positive scope selectors, but this logical test did not match any of them.",
|
|
1606
2219
|
evidencePaths: expectations?.relativePath
|
|
1607
2220
|
? [entry.relativePath, expectations.relativePath]
|
|
1608
2221
|
: [entry.relativePath],
|
|
1609
|
-
remediationHint: "Rerun only the intended tests or broaden the expectations
|
|
2222
|
+
remediationHint: "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
|
|
1610
2223
|
}));
|
|
1611
2224
|
}
|
|
1612
2225
|
if (entry.scope.metadataMismatches.length > 0) {
|
|
@@ -1640,6 +2253,117 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1640
2253
|
confidence: 0.85,
|
|
1641
2254
|
}));
|
|
1642
2255
|
}
|
|
2256
|
+
expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
|
|
2257
|
+
if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
|
|
2258
|
+
return;
|
|
2259
|
+
}
|
|
2260
|
+
entry.findings.push(createFinding({
|
|
2261
|
+
subject: entry.key,
|
|
2262
|
+
subjectType: "test",
|
|
2263
|
+
severity: "warning",
|
|
2264
|
+
impact: "iterate",
|
|
2265
|
+
category: "evidence",
|
|
2266
|
+
checkName: "expected-step-containing-missing",
|
|
2267
|
+
title: "Expected step text was not observed",
|
|
2268
|
+
message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
|
|
2269
|
+
explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
|
|
2270
|
+
evidencePaths: expectations?.relativePath
|
|
2271
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2272
|
+
: [entry.relativePath],
|
|
2273
|
+
remediationHint: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
|
|
2274
|
+
expectedReference: `evidence.step_name_contains[${index}]`,
|
|
2275
|
+
expected: { step_name_contains: [expectedText] },
|
|
2276
|
+
observed: { steps: currentStepSummary.totalSteps, matched: false },
|
|
2277
|
+
action: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
|
|
2278
|
+
confidence: 0.9,
|
|
2279
|
+
}));
|
|
2280
|
+
});
|
|
2281
|
+
if (expectedEvidenceApplies &&
|
|
2282
|
+
expectedEvidence?.minSteps !== undefined &&
|
|
2283
|
+
currentMeaningfulSteps < expectedEvidence.minSteps) {
|
|
2284
|
+
entry.findings.push(createFinding({
|
|
2285
|
+
subject: entry.key,
|
|
2286
|
+
subjectType: "test",
|
|
2287
|
+
severity: "warning",
|
|
2288
|
+
impact: "iterate",
|
|
2289
|
+
category: "evidence",
|
|
2290
|
+
checkName: "insufficient-expected-steps",
|
|
2291
|
+
title: "Expected step count was not met",
|
|
2292
|
+
message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
|
|
2293
|
+
explanation: "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
|
|
2294
|
+
evidencePaths: expectations?.relativePath
|
|
2295
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2296
|
+
: [entry.relativePath],
|
|
2297
|
+
remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
|
|
2298
|
+
expectedReference: "evidence.min_steps",
|
|
2299
|
+
expected: { min_steps: expectedEvidence.minSteps },
|
|
2300
|
+
observed: { meaningful_steps: currentMeaningfulSteps },
|
|
2301
|
+
action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
|
|
2302
|
+
confidence: 0.9,
|
|
2303
|
+
}));
|
|
2304
|
+
}
|
|
2305
|
+
if (expectedEvidenceApplies &&
|
|
2306
|
+
expectedEvidence?.minAttachments !== undefined &&
|
|
2307
|
+
currentAttachments.length < expectedEvidence.minAttachments) {
|
|
2308
|
+
entry.findings.push(createFinding({
|
|
2309
|
+
subject: entry.key,
|
|
2310
|
+
subjectType: "test",
|
|
2311
|
+
severity: "warning",
|
|
2312
|
+
impact: "iterate",
|
|
2313
|
+
category: "evidence",
|
|
2314
|
+
checkName: "insufficient-expected-attachments",
|
|
2315
|
+
title: "Expected attachment count was not met",
|
|
2316
|
+
message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
|
|
2317
|
+
explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
|
|
2318
|
+
evidencePaths: expectations?.relativePath
|
|
2319
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2320
|
+
: [entry.relativePath],
|
|
2321
|
+
remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
|
|
2322
|
+
expectedReference: "evidence.min_attachments",
|
|
2323
|
+
expected: { min_attachments: expectedEvidence.minAttachments },
|
|
2324
|
+
observed: { attachments: currentAttachments.length },
|
|
2325
|
+
action: "Attach real runtime artifacts only when they are needed for debugging or review.",
|
|
2326
|
+
confidence: 0.9,
|
|
2327
|
+
}));
|
|
2328
|
+
}
|
|
2329
|
+
expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
|
|
2330
|
+
if (!expectedEvidenceApplies) {
|
|
2331
|
+
return;
|
|
2332
|
+
}
|
|
2333
|
+
if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
|
|
2334
|
+
return;
|
|
2335
|
+
}
|
|
2336
|
+
entry.findings.push(createFinding({
|
|
2337
|
+
subject: entry.key,
|
|
2338
|
+
subjectType: "test",
|
|
2339
|
+
severity: "warning",
|
|
2340
|
+
impact: "iterate",
|
|
2341
|
+
category: "evidence",
|
|
2342
|
+
checkName: "missing-expected-attachment",
|
|
2343
|
+
title: "Expected attachment was not observed",
|
|
2344
|
+
message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
|
|
2345
|
+
explanation: "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
|
|
2346
|
+
evidencePaths: expectations?.relativePath
|
|
2347
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2348
|
+
: [entry.relativePath],
|
|
2349
|
+
remediationHint: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
|
|
2350
|
+
expectedReference: `evidence.attachments[${index}]`,
|
|
2351
|
+
expected: {
|
|
2352
|
+
attachment: {
|
|
2353
|
+
...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
|
|
2354
|
+
...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
|
|
2355
|
+
},
|
|
2356
|
+
},
|
|
2357
|
+
observed: {
|
|
2358
|
+
attachments: currentAttachments.map((attachment) => ({
|
|
2359
|
+
name: attachment.displayName,
|
|
2360
|
+
content_type: attachment.contentType ?? null,
|
|
2361
|
+
})),
|
|
2362
|
+
},
|
|
2363
|
+
action: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
|
|
2364
|
+
confidence: 0.95,
|
|
2365
|
+
}));
|
|
2366
|
+
});
|
|
1643
2367
|
if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
|
|
1644
2368
|
entry.findings.push(createFinding({
|
|
1645
2369
|
subject: entry.key,
|
|
@@ -1928,11 +2652,17 @@ const appendJsonlLine = async (path, item) => {
|
|
|
1928
2652
|
await appendFile(path, `${JSON.stringify(item)}\n`, "utf-8");
|
|
1929
2653
|
};
|
|
1930
2654
|
const toRunManifest = (params) => {
|
|
1931
|
-
const { context, command, generatedAt, phase, expectations,
|
|
2655
|
+
const { context, command, agentContext, generatedAt, phase, expectations, snapshot, humanReport } = params;
|
|
1932
2656
|
const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
|
|
1933
2657
|
const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
|
|
1934
2658
|
const originalExitCode = snapshot.globalExitCode?.original ?? null;
|
|
1935
2659
|
const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
|
|
2660
|
+
const expectationResult = buildExpectationResult({
|
|
2661
|
+
expectations,
|
|
2662
|
+
findings: snapshot.combinedAllFindings,
|
|
2663
|
+
observedTestCount: snapshot.entries.length,
|
|
2664
|
+
modelingSummary: snapshot.modelingSummary,
|
|
2665
|
+
});
|
|
1936
2666
|
return {
|
|
1937
2667
|
schema_version: AGENT_SCHEMA_VERSION,
|
|
1938
2668
|
report_uuid: context.reportUuid,
|
|
@@ -1966,27 +2696,31 @@ const toRunManifest = (params) => {
|
|
|
1966
2696
|
findings_manifest: "manifest/findings.jsonl",
|
|
1967
2697
|
test_events_manifest: "manifest/test-events.jsonl",
|
|
1968
2698
|
expected_manifest: expectations?.relativePath ?? null,
|
|
1969
|
-
|
|
2699
|
+
human_report_manifest: humanReport ? "manifest/human-report.json" : null,
|
|
1970
2700
|
process_logs: {
|
|
1971
2701
|
stdout: stdoutArtifact?.relativePath ?? null,
|
|
1972
2702
|
stderr: stderrArtifact?.relativePath ?? null,
|
|
1973
2703
|
},
|
|
1974
2704
|
},
|
|
2705
|
+
human_report: humanReport ?? null,
|
|
1975
2706
|
expectations_present: Boolean(expectations),
|
|
2707
|
+
expectations: expectations ? toExpectationModel(expectations) : null,
|
|
2708
|
+
expectation_result: expectationResult,
|
|
1976
2709
|
check_summary: buildCheckSummary(snapshot.combinedAllFindings),
|
|
1977
2710
|
agent_context: {
|
|
1978
|
-
agent_name:
|
|
1979
|
-
loop_id:
|
|
1980
|
-
task_id:
|
|
1981
|
-
conversation_id:
|
|
2711
|
+
agent_name: agentContext.agentName ?? null,
|
|
2712
|
+
loop_id: agentContext.loopId ?? null,
|
|
2713
|
+
task_id: agentContext.taskId ?? expectations?.taskId ?? null,
|
|
2714
|
+
conversation_id: agentContext.conversationId ?? null,
|
|
1982
2715
|
},
|
|
1983
2716
|
};
|
|
1984
2717
|
};
|
|
1985
2718
|
const writeSnapshotFiles = async (params) => {
|
|
1986
2719
|
const { runtime, snapshot, phase } = params;
|
|
1987
|
-
const { outputDir, context, command, generatedAt, expectations
|
|
2720
|
+
const { outputDir, context, command, generatedAt, expectations } = runtime;
|
|
1988
2721
|
const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
|
|
1989
2722
|
const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
|
|
2723
|
+
const humanReport = await resolveHumanReportStatus(runtime.humanReport);
|
|
1990
2724
|
for (const stalePath of runtime.currentTestPaths) {
|
|
1991
2725
|
if (!nextTestPaths.has(stalePath)) {
|
|
1992
2726
|
await rm(stalePath, { force: true });
|
|
@@ -2010,12 +2744,16 @@ const writeSnapshotFiles = async (params) => {
|
|
|
2010
2744
|
writeJson(join(outputDir, "manifest", "run.json"), toRunManifest({
|
|
2011
2745
|
context,
|
|
2012
2746
|
command,
|
|
2747
|
+
agentContext: runtime.agentContext,
|
|
2013
2748
|
generatedAt,
|
|
2014
2749
|
phase,
|
|
2015
2750
|
expectations,
|
|
2016
|
-
projectGuide,
|
|
2017
2751
|
snapshot,
|
|
2752
|
+
humanReport,
|
|
2018
2753
|
})),
|
|
2754
|
+
...(humanReport
|
|
2755
|
+
? [writeJson(join(outputDir, "manifest", "human-report.json"), humanReport)]
|
|
2756
|
+
: [rm(join(outputDir, "manifest", "human-report.json"), { force: true })]),
|
|
2019
2757
|
writeJsonlSnapshot(join(outputDir, "manifest", "tests.jsonl"), snapshot.entries.map(toTestsManifestLine)),
|
|
2020
2758
|
writeJsonlSnapshot(join(outputDir, "manifest", "findings.jsonl"), snapshot.combinedAllFindings.map(toFindingManifestLine)),
|
|
2021
2759
|
writeTextAtomic(join(outputDir, "index.md"), renderIndex({
|
|
@@ -2034,8 +2772,9 @@ const writeSnapshotFiles = async (params) => {
|
|
|
2034
2772
|
globalExitCode: snapshot.globalExitCode,
|
|
2035
2773
|
qualityGateResults: snapshot.qualityGateResults,
|
|
2036
2774
|
findings: snapshot.combinedAllFindings,
|
|
2775
|
+
humanReport,
|
|
2037
2776
|
})),
|
|
2038
|
-
writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(
|
|
2777
|
+
writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
|
|
2039
2778
|
]);
|
|
2040
2779
|
};
|
|
2041
2780
|
const createBootstrapSnapshot = () => ({
|
|
@@ -2079,7 +2818,7 @@ const createBootstrapSnapshot = () => ({
|
|
|
2079
2818
|
combinedAllFindings: [],
|
|
2080
2819
|
});
|
|
2081
2820
|
const writeBootstrapFiles = async (runtime) => {
|
|
2082
|
-
await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(
|
|
2821
|
+
await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
|
|
2083
2822
|
await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
|
|
2084
2823
|
await writeSnapshotFiles({
|
|
2085
2824
|
runtime,
|
|
@@ -2104,19 +2843,54 @@ const toTestsManifestLine = (entry) => ({
|
|
|
2104
2843
|
markdown_path: entry.relativePath,
|
|
2105
2844
|
assets_dir: entry.relativeAssetDir,
|
|
2106
2845
|
});
|
|
2107
|
-
const
|
|
2108
|
-
|
|
2109
|
-
subject: finding.subject,
|
|
2110
|
-
severity: finding.severity,
|
|
2111
|
-
category: finding.category,
|
|
2112
|
-
check_name: finding.checkName,
|
|
2113
|
-
message: finding.message,
|
|
2114
|
-
explanation: finding.explanation,
|
|
2115
|
-
evidence_paths: finding.evidencePaths,
|
|
2116
|
-
remediation_hint: finding.remediationHint,
|
|
2117
|
-
expected_reference: finding.expectedReference,
|
|
2118
|
-
confidence: finding.confidence,
|
|
2846
|
+
/**
 * Builds the structured `subject` descriptor for a finding.
 *
 * The descriptor always carries the finding's subject type. For findings
 * whose subject type is "test", the subject value is additionally exposed
 * under both `id` and `path`.
 *
 * @param finding Finding providing `subjectType` and `subject`.
 * @returns Descriptor with keys in the order `type`, `id`, `path`.
 */
const toFindingSubject = (finding) => {
    const descriptor = { type: finding.subjectType };
    if (finding.subjectType !== "test") {
        return descriptor;
    }
    descriptor.id = finding.subject;
    descriptor.path = finding.subject;
    return descriptor;
};
|
|
2850
|
+
/**
 * Converts one finding into the record written to `manifest/findings.jsonl`.
 *
 * The record is tagged "allure-agent-finding/v2". The pre-v2 field names are
 * emitted twice: nested under `legacy` and flat at the top level, where the
 * raw subject appears as `subject_ref` because `subject` holds the structured
 * descriptor.
 *
 * Fallbacks applied when a finding omits the newer fields:
 * - `confidence` falls back to 1
 * - `title` falls back to `message`
 * - `expected` falls back to `{ reference }` built from `expectedReference`,
 *   or to an empty object when there is no reference
 * - `observed` falls back to `{ detail: explanation }`
 * - `action` falls back to `remediationHint`
 *
 * `source`, `limits` and `affected` are emitted only when truthy;
 * `more_count` is emitted whenever `moreCount` is not `undefined`.
 *
 * @param finding Finding to serialise.
 * @returns Plain object; key order is the order in which fields are emitted.
 */
const toFindingManifestLine = (finding) => {
    const impact = defaultImpactForFinding(finding);
    const {
        findingId,
        subject,
        subjectType,
        checkName,
        explanation,
        evidencePaths,
        remediationHint,
        expectedReference,
    } = finding;
    // Optional fields are collected first and spread between `action` and
    // `legacy` so the emitted key order stays the same.
    const optionalFields = {};
    if (finding.source) {
        optionalFields.source = finding.source;
    }
    if (finding.limits) {
        optionalFields.limits = finding.limits;
    }
    if (finding.affected) {
        optionalFields.affected = finding.affected;
    }
    if (finding.moreCount !== undefined) {
        optionalFields.more_count = finding.moreCount;
    }
    const legacy = {
        finding_id: findingId,
        subject,
        subject_type: subjectType,
        check_name: checkName,
        explanation,
        evidence_paths: evidencePaths,
        remediation_hint: remediationHint,
        expected_reference: expectedReference,
    };
    return {
        schema_version: "allure-agent-finding/v2",
        check_id: checkName,
        instance_id: findingId,
        severity: finding.severity,
        impact,
        confidence: finding.confidence ?? 1,
        category: finding.category,
        title: finding.title ?? finding.message,
        message: finding.message,
        subject: toFindingSubject(finding),
        expected: finding.expected ?? (expectedReference ? { reference: expectedReference } : {}),
        observed: finding.observed ?? { detail: explanation },
        evidence: { paths: evidencePaths },
        action: finding.action ?? remediationHint,
        ...optionalFields,
        legacy,
        finding_id: findingId,
        subject_ref: subject,
        subject_type: subjectType,
        check_name: checkName,
        explanation,
        evidence_paths: evidencePaths,
        remediation_hint: remediationHint,
        expected_reference: expectedReference,
    };
};
|
|
2120
2894
|
const queueRuntimeTask = (runtime, task) => {
|
|
2121
2895
|
runtime.queue = runtime.queue
|
|
2122
2896
|
.catch(() => undefined)
|
|
@@ -2238,18 +3012,23 @@ const createRuntimeState = async (params) => {
|
|
|
2238
3012
|
await cleanupManagedEntries(outputDir);
|
|
2239
3013
|
const generatedAt = new Date().toISOString();
|
|
2240
3014
|
const createFinding = createFindingFactory();
|
|
2241
|
-
const expectationLoadResult = await loadExpectations(outputDir, createFinding);
|
|
2242
|
-
const projectGuide = await loadProjectGuide(outputDir);
|
|
3015
|
+
const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
|
|
2243
3016
|
const runtime = {
|
|
2244
3017
|
outputDir,
|
|
2245
3018
|
context,
|
|
2246
3019
|
store,
|
|
2247
3020
|
generatedAt,
|
|
2248
|
-
command:
|
|
3021
|
+
command: options.command,
|
|
3022
|
+
agentContext: {
|
|
3023
|
+
agentName: options.agentName,
|
|
3024
|
+
loopId: options.loopId,
|
|
3025
|
+
taskId: options.taskId,
|
|
3026
|
+
conversationId: options.conversationId,
|
|
3027
|
+
},
|
|
3028
|
+
humanReport: options.humanReport,
|
|
2249
3029
|
createFinding,
|
|
2250
3030
|
expectations: expectationLoadResult.expectations,
|
|
2251
3031
|
expectationLoadFindings: expectationLoadResult.findings,
|
|
2252
|
-
projectGuide,
|
|
2253
3032
|
unsubscribers: [],
|
|
2254
3033
|
queue: Promise.resolve(),
|
|
2255
3034
|
seenLogicalKeys: new Set(),
|