@allurereport/plugin-agent 3.10.0 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -79
- package/dist/capabilities.d.ts +99 -0
- package/dist/capabilities.js +173 -0
- package/dist/errors.d.ts +9 -0
- package/dist/errors.js +15 -0
- package/dist/guidance.d.ts +4 -5
- package/dist/guidance.js +194 -57
- package/dist/harness.d.ts +68 -4
- package/dist/harness.js +45 -17
- package/dist/index.d.ts +9 -1
- package/dist/index.js +9 -0
- package/dist/inline-expectations.d.ts +23 -0
- package/dist/inline-expectations.js +186 -0
- package/dist/invalid-output.d.ts +58 -0
- package/dist/invalid-output.js +238 -0
- package/dist/model.d.ts +34 -0
- package/dist/model.js +8 -1
- package/dist/paths.d.ts +3 -0
- package/dist/paths.js +10 -0
- package/dist/plugin.js +847 -136
- package/dist/query.d.ts +193 -0
- package/dist/query.js +175 -0
- package/dist/selection.d.ts +42 -0
- package/dist/selection.js +141 -0
- package/dist/state.d.ts +15 -0
- package/dist/state.js +83 -0
- package/package.json +6 -6
package/dist/plugin.js
CHANGED
|
@@ -12,20 +12,12 @@ var __classPrivateFieldSet = (this && this.__classPrivateFieldSet) || function (
|
|
|
12
12
|
var _AgentPlugin_runtime;
|
|
13
13
|
import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
|
|
14
14
|
import { basename, dirname, extname, join, relative, resolve } from "node:path";
|
|
15
|
-
import process
|
|
15
|
+
import process from "node:process";
|
|
16
16
|
import { formatDuration, isAttachment, isStep, } from "@allurereport/core-api";
|
|
17
|
-
import { parse } from "yaml";
|
|
18
17
|
import { renderAgentsGuide } from "./guidance.js";
|
|
19
|
-
|
|
20
|
-
const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
|
|
21
|
-
const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
|
|
22
|
-
const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
|
|
23
|
-
const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
|
|
24
|
-
const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
|
|
25
|
-
const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
|
|
26
|
-
const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
|
|
18
|
+
import { parseAgentExpectations } from "./model.js";
|
|
27
19
|
const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
|
|
28
|
-
const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"
|
|
20
|
+
const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"];
|
|
29
21
|
const STATUS_ORDER = {
|
|
30
22
|
failed: 0,
|
|
31
23
|
broken: 1,
|
|
@@ -126,16 +118,34 @@ const normalizeLabelValues = (value) => {
|
|
|
126
118
|
return values.length ? [[name, values]] : [];
|
|
127
119
|
}));
|
|
128
120
|
};
|
|
121
|
+
/**
 * Passes `value` through when it is an integer greater than or equal to zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} `value` itself, or `undefined` when it is not a non-negative integer.
 */
const normalizeNonNegativeInteger = (value) => {
    // Number.isInteger never coerces, so strings, null, NaN and Infinity are rejected here too.
    if (!Number.isInteger(value) || value < 0) {
        return undefined;
    }
    return value;
};
|
|
122
|
+
/**
 * Passes `value` through when it is an integer strictly greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} `value` itself, or `undefined` when it is not a positive integer.
 */
const normalizePositiveInteger = (value) => {
    // Number.isInteger never coerces, so non-number inputs fail this check as well.
    if (!Number.isInteger(value) || value <= 0) {
        return undefined;
    }
    return value;
};
|
|
129
123
|
/**
 * Converts a raw selector section (snake_case keys) into the camelCase shape
 * used by the rest of this module.
 *
 * @param {object | undefined | null} input - Raw selector object; may be absent.
 * @returns {{environments: *, fullNames: *, fullNamePrefixes: *, labelValues: *, testCount: (number | undefined)}}
 *   Normalized selectors. `testCount` is only kept when it is a non-negative integer.
 */
const normalizeSelectors = (input) => ({
    environments: normalizeStringArray(input?.environments),
    fullNames: normalizeStringArray(input?.full_names),
    fullNamePrefixes: normalizeStringArray(input?.full_name_prefixes),
    labelValues: normalizeLabelValues(input?.label_values),
    testCount: normalizeNonNegativeInteger(input?.test_count),
});
|
|
135
130
|
/**
 * Reports whether a normalized selector set names at least one environment,
 * full name, full-name prefix, or label.
 *
 * `testCount` is deliberately not part of this check; callers that care about
 * it test `selectors.testCount` separately.
 *
 * @param {{environments: Array, fullNames: Array, fullNamePrefixes: Array, labelValues: object}} selectors
 * @returns {boolean}
 */
const hasSelector = (selectors) => selectors.environments.length > 0 ||
    selectors.fullNames.length > 0 ||
    selectors.fullNamePrefixes.length > 0 ||
    Object.keys(selectors.labelValues).length > 0;
|
|
134
|
+
/**
 * Converts the raw `evidence` section (snake_case keys) into its normalized
 * camelCase shape.
 *
 * @param {object | undefined | null} input - Raw evidence object; may be absent.
 * @returns {{minSteps: (number | undefined), minAttachments: (number | undefined), stepNameContains: *, attachments: Array<{name?: string, contentType?: string}>}}
 */
const normalizeEvidenceExpectations = (input) => ({
    minSteps: normalizePositiveInteger(input?.min_steps),
    minAttachments: normalizePositiveInteger(input?.min_attachments),
    stepNameContains: normalizeStringArray(input?.step_name_contains),
    // A non-array `attachments` value is treated as "no attachment expectations".
    attachments: (Array.isArray(input?.attachments) ? input.attachments : []).flatMap((attachment) => {
        if (!attachment || typeof attachment !== "object") {
            return [];
        }
        const name = typeof attachment.name === "string" && attachment.name.length > 0 ? attachment.name : undefined;
        const contentType = typeof attachment.content_type === "string" && attachment.content_type.length > 0
            ? attachment.content_type
            : undefined;
        // Entries with neither a usable name nor a usable content type are dropped.
        return name || contentType ? [{ ...(name ? { name } : {}), ...(contentType ? { contentType } : {}) }] : [];
    }),
});
|
|
139
149
|
const normalizeNotes = (value) => {
|
|
140
150
|
if (typeof value === "string") {
|
|
141
151
|
return value.length > 0 ? [value] : [];
|
|
@@ -273,6 +283,27 @@ const mergeStepSummaries = (items) => items.reduce((acc, item) => ({
|
|
|
273
283
|
attachmentRefs: 0,
|
|
274
284
|
assertionLikeSteps: 0,
|
|
275
285
|
});
|
|
286
|
+
/**
 * Flattens a step tree into a list of `{ name, path }` records in pre-order
 * (a step first, then its nested steps, then its next sibling).
 *
 * Nodes for which `isStep` returns false are skipped together with anything
 * beneath them.
 *
 * @param {Array} steps - Step nodes to walk.
 * @param {string[]} [path=[]] - Names of the ancestors of `steps`.
 * @returns {Array<{name: string, path: string[]}>} One record per step; `path` ends with the step's own name.
 */
const collectStepNames = (steps, path = []) => {
    const names = [];
    // One shared accumulator avoids spreading whole subtrees into push(),
    // which can exceed the engine's argument limit on very large trees.
    const visit = (nodes, parentPath) => {
        for (const node of nodes) {
            if (!isStep(node)) {
                continue;
            }
            const nextPath = [...parentPath, node.name];
            names.push({ name: node.name, path: nextPath });
            if (node.steps.length) {
                visit(node.steps, nextPath);
            }
        }
    };
    visit(steps, path);
    return names;
};
|
|
300
|
+
/**
 * Checks whether any step of the entry's first attempt has a name containing
 * `expectedText`, comparing both sides after `normalizeStepText`.
 *
 * @param {object} entry - Test entry; reads `entry.attempts[0].tr.steps`.
 * @param {string} expectedText - Text to look for.
 * @returns {boolean} `false` when the normalized expected text is empty.
 */
const testStepContainsText = (entry, expectedText) => {
    const expected = normalizeStepText(expectedText);
    if (!expected) {
        return false;
    }
    return collectStepNames(entry.attempts[0].tr.steps).some(({ name }) => normalizeStepText(name).includes(expected));
};
|
|
276
307
|
const buildAttemptSignature = (attempt) => JSON.stringify({
|
|
277
308
|
status: attempt.tr.status,
|
|
278
309
|
errorMessage: attempt.tr.error?.message,
|
|
@@ -357,6 +388,7 @@ const subtractStatusCounts = (left, right) => ({
|
|
|
357
388
|
});
|
|
358
389
|
/**
 * Renders status counters as a one-line summary:
 * "N total (a failed, b broken, c unknown, d skipped, e passed)".
 *
 * @param {{total: number, failed: number, broken: number, unknown: number, skipped: number, passed: number}} counts
 * @returns {string}
 */
const summarizeStatusCounts = (counts) => {
    const breakdown = [
        `${counts.failed} failed`,
        `${counts.broken} broken`,
        `${counts.unknown} unknown`,
        `${counts.skipped} skipped`,
        `${counts.passed} passed`,
    ].join(", ");
    return `${counts.total} total (${breakdown})`;
};
|
|
359
390
|
/**
 * Collapses every whitespace run to a single space and strips leading and
 * trailing whitespace.
 *
 * @param {string} value
 * @returns {string}
 */
const normalizeLogLine = (value) => {
    const trimmed = value.trim();
    return trimmed.replace(/\s+/g, " ");
};
|
|
391
|
+
/**
 * Produces the comparison form of a step name: whitespace runs collapsed to a
 * single space, surrounding whitespace removed, then lower-cased with
 * `toLocaleLowerCase()`.
 *
 * @param {string} value
 * @returns {string}
 */
const normalizeStepText = (value) => {
    const collapsed = value.trim().replace(/\s+/g, " ");
    return collapsed.toLocaleLowerCase();
};
|
|
360
392
|
/**
 * Normalizes a warning line with `normalizeLogLine`, then rewrites a leading
 * "(node:<pid>) Warning:" prefix (case-insensitive) to plain "Warning: ".
 *
 * @param {string} value
 * @returns {string}
 */
const normalizeWarningLine = (value) => {
    const line = normalizeLogLine(value);
    return line.replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: ");
};
|
|
361
393
|
const buildCountedValues = (values) => {
|
|
362
394
|
const counts = new Map();
|
|
@@ -752,10 +784,13 @@ const renderModelingSummary = (modeling) => {
|
|
|
752
784
|
return lines.join("\n");
|
|
753
785
|
};
|
|
754
786
|
const renderSelectorSummary = (title, selectors) => {
|
|
755
|
-
if (!hasSelector(selectors)) {
|
|
787
|
+
if (!hasSelector(selectors) && selectors.testCount === undefined) {
|
|
756
788
|
return `- ${title}: None`;
|
|
757
789
|
}
|
|
758
790
|
const parts = [];
|
|
791
|
+
if (selectors.testCount !== undefined) {
|
|
792
|
+
parts.push(`test count: ${selectors.testCount}`);
|
|
793
|
+
}
|
|
759
794
|
if (selectors.environments.length) {
|
|
760
795
|
parts.push(`environments: ${selectors.environments.join(", ")}`);
|
|
761
796
|
}
|
|
@@ -771,6 +806,29 @@ const renderSelectorSummary = (title, selectors) => {
|
|
|
771
806
|
}
|
|
772
807
|
return `- ${title}: ${parts.join(" | ")}`;
|
|
773
808
|
};
|
|
809
|
+
/**
 * Renders the normalized evidence expectations as a single markdown bullet.
 *
 * Each configured control becomes one segment, joined with " | ". When no
 * control is set the bullet reads "None".
 *
 * @param {{minSteps?: number, minAttachments?: number, stepNameContains: string[], attachments: Array<{name?: string, contentType?: string}>}} evidence
 * @returns {string} A line starting with "- Evidence expectations: ".
 */
const renderEvidenceExpectationSummary = (evidence) => {
    const parts = [];
    if (evidence.minSteps !== undefined) {
        parts.push(`meaningful steps per test: >= ${evidence.minSteps}`);
    }
    if (evidence.minAttachments !== undefined) {
        parts.push(`attachments per test: >= ${evidence.minAttachments}`);
    }
    if (evidence.stepNameContains.length) {
        parts.push(`step contains: ${evidence.stepNameContains.join("; ")}`);
    }
    if (evidence.attachments.length) {
        // Each attachment lists whichever of name / content-type it defines.
        const describeAttachment = (attachment) => [
            attachment.name ? `name=${attachment.name}` : undefined,
            attachment.contentType ? `content-type=${attachment.contentType}` : undefined,
        ]
            .filter(Boolean)
            .join(", ");
        parts.push(`attachments: ${evidence.attachments.map(describeAttachment).join("; ")}`);
    }
    return `- Evidence expectations: ${parts.length ? parts.join(" | ") : "None"}`;
};
|
|
774
832
|
const buildCheckSummary = (findings) => {
|
|
775
833
|
const countsBySeverity = {
|
|
776
834
|
high: 0,
|
|
@@ -794,6 +852,240 @@ const buildCheckSummary = (findings) => {
|
|
|
794
852
|
countsByCategory,
|
|
795
853
|
};
|
|
796
854
|
};
|
|
855
|
+
/** Check names whose findings are treated as expectation-related. */
const EXPECTATION_CHECK_IDS = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expectations-weak-goal",
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
    "expected-count-mismatch",
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
    "forbidden-label-observed",
    "no-tests-observed",
]);

/** Check names counted as "missing expected" in the expectation result summary. */
const MISSING_EXPECTED_CHECK_IDS = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
]);

/** Check names counted as "evidence mismatches" in the expectation result summary. */
const EVIDENCE_MISMATCH_CHECK_IDS = new Set([
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
]);
|
|
884
|
+
/**
 * Sums the number of values across every label in a label-values map.
 *
 * @param {Record<string, Array>} labelValues
 * @returns {number}
 */
const countLabelValues = (labelValues) => Object.values(labelValues).reduce((total, values) => total + values.length, 0);

/**
 * Counts the controls in the loaded expectations that are matched against
 * observed results: expected selectors and test count, forbidden label values,
 * and evidence requirements. `goal` and `taskId` are not included.
 *
 * @param {object | undefined} expectations - Loaded expectations, if any.
 * @returns {number} 0 when `expectations` is falsy.
 */
const runtimeMatchingControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    return ((expectations.expected.testCount !== undefined ? 1 : 0) +
        expectations.expected.environments.length +
        expectations.expected.fullNames.length +
        expectations.expected.fullNamePrefixes.length +
        countLabelValues(expectations.expected.labelValues) +
        countLabelValues(expectations.forbidden.labelValues) +
        (expectations.evidence.minSteps !== undefined ? 1 : 0) +
        (expectations.evidence.minAttachments !== undefined ? 1 : 0) +
        expectations.evidence.stepNameContains.length +
        expectations.evidence.attachments.length);
};

/**
 * Counts every recognized control in the loaded expectations: the runtime
 * matching controls plus one each for a truthy `goal` and a truthy `taskId`.
 *
 * @param {object | undefined} expectations - Loaded expectations, if any.
 * @returns {number} 0 when `expectations` is falsy.
 */
const recognizedControlCount = (expectations) => {
    if (!expectations) {
        return 0;
    }
    // Delegating keeps the two counters in sync when a control is added.
    return ((expectations.goal ? 1 : 0) +
        (expectations.taskId ? 1 : 0) +
        runtimeMatchingControlCount(expectations));
};
|
|
917
|
+
/**
 * Converts loaded (camelCase) expectations back into the snake_case model
 * shape, omitting every control and section that is unset or empty.
 *
 * Only `labelValues` is read from `expectations.forbidden`.
 *
 * @param {object} expectations - Loaded expectations.
 * @returns {object} Object with any of `goal`, `task_id`, `expected`, `forbidden`, `evidence`, `notes`.
 */
const toExpectationModel = (expectations) => {
    const expected = {};
    const forbidden = {};
    const evidence = {};
    // testCount is compared with undefined so an explicit 0 is preserved.
    if (expectations.expected.testCount !== undefined) {
        expected.test_count = expectations.expected.testCount;
    }
    if (expectations.expected.environments.length) {
        expected.environments = expectations.expected.environments;
    }
    if (expectations.expected.fullNames.length) {
        expected.full_names = expectations.expected.fullNames;
    }
    if (expectations.expected.fullNamePrefixes.length) {
        expected.full_name_prefixes = expectations.expected.fullNamePrefixes;
    }
    if (Object.keys(expectations.expected.labelValues).length) {
        expected.label_values = expectations.expected.labelValues;
    }
    if (Object.keys(expectations.forbidden.labelValues).length) {
        forbidden.label_values = expectations.forbidden.labelValues;
    }
    if (expectations.evidence.minSteps !== undefined) {
        evidence.min_steps = expectations.evidence.minSteps;
    }
    if (expectations.evidence.minAttachments !== undefined) {
        evidence.min_attachments = expectations.evidence.minAttachments;
    }
    if (expectations.evidence.stepNameContains.length) {
        evidence.step_name_contains = expectations.evidence.stepNameContains;
    }
    if (expectations.evidence.attachments.length) {
        evidence.attachments = expectations.evidence.attachments.map((attachment) => ({
            ...(attachment.name ? { name: attachment.name } : {}),
            ...(attachment.contentType ? { content_type: attachment.contentType } : {}),
        }));
    }
    return {
        ...(expectations.goal ? { goal: expectations.goal } : {}),
        ...(expectations.taskId ? { task_id: expectations.taskId } : {}),
        ...(Object.keys(expected).length ? { expected } : {}),
        ...(Object.keys(forbidden).length ? { forbidden } : {}),
        ...(Object.keys(evidence).length ? { evidence } : {}),
        ...(expectations.notes.length ? { notes: expectations.notes } : {}),
    };
};
|
|
963
|
+
/** Check names that default to the "reject" impact. */
const REJECT_IMPACT_CHECK_NAMES = new Set([
    "expected-test-missing",
    "expected-prefix-missing",
    "expected-label-missing",
    "expected-environment-missing",
    "forbidden-label-observed",
    "no-tests-observed",
]);

/** Check names that default to the "iterate" impact. */
const ITERATE_IMPACT_CHECK_NAMES = new Set([
    "expectations-invalid",
    "expectations-empty",
    "expectations-unsupported-control",
    "expected-count-mismatch",
    "expected-step-containing-missing",
    "insufficient-expected-steps",
    "insufficient-expected-attachments",
    "missing-expected-attachment",
    "runner-failures-outside-logical-results",
    "metadata-mismatch",
    "history-id-collision",
    "failed-without-useful-steps",
    "failed-without-attachments",
    "nontrivial-run-with-empty-trace",
    "retries-without-new-evidence",
    "passed-without-observable-evidence",
]);

/**
 * Resolves the impact of a finding.
 *
 * Precedence: an explicit truthy `finding.impact` wins; then the check-name
 * rules ("reject" list, high-confidence "noop-dominated-steps", "iterate"
 * list); then severity ("high" yields "iterate"); otherwise "advisory".
 *
 * @param {{impact?: string, checkName: string, confidence?: number, severity?: string}} finding
 * @returns {string} The explicit impact, or one of "reject", "iterate", "advisory".
 */
const defaultImpactForFinding = (finding) => {
    if (finding.impact) {
        return finding.impact;
    }
    if (REJECT_IMPACT_CHECK_NAMES.has(finding.checkName)) {
        return "reject";
    }
    // A missing confidence counts as 0, so it never reaches the 0.75 threshold.
    if (finding.checkName === "noop-dominated-steps" && (finding.confidence ?? 0) >= 0.75) {
        return "reject";
    }
    if (ITERATE_IMPACT_CHECK_NAMES.has(finding.checkName)) {
        return "iterate";
    }
    if (finding.severity === "high") {
        return "iterate";
    }
    return "advisory";
};
|
|
1005
|
+
/**
 * Returns the strongest impact among `findings`: "reject" if any finding
 * resolves to it, else "iterate" if any does, else `fallback`.
 *
 * @param {Array<object>} findings - Findings to inspect.
 * @param {string} fallback - Returned when no finding resolves to "reject" or "iterate".
 * @returns {string}
 */
const strongestImpact = (findings, fallback) => {
    // Resolve each finding's impact once instead of once per severity tier.
    const impacts = new Set(findings.map((finding) => defaultImpactForFinding(finding)));
    if (impacts.has("reject")) {
        return "reject";
    }
    if (impacts.has("iterate")) {
        return "iterate";
    }
    return fallback;
};
|
|
1014
|
+
/**
 * Builds the "allure-agent-expectation-result/v1" record for a run.
 *
 * Status precedence (first match wins):
 *   1. any "expectations-invalid" finding                      -> "unavailable"
 *   2. any "expectations-empty" / "-unsupported-control"       -> "unsupported"
 *   3. a "no-tests-observed" finding                           -> "failed" / "reject"
 *   4. no runtime-matching controls                            -> "not_requested" / "advisory"
 *   5. a blocking finding resolving to "reject"                -> "failed" / "reject"
 *   6. a blocking finding resolving to "iterate"               -> "failed" / "iterate"
 *   7. modeling completeness is "partial"                      -> "partial" / "iterate"
 *   8. otherwise                                               -> "matched" / "accept"
 *
 * "Blocking" means every expectation finding except "expectations-weak-goal".
 *
 * @param {{expectations: (object | undefined), findings: Array<object>, observedTestCount: number, modelingSummary: {completeness: string}}} params
 * @returns {object} Result record with `status`, `impact`, `source`, control counts, `summary` and `finding_ids`.
 */
const buildExpectationResult = (params) => {
    const { expectations, findings, observedTestCount, modelingSummary } = params;
    const expectationFindings = findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName));
    const recognized = recognizedControlCount(expectations);
    const runtimeMatching = runtimeMatchingControlCount(expectations);
    const invalidFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-invalid");
    const emptyFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-empty");
    const unsupportedFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-unsupported-control");
    const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal");
    // An explicit test count wins; otherwise fall back to the number of expected full names.
    const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0;
    let status;
    let impact;
    if (invalidFindings.length) {
        status = "unavailable";
        // strongestImpact yields only "reject", "iterate" or the fallback, so
        // with a "reject" fallback the result can never be "advisory".
        impact = strongestImpact(invalidFindings, "reject");
    }
    else if (emptyFindings.length || unsupportedFindings.length) {
        status = "unsupported";
        // With an "iterate" fallback the result is already "reject" or "iterate".
        impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate");
    }
    else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) {
        status = "failed";
        impact = "reject";
    }
    else if (runtimeMatching === 0) {
        status = "not_requested";
        impact = "advisory";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "reject")) {
        status = "failed";
        impact = "reject";
    }
    else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "iterate")) {
        status = "failed";
        impact = "iterate";
    }
    else if (modelingSummary.completeness === "partial") {
        status = "partial";
        impact = "iterate";
    }
    else {
        status = "matched";
        impact = "accept";
    }
    return {
        schema_version: "allure-agent-expectation-result/v1",
        status,
        impact,
        source: expectations
            ? {
                kind: expectations.sourceKind,
                // A path is only reported for file-based expectations.
                path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
            }
            : {
                kind: "none",
                path: null,
            },
        recognized_control_count: recognized,
        unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
        degraded_controls: [],
        summary: {
            expected_tests: expectedTests,
            observed_tests: observedTestCount,
            missing_expected: expectationFindings.filter((finding) => MISSING_EXPECTED_CHECK_IDS.has(finding.checkName))
                .length,
            forbidden_observed: expectationFindings.filter((finding) => finding.checkName === "forbidden-label-observed")
                .length,
            unexpected_observed: 0,
            evidence_mismatches: expectationFindings.filter((finding) => EVIDENCE_MISMATCH_CHECK_IDS.has(finding.checkName))
                .length,
        },
        finding_ids: expectationFindings.map((finding) => finding.findingId),
    };
};
|
|
797
1089
|
const sortFindings = (findings) => [...findings].sort((left, right) => {
|
|
798
1090
|
const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
|
|
799
1091
|
if (bySeverity !== 0) {
|
|
@@ -818,6 +1110,28 @@ const renderFindingEvidenceLinks = (params) => {
|
|
|
818
1110
|
})
|
|
819
1111
|
.join("\n");
|
|
820
1112
|
};
|
|
1113
|
+
/**
 * Formats a structured `expected` / `observed` value of a finding as one line.
 *
 * - strings, numbers and booleans are stringified;
 * - arrays are formatted element by element and joined with ", ";
 * - plain objects become "key: value" pairs joined with "; ";
 * - anything that formats to nothing (null, undefined, empty string, empty
 *   array, empty object, unsupported types) yields `undefined`.
 *
 * Returning `undefined` rather than "" for empty input lets callers fall back
 * with `??`.
 *
 * @param {unknown} value - Value to format.
 * @returns {string | undefined} Non-empty text, or `undefined` when there is nothing to show.
 */
const formatFindingStructuredValue = (value) => {
    if (value === undefined || value === null) {
        return undefined;
    }
    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
        const text = String(value);
        return text.length ? text : undefined;
    }
    if (Array.isArray(value)) {
        const items = value
            .map((item) => formatFindingStructuredValue(item))
            .filter(Boolean);
        return items.length ? items.join(", ") : undefined;
    }
    if (typeof value === "object") {
        const parts = Object.entries(value).flatMap(([key, item]) => {
            const formatted = formatFindingStructuredValue(item);
            return formatted ? [`${key}: ${formatted}`] : [];
        });
        return parts.length ? parts.join("; ") : undefined;
    }
    return undefined;
};
|
|
821
1135
|
const renderFindingsSection = (params) => {
|
|
822
1136
|
const { title, findings, currentFilePath, outputDir } = params;
|
|
823
1137
|
if (!findings.length) {
|
|
@@ -825,25 +1139,26 @@ const renderFindingsSection = (params) => {
|
|
|
825
1139
|
}
|
|
826
1140
|
const lines = [`## ${title}`, ""];
|
|
827
1141
|
for (const finding of sortFindings(findings)) {
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
if (finding.expectedReference) {
|
|
834
|
-
lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
|
|
835
|
-
}
|
|
836
|
-
if (finding.confidence !== undefined) {
|
|
837
|
-
lines.push(`- Confidence: ${finding.confidence}`);
|
|
838
|
-
}
|
|
839
|
-
lines.push("- Evidence:");
|
|
840
|
-
lines.push("");
|
|
841
|
-
lines.push(renderFindingEvidenceLinks({
|
|
1142
|
+
const impact = defaultImpactForFinding(finding);
|
|
1143
|
+
const expected = formatFindingStructuredValue(finding.expected) ??
|
|
1144
|
+
(finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
|
|
1145
|
+
const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
|
|
1146
|
+
const evidenceLinks = renderFindingEvidenceLinks({
|
|
842
1147
|
finding,
|
|
843
1148
|
currentFilePath,
|
|
844
1149
|
outputDir,
|
|
845
|
-
})
|
|
846
|
-
lines.push(
|
|
1150
|
+
});
|
|
1151
|
+
lines.push(`- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`);
|
|
1152
|
+
if (expected) {
|
|
1153
|
+
lines.push(` Expected: ${escapeInlineMarkdown(expected)}`);
|
|
1154
|
+
}
|
|
1155
|
+
if (observed) {
|
|
1156
|
+
lines.push(` Observed: ${escapeInlineMarkdown(observed)}`);
|
|
1157
|
+
}
|
|
1158
|
+
lines.push(` Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
|
|
1159
|
+
if (evidenceLinks !== "None") {
|
|
1160
|
+
lines.push(` Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
|
|
1161
|
+
}
|
|
847
1162
|
}
|
|
848
1163
|
return lines.join("\n").trimEnd();
|
|
849
1164
|
};
|
|
@@ -858,6 +1173,25 @@ const renderExpectationSection = (entry) => {
|
|
|
858
1173
|
];
|
|
859
1174
|
return lines.join("\n");
|
|
860
1175
|
};
|
|
1176
|
+
/**
 * Renders the "Expectation Result" markdown section from the result built by
 * `buildExpectationResult(params)`.
 *
 * @param {object} params - Passed unchanged to `buildExpectationResult`.
 * @returns {string} Heading plus one bullet per field, joined with newlines.
 */
const renderExpectationResultSection = (params) => {
    const result = buildExpectationResult(params);
    const summary = result.summary;
    return [
        "## Expectation Result",
        "",
        `- Status: ${result.status}`,
        `- Impact: ${result.impact}`,
        `- Recognized Controls: ${result.recognized_control_count}`,
        // The source path is appended only when one is reported.
        `- Source: ${result.source.kind}${result.source.path ? ` (${result.source.path})` : ""}`,
        `- Expected Tests: ${summary.expected_tests}`,
        `- Observed Tests: ${summary.observed_tests}`,
        `- Missing Expected: ${summary.missing_expected}`,
        `- Forbidden Observed: ${summary.forbidden_observed}`,
        `- Evidence Mismatches: ${summary.evidence_mismatches}`,
        `- Run Manifest: [manifest/run.json](manifest/run.json)`,
        `- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)`,
    ].join("\n");
};
|
|
861
1195
|
const renderRerunGuidance = (findings) => {
|
|
862
1196
|
const relevant = findings.filter(({ category }) => category === "evidence" || category === "smells" || category === "metadata");
|
|
863
1197
|
if (!relevant.length) {
|
|
@@ -875,7 +1209,7 @@ const renderRerunGuidance = (findings) => {
|
|
|
875
1209
|
if (relevant.some(({ checkName }) => checkName === "noop-dominated-steps")) {
|
|
876
1210
|
lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
|
|
877
1211
|
}
|
|
878
|
-
lines.push("- Rerun only the relevant tests with the same expectations
|
|
1212
|
+
lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
|
|
879
1213
|
return lines.join("\n");
|
|
880
1214
|
};
|
|
881
1215
|
const renderTestFile = (params) => {
|
|
@@ -1015,14 +1349,24 @@ const renderIndex = (params) => {
|
|
|
1015
1349
|
lines.push("");
|
|
1016
1350
|
lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
|
|
1017
1351
|
lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
|
|
1018
|
-
lines.push(
|
|
1352
|
+
lines.push(expectations.sourceKind === "inline"
|
|
1353
|
+
? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
|
|
1354
|
+
: `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`);
|
|
1019
1355
|
lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
|
|
1020
1356
|
lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
|
|
1357
|
+
lines.push(renderEvidenceExpectationSummary(expectations.evidence));
|
|
1021
1358
|
if (expectations.notes.length) {
|
|
1022
1359
|
lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
|
|
1023
1360
|
}
|
|
1024
1361
|
}
|
|
1025
1362
|
lines.push("");
|
|
1363
|
+
lines.push(renderExpectationResultSection({
|
|
1364
|
+
expectations,
|
|
1365
|
+
findings,
|
|
1366
|
+
observedTestCount: tests.length,
|
|
1367
|
+
modelingSummary,
|
|
1368
|
+
}));
|
|
1369
|
+
lines.push("");
|
|
1026
1370
|
lines.push("## Advisory Check Summary");
|
|
1027
1371
|
lines.push("");
|
|
1028
1372
|
lines.push(`- modeling completeness: ${modelingSummary.completeness}`);
|
|
@@ -1225,10 +1569,7 @@ const readMaterializedArtifactText = async (outputDir, artifact) => {
|
|
|
1225
1569
|
return undefined;
|
|
1226
1570
|
}
|
|
1227
1571
|
};
|
|
1228
|
-
const resolveOutputDir = (options) =>
|
|
1229
|
-
const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
|
|
1230
|
-
return outputDir ? resolve(outputDir) : undefined;
|
|
1231
|
-
};
|
|
1572
|
+
/**
 * Resolves the configured output directory to an absolute path.
 *
 * @param {{outputDir?: string}} options - Plugin options.
 * @returns {string | undefined} Absolute path, or `undefined` when `outputDir` is unset or empty.
 */
const resolveOutputDir = (options) => {
    const configured = options.outputDir;
    if (!configured) {
        return undefined;
    }
    return resolve(configured);
};
|
|
1232
1573
|
const cleanupManagedEntries = async (outputDir) => {
|
|
1233
1574
|
await Promise.all(MANAGED_ENTRIES.map(async (entry) => {
|
|
1234
1575
|
await rm(join(outputDir, entry), {
|
|
@@ -1267,15 +1608,97 @@ const createFindingFactory = () => {
|
|
|
1267
1608
|
};
|
|
1268
1609
|
};
|
|
1269
1610
|
};
|
|
1270
|
-
const
|
|
1271
|
-
const parsed = parse(rawContent);
|
|
1611
|
+
/**
 * Validates that `parsed` is a non-null, non-array object.
 *
 * @param {unknown} parsed - Candidate expectations value.
 * @returns {void}
 * @throws {Error} "Expected an expectations object" for falsy values, primitives and arrays.
 */
const assertExpectationsObject = (parsed) => {
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error("Expected an expectations object");
    }
};
|
|
1277
|
-
const
|
|
1278
|
-
const
|
|
1616
|
+
/**
 * Writes the expectations as pretty-printed JSON (two-space indent, trailing
 * newline) to `manifest/expected.json` under `outputDir`, creating the
 * `manifest` directory when needed.
 *
 * @param {string} outputDir - Root output directory.
 * @param {object} parsed - Expectations object to serialize.
 * @returns {Promise<string>} The manifest path relative to `outputDir`, as returned by `normalizeMarkdownPath`.
 */
const writeExpectedManifest = async (outputDir, parsed) => {
    const relativePath = normalizeMarkdownPath("manifest/expected.json");
    await mkdir(join(outputDir, "manifest"), { recursive: true });
    await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
    return relativePath;
};
|
|
1622
|
+
/**
 * Builds the in-memory expectations record from a parsed expectations object.
 *
 * `goal` and `task_id` are copied as-is; `notes`, `expected`, `forbidden` and
 * `evidence` go through their normalizers. The untouched input is kept on `raw`.
 *
 * @param {{parsed: object, relativePath: string, sourceKind: string, sourcePath?: string}} params
 * @returns {object} Loaded expectations record.
 */
const toLoadedExpectations = (params) => {
    const { parsed, relativePath, sourceKind, sourcePath } = params;
    return {
        sourcePath,
        sourceKind,
        relativePath,
        raw: parsed,
        goal: parsed.goal,
        taskId: parsed.task_id,
        notes: normalizeNotes(parsed.notes),
        expected: normalizeSelectors(parsed.expected),
        forbidden: normalizeSelectors(parsed.forbidden),
        evidence: normalizeEvidenceExpectations(parsed.evidence),
    };
};
|
|
1637
|
+
const loadExpectations = async (outputDir, createFinding, options) => {
|
|
1638
|
+
const configuredPath = options.expectationsPath;
|
|
1639
|
+
const inlineExpectations = options.expectations;
|
|
1640
|
+
if (!configuredPath && !inlineExpectations) {
|
|
1641
|
+
return {
|
|
1642
|
+
expectations: undefined,
|
|
1643
|
+
findings: [],
|
|
1644
|
+
};
|
|
1645
|
+
}
|
|
1646
|
+
if (configuredPath && inlineExpectations) {
|
|
1647
|
+
return {
|
|
1648
|
+
expectations: undefined,
|
|
1649
|
+
findings: [
|
|
1650
|
+
createFinding({
|
|
1651
|
+
subject: "run",
|
|
1652
|
+
subjectType: "run",
|
|
1653
|
+
severity: "high",
|
|
1654
|
+
category: "bootstrap",
|
|
1655
|
+
impact: "reject",
|
|
1656
|
+
checkName: "expectations-invalid",
|
|
1657
|
+
title: "Expectation input is invalid",
|
|
1658
|
+
message: "Both file and inline agent expectations were provided.",
|
|
1659
|
+
explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
|
|
1660
|
+
evidencePaths: [],
|
|
1661
|
+
remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
|
|
1662
|
+
expectedReference: undefined,
|
|
1663
|
+
}),
|
|
1664
|
+
],
|
|
1665
|
+
};
|
|
1666
|
+
}
|
|
1667
|
+
if (inlineExpectations) {
|
|
1668
|
+
try {
|
|
1669
|
+
assertExpectationsObject(inlineExpectations);
|
|
1670
|
+
const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
|
|
1671
|
+
return {
|
|
1672
|
+
expectations: toLoadedExpectations({
|
|
1673
|
+
parsed: inlineExpectations,
|
|
1674
|
+
relativePath,
|
|
1675
|
+
sourceKind: "inline",
|
|
1676
|
+
}),
|
|
1677
|
+
findings: [],
|
|
1678
|
+
};
|
|
1679
|
+
}
|
|
1680
|
+
catch (error) {
|
|
1681
|
+
return {
|
|
1682
|
+
expectations: undefined,
|
|
1683
|
+
findings: [
|
|
1684
|
+
createFinding({
|
|
1685
|
+
subject: "run",
|
|
1686
|
+
subjectType: "run",
|
|
1687
|
+
severity: "high",
|
|
1688
|
+
category: "bootstrap",
|
|
1689
|
+
impact: "reject",
|
|
1690
|
+
checkName: "expectations-invalid",
|
|
1691
|
+
title: "Expectation input is invalid",
|
|
1692
|
+
message: "Could not load inline agent expectations",
|
|
1693
|
+
explanation: `The inline expectations option could not be normalized: ${error.message}`,
|
|
1694
|
+
evidencePaths: [],
|
|
1695
|
+
remediationHint: "Provide a valid expectations object before rerunning.",
|
|
1696
|
+
expectedReference: undefined,
|
|
1697
|
+
}),
|
|
1698
|
+
],
|
|
1699
|
+
};
|
|
1700
|
+
}
|
|
1701
|
+
}
|
|
1279
1702
|
if (!configuredPath) {
|
|
1280
1703
|
return {
|
|
1281
1704
|
expectations: undefined,
|
|
@@ -1285,21 +1708,15 @@ const loadExpectations = async (outputDir, createFinding) => {
|
|
|
1285
1708
|
const expectationsPath = resolve(configuredPath);
|
|
1286
1709
|
try {
|
|
1287
1710
|
const rawContent = await readFile(expectationsPath, "utf-8");
|
|
1288
|
-
const parsed =
|
|
1289
|
-
const relativePath =
|
|
1290
|
-
await mkdir(join(outputDir, "manifest"), { recursive: true });
|
|
1291
|
-
await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
|
|
1711
|
+
const parsed = parseAgentExpectations(rawContent);
|
|
1712
|
+
const relativePath = await writeExpectedManifest(outputDir, parsed);
|
|
1292
1713
|
return {
|
|
1293
|
-
expectations: {
|
|
1294
|
-
|
|
1714
|
+
expectations: toLoadedExpectations({
|
|
1715
|
+
parsed,
|
|
1295
1716
|
relativePath,
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
notes: normalizeNotes(parsed.notes),
|
|
1300
|
-
expected: normalizeSelectors(parsed.expected),
|
|
1301
|
-
forbidden: normalizeSelectors(parsed.forbidden),
|
|
1302
|
-
},
|
|
1717
|
+
sourceKind: "file",
|
|
1718
|
+
sourcePath: expectationsPath,
|
|
1719
|
+
}),
|
|
1303
1720
|
findings: [],
|
|
1304
1721
|
};
|
|
1305
1722
|
}
|
|
@@ -1312,37 +1729,19 @@ const loadExpectations = async (outputDir, createFinding) => {
|
|
|
1312
1729
|
subjectType: "run",
|
|
1313
1730
|
severity: "high",
|
|
1314
1731
|
category: "bootstrap",
|
|
1315
|
-
|
|
1316
|
-
|
|
1732
|
+
impact: "reject",
|
|
1733
|
+
checkName: "expectations-invalid",
|
|
1734
|
+
title: "Expectation input is invalid",
|
|
1735
|
+
message: `Could not load expectations from ${expectationsPath}`,
|
|
1317
1736
|
explanation: `The expectations file could not be parsed as YAML or JSON: ${error.message}`,
|
|
1318
1737
|
evidencePaths: [],
|
|
1319
|
-
remediationHint: "Provide a readable YAML or JSON file
|
|
1738
|
+
remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
|
|
1320
1739
|
expectedReference: undefined,
|
|
1321
1740
|
}),
|
|
1322
1741
|
],
|
|
1323
1742
|
};
|
|
1324
1743
|
}
|
|
1325
1744
|
};
|
|
1326
|
-
const loadProjectGuide = async (outputDir) => {
|
|
1327
|
-
const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd());
|
|
1328
|
-
const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
|
|
1329
|
-
try {
|
|
1330
|
-
const content = await readFile(sourcePath, "utf-8");
|
|
1331
|
-
const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md"));
|
|
1332
|
-
await mkdir(join(outputDir, "project", "docs"), { recursive: true });
|
|
1333
|
-
await writeFile(join(outputDir, relativePath), content, "utf-8");
|
|
1334
|
-
return {
|
|
1335
|
-
sourcePath,
|
|
1336
|
-
relativePath,
|
|
1337
|
-
};
|
|
1338
|
-
}
|
|
1339
|
-
catch (error) {
|
|
1340
|
-
if (error.code === "ENOENT") {
|
|
1341
|
-
return undefined;
|
|
1342
|
-
}
|
|
1343
|
-
throw error;
|
|
1344
|
-
}
|
|
1345
|
-
};
|
|
1346
1745
|
const computeScopeEvaluation = (params) => {
|
|
1347
1746
|
const { tr, environmentId, expectations } = params;
|
|
1348
1747
|
if (!expectations) {
|
|
@@ -1418,22 +1817,54 @@ const collectTestEvidencePaths = (entry) => {
|
|
|
1418
1817
|
}
|
|
1419
1818
|
return uniqueValues(paths);
|
|
1420
1819
|
};
|
|
1820
|
+
/**
 * Selects the entries that per-test evidence expectations are evaluated against.
 *
 * When `hasSelector(expectations.expected)` is falsy, the input array itself is
 * returned unchanged. Otherwise only entries whose `scope.scopeMatch` equals
 * "match" are kept.
 *
 * @param {Array<object>} entries - Entries carrying a `scope.scopeMatch` field.
 * @param {object} expectations - Loaded expectations; only `expected` is read here.
 * @returns {Array<object>} The original array or a filtered copy.
 */
const getExpectationTargetEntries = (entries, expectations) => {
    const scoped = hasSelector(expectations.expected);
    return scoped ? entries.filter(({ scope }) => scope.scopeMatch === "match") : entries;
};
|
|
1826
|
+
/**
 * Merges the `stepSummary` and `fixtureStepSummary` of the entry's attempt at
 * index 0 into a single summary via `mergeStepSummaries`.
 *
 * @param {object} entry - Entry with a non-empty `attempts` array.
 * @returns {object} Whatever `mergeStepSummaries` returns for the two summaries.
 */
const currentAttemptStepSummary = (entry) => {
    const firstAttempt = entry.attempts[0];
    const summaries = [firstAttempt.stepSummary, firstAttempt.fixtureStepSummary];
    return mergeStepSummaries(summaries);
};
|
|
1827
|
+
/**
 * Returns the entry's artifacts whose `missing` flag is falsy.
 *
 * @param {object} entry - Entry exposing an `allArtifacts` array.
 * @returns {Array<object>} New array containing only the non-missing artifacts.
 */
const nonMissingArtifacts = (entry) => {
    const { allArtifacts } = entry;
    return allArtifacts.filter(({ missing }) => !missing);
};
|
|
1828
|
+
/**
 * Renders an attachment expectation as a short human-readable filter string.
 *
 * Emits `name=<name>` and/or `content-type=<contentType>` for each truthy field,
 * joined with ", ". Returns an empty string when neither field is set.
 *
 * @param {{ name?: string, contentType?: string }} expectation
 * @returns {string}
 */
const formatAttachmentExpectation = (expectation) => {
    const parts = [];
    if (expectation.name) {
        parts.push(`name=${expectation.name}`);
    }
    if (expectation.contentType) {
        parts.push(`content-type=${expectation.contentType}`);
    }
    return parts.join(", ");
};
|
|
1834
|
+
/**
 * Tells whether an artifact satisfies an attachment expectation.
 *
 * An artifact flagged `missing` never matches. Otherwise each truthy filter on
 * the expectation (`name` against `displayName`, `contentType` against
 * `contentType`) must be strictly equal; filters that are unset are ignored.
 *
 * @param {{ missing?: boolean, displayName?: string, contentType?: string }} artifact
 * @param {{ name?: string, contentType?: string }} expectation
 * @returns {boolean}
 */
const matchesAttachmentExpectation = (artifact, expectation) => {
    // Evaluated left to right so a missing artifact short-circuits before the expectation is read.
    return (!artifact.missing &&
        (!expectation.name || artifact.displayName === expectation.name) &&
        (!expectation.contentType || artifact.contentType === expectation.contentType));
};
|
|
1421
1846
|
const buildRunAndTestFindings = (params) => {
|
|
1422
1847
|
const { entries, expectations, globalArtifacts, modelingSummary, createFinding } = params;
|
|
1423
1848
|
const runFindings = [];
|
|
1424
1849
|
const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
|
|
1425
1850
|
const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
|
|
1426
|
-
if (entries.length === 0) {
|
|
1851
|
+
if (entries.length === 0 && expectations?.expected.testCount !== 0) {
|
|
1427
1852
|
runFindings.push(createFinding({
|
|
1428
1853
|
subject: "run",
|
|
1429
1854
|
subjectType: "run",
|
|
1430
1855
|
severity: "high",
|
|
1856
|
+
impact: "reject",
|
|
1431
1857
|
category: "bootstrap",
|
|
1432
|
-
checkName: "no-
|
|
1858
|
+
checkName: "no-tests-observed",
|
|
1859
|
+
title: "No logical tests were observed",
|
|
1433
1860
|
message: "No visible test results were found in the run.",
|
|
1434
1861
|
explanation: "The agent output was generated, but there were no visible logical test results to review.",
|
|
1435
|
-
evidencePaths: [],
|
|
1436
|
-
remediationHint: "
|
|
1862
|
+
evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
|
|
1863
|
+
remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
|
|
1864
|
+
expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
|
|
1865
|
+
observed: { test_count: 0 },
|
|
1866
|
+
action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
|
|
1867
|
+
confidence: 1,
|
|
1437
1868
|
}));
|
|
1438
1869
|
}
|
|
1439
1870
|
if (!stdoutArtifact && !stderrArtifact) {
|
|
@@ -1446,7 +1877,7 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1446
1877
|
message: "The run does not include global stdout or stderr logs.",
|
|
1447
1878
|
explanation: "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
|
|
1448
1879
|
evidencePaths: [],
|
|
1449
|
-
remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics
|
|
1880
|
+
remediationHint: "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
|
|
1450
1881
|
confidence: 0.9,
|
|
1451
1882
|
}));
|
|
1452
1883
|
}
|
|
@@ -1486,19 +1917,93 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1486
1917
|
const actualEnvironments = uniqueValues(entries.map(({ environmentId }) => environmentId));
|
|
1487
1918
|
if (expectations) {
|
|
1488
1919
|
const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
|
|
1920
|
+
const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
|
|
1921
|
+
const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
|
|
1922
|
+
if (recognizedControlCount(expectations) === 0) {
|
|
1923
|
+
runFindings.push(createFinding({
|
|
1924
|
+
subject: "run",
|
|
1925
|
+
subjectType: "run",
|
|
1926
|
+
severity: "high",
|
|
1927
|
+
impact: "iterate",
|
|
1928
|
+
category: "scope",
|
|
1929
|
+
checkName: "expectations-empty",
|
|
1930
|
+
title: "Expectation source did not contain recognized controls",
|
|
1931
|
+
message: "Expectation source was provided but no recognized M1 controls were parsed.",
|
|
1932
|
+
explanation: "The run can still be reviewed, but expectation precision was not requested.",
|
|
1933
|
+
evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
|
|
1934
|
+
remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
|
|
1935
|
+
observed: { recognized_control_count: 0 },
|
|
1936
|
+
action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
|
|
1937
|
+
confidence: 1,
|
|
1938
|
+
}));
|
|
1939
|
+
}
|
|
1940
|
+
if ((hasRuntimeControls && !expectations.goal) ||
|
|
1941
|
+
["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)) {
|
|
1942
|
+
runFindings.push(createFinding({
|
|
1943
|
+
subject: "run",
|
|
1944
|
+
subjectType: "run",
|
|
1945
|
+
severity: "info",
|
|
1946
|
+
impact: "advisory",
|
|
1947
|
+
category: "scope",
|
|
1948
|
+
checkName: "expectations-weak-goal",
|
|
1949
|
+
title: "Run goal is missing or too generic",
|
|
1950
|
+
message: expectations.goal
|
|
1951
|
+
? `The run goal is too generic: ${expectations.goal}`
|
|
1952
|
+
: "Runtime expectations were provided without a goal.",
|
|
1953
|
+
explanation: "The goal is intent metadata and does not change the runtime evidence.",
|
|
1954
|
+
evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
|
|
1955
|
+
remediationHint: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
|
|
1956
|
+
expected: { goal: "specific validation claim" },
|
|
1957
|
+
observed: { goal: expectations.goal ?? null },
|
|
1958
|
+
action: "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
|
|
1959
|
+
confidence: 0.9,
|
|
1960
|
+
}));
|
|
1961
|
+
}
|
|
1962
|
+
if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
|
|
1963
|
+
const severity = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
|
|
1964
|
+
const impact = expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
|
|
1965
|
+
runFindings.push(createFinding({
|
|
1966
|
+
subject: "run",
|
|
1967
|
+
subjectType: "run",
|
|
1968
|
+
severity,
|
|
1969
|
+
impact,
|
|
1970
|
+
category: "scope",
|
|
1971
|
+
checkName: "expected-count-mismatch",
|
|
1972
|
+
title: "Observed logical test count did not match",
|
|
1973
|
+
message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
|
|
1974
|
+
explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
|
|
1975
|
+
evidencePaths: expectations.relativePath
|
|
1976
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
1977
|
+
: ["manifest/tests.jsonl"],
|
|
1978
|
+
remediationHint: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
|
|
1979
|
+
expectedReference: "expected.test_count",
|
|
1980
|
+
expected: { test_count: expectations.expected.testCount },
|
|
1981
|
+
observed: { test_count: entries.length },
|
|
1982
|
+
action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
|
|
1983
|
+
confidence: 1,
|
|
1984
|
+
}));
|
|
1985
|
+
}
|
|
1489
1986
|
expectations.expected.fullNames.forEach((fullName, index) => {
|
|
1490
1987
|
if (!allFullNames.includes(fullName)) {
|
|
1491
1988
|
runFindings.push(createFinding({
|
|
1492
1989
|
subject: "run",
|
|
1493
1990
|
subjectType: "run",
|
|
1494
1991
|
severity: "high",
|
|
1992
|
+
impact: "reject",
|
|
1495
1993
|
category: "scope",
|
|
1496
|
-
checkName: "
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1994
|
+
checkName: "expected-test-missing",
|
|
1995
|
+
title: "Expected test was not observed",
|
|
1996
|
+
message: "The expected test did not appear in the observed logical results.",
|
|
1997
|
+
explanation: `Expected test did not run: ${fullName}`,
|
|
1998
|
+
evidencePaths: expectations.relativePath
|
|
1999
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2000
|
+
: ["manifest/tests.jsonl"],
|
|
2001
|
+
remediationHint: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
|
|
1501
2002
|
expectedReference: `expected.full_names[${index}]`,
|
|
2003
|
+
expected: { full_names: [fullName] },
|
|
2004
|
+
observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
|
|
2005
|
+
action: "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
|
|
2006
|
+
confidence: 1,
|
|
1502
2007
|
}));
|
|
1503
2008
|
}
|
|
1504
2009
|
});
|
|
@@ -1507,14 +2012,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1507
2012
|
runFindings.push(createFinding({
|
|
1508
2013
|
subject: "run",
|
|
1509
2014
|
subjectType: "run",
|
|
1510
|
-
severity: "
|
|
2015
|
+
severity: "high",
|
|
2016
|
+
impact: "reject",
|
|
1511
2017
|
category: "scope",
|
|
1512
|
-
checkName: "
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
2018
|
+
checkName: "expected-prefix-missing",
|
|
2019
|
+
title: "Expected test prefix was not observed",
|
|
2020
|
+
message: `No observed test full name started with the expected prefix: ${prefix}`,
|
|
2021
|
+
explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
|
|
2022
|
+
evidencePaths: expectations.relativePath
|
|
2023
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2024
|
+
: ["manifest/tests.jsonl"],
|
|
2025
|
+
remediationHint: "Treat the run as wrong scope or missing coverage.",
|
|
1517
2026
|
expectedReference: `expected.full_name_prefixes[${index}]`,
|
|
2027
|
+
expected: { full_name_prefixes: [prefix] },
|
|
2028
|
+
observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
|
|
2029
|
+
action: "Treat the run as wrong scope or missing coverage.",
|
|
2030
|
+
confidence: 1,
|
|
1518
2031
|
}));
|
|
1519
2032
|
}
|
|
1520
2033
|
});
|
|
@@ -1523,14 +2036,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1523
2036
|
runFindings.push(createFinding({
|
|
1524
2037
|
subject: "run",
|
|
1525
2038
|
subjectType: "run",
|
|
1526
|
-
severity: "
|
|
2039
|
+
severity: "high",
|
|
2040
|
+
impact: "reject",
|
|
1527
2041
|
category: "scope",
|
|
1528
|
-
checkName: "
|
|
2042
|
+
checkName: "expected-environment-missing",
|
|
2043
|
+
title: "Expected environment was not observed",
|
|
1529
2044
|
message: `Expected environment did not appear in the run: ${environment}`,
|
|
1530
|
-
explanation: "The expectations
|
|
1531
|
-
evidencePaths: expectations.relativePath
|
|
1532
|
-
|
|
2045
|
+
explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
|
|
2046
|
+
evidencePaths: expectations.relativePath
|
|
2047
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2048
|
+
: ["manifest/tests.jsonl"],
|
|
2049
|
+
remediationHint: "Rerun in the intended environment before making environment-specific claims.",
|
|
1533
2050
|
expectedReference: `expected.environments[${index}]`,
|
|
2051
|
+
expected: { environments: [environment] },
|
|
2052
|
+
observed: { environments: actualEnvironments },
|
|
2053
|
+
action: "Rerun in the intended environment before making environment-specific claims.",
|
|
2054
|
+
confidence: 1,
|
|
1534
2055
|
}));
|
|
1535
2056
|
}
|
|
1536
2057
|
});
|
|
@@ -1540,14 +2061,22 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1540
2061
|
runFindings.push(createFinding({
|
|
1541
2062
|
subject: "run",
|
|
1542
2063
|
subjectType: "run",
|
|
1543
|
-
severity: "
|
|
2064
|
+
severity: "high",
|
|
2065
|
+
impact: "reject",
|
|
1544
2066
|
category: "scope",
|
|
1545
|
-
checkName: "
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
2067
|
+
checkName: "expected-label-missing",
|
|
2068
|
+
title: "Expected label was not observed",
|
|
2069
|
+
message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
|
|
2070
|
+
explanation: "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
|
|
2071
|
+
evidencePaths: expectations.relativePath
|
|
2072
|
+
? [expectations.relativePath, "manifest/tests.jsonl"]
|
|
2073
|
+
: ["manifest/tests.jsonl"],
|
|
2074
|
+
remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
|
|
1550
2075
|
expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
|
|
2076
|
+
expected: { label_values: { [labelName]: values } },
|
|
2077
|
+
observed: { test_count: entries.length },
|
|
2078
|
+
action: "Fix metadata, selector, or run the correct labeled scope.",
|
|
2079
|
+
confidence: 1,
|
|
1551
2080
|
}));
|
|
1552
2081
|
}
|
|
1553
2082
|
});
|
|
@@ -1570,28 +2099,53 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1570
2099
|
});
|
|
1571
2100
|
}
|
|
1572
2101
|
}
|
|
2102
|
+
const evidenceTargetKeys = expectations
|
|
2103
|
+
? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
|
|
2104
|
+
: new Set();
|
|
1573
2105
|
for (const entry of entries) {
|
|
1574
2106
|
const currentAttempt = entry.attempts[0];
|
|
1575
2107
|
const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
|
|
1576
2108
|
const testEvidencePaths = collectTestEvidencePaths(entry);
|
|
1577
2109
|
const allStepSummary = mergeStepSummaries(entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])));
|
|
2110
|
+
const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
|
|
2111
|
+
const expectedEvidence = expectations?.evidence;
|
|
2112
|
+
const currentStepSummary = currentAttemptStepSummary(entry);
|
|
2113
|
+
const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
|
|
2114
|
+
const currentAttachments = nonMissingArtifacts(entry);
|
|
1578
2115
|
const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
|
|
1579
2116
|
const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
|
|
1580
2117
|
const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
|
|
1581
2118
|
if (entry.scope.scopeMatch === "forbidden") {
|
|
2119
|
+
const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => reference.startsWith("forbidden.label_values"));
|
|
2120
|
+
const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
|
|
1582
2121
|
entry.findings.push(createFinding({
|
|
1583
2122
|
subject: entry.key,
|
|
1584
2123
|
subjectType: "test",
|
|
1585
2124
|
severity: "high",
|
|
2125
|
+
impact: "reject",
|
|
1586
2126
|
category: "scope",
|
|
1587
|
-
checkName
|
|
1588
|
-
|
|
1589
|
-
|
|
2127
|
+
checkName,
|
|
2128
|
+
title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
|
|
2129
|
+
message: forbiddenLabelReference
|
|
2130
|
+
? "This test has a label value that was explicitly forbidden."
|
|
2131
|
+
: "This test matched a forbidden selector from the expectations.",
|
|
2132
|
+
explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
|
|
1590
2133
|
evidencePaths: expectations?.relativePath
|
|
1591
2134
|
? [entry.relativePath, expectations.relativePath]
|
|
1592
2135
|
: [entry.relativePath],
|
|
1593
|
-
remediationHint:
|
|
1594
|
-
|
|
2136
|
+
remediationHint: forbiddenLabelReference
|
|
2137
|
+
? "Treat as scope drift. Split or correct the run before using it as focused validation."
|
|
2138
|
+
: "Tighten the test selection or update the expectations before accepting the run.",
|
|
2139
|
+
expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
|
|
2140
|
+
expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
|
|
2141
|
+
observed: {
|
|
2142
|
+
full_name: entry.tr.fullName ?? entry.tr.name,
|
|
2143
|
+
labels: toLabelEntries(entry.tr.labels),
|
|
2144
|
+
},
|
|
2145
|
+
action: forbiddenLabelReference
|
|
2146
|
+
? "Treat as scope drift. Split or correct the run before using it as focused validation."
|
|
2147
|
+
: "Tighten the test selection or update the expectations before accepting the run.",
|
|
2148
|
+
confidence: 1,
|
|
1595
2149
|
}));
|
|
1596
2150
|
}
|
|
1597
2151
|
else if (entry.scope.scopeMatch === "unexpected") {
|
|
@@ -1602,11 +2156,11 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1602
2156
|
category: "scope",
|
|
1603
2157
|
checkName: "unexpected-test",
|
|
1604
2158
|
message: "This test ran outside the expected scope.",
|
|
1605
|
-
explanation: "The expectations
|
|
2159
|
+
explanation: "The expectations defined positive scope selectors, but this logical test did not match any of them.",
|
|
1606
2160
|
evidencePaths: expectations?.relativePath
|
|
1607
2161
|
? [entry.relativePath, expectations.relativePath]
|
|
1608
2162
|
: [entry.relativePath],
|
|
1609
|
-
remediationHint: "Rerun only the intended tests or broaden the expectations
|
|
2163
|
+
remediationHint: "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
|
|
1610
2164
|
}));
|
|
1611
2165
|
}
|
|
1612
2166
|
if (entry.scope.metadataMismatches.length > 0) {
|
|
@@ -1640,6 +2194,117 @@ const buildRunAndTestFindings = (params) => {
|
|
|
1640
2194
|
confidence: 0.85,
|
|
1641
2195
|
}));
|
|
1642
2196
|
}
|
|
2197
|
+
expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
|
|
2198
|
+
if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
|
|
2199
|
+
return;
|
|
2200
|
+
}
|
|
2201
|
+
entry.findings.push(createFinding({
|
|
2202
|
+
subject: entry.key,
|
|
2203
|
+
subjectType: "test",
|
|
2204
|
+
severity: "warning",
|
|
2205
|
+
impact: "iterate",
|
|
2206
|
+
category: "evidence",
|
|
2207
|
+
checkName: "expected-step-containing-missing",
|
|
2208
|
+
title: "Expected step text was not observed",
|
|
2209
|
+
message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
|
|
2210
|
+
explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
|
|
2211
|
+
evidencePaths: expectations?.relativePath
|
|
2212
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2213
|
+
: [entry.relativePath],
|
|
2214
|
+
remediationHint: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
|
|
2215
|
+
expectedReference: `evidence.step_name_contains[${index}]`,
|
|
2216
|
+
expected: { step_name_contains: [expectedText] },
|
|
2217
|
+
observed: { steps: currentStepSummary.totalSteps, matched: false },
|
|
2218
|
+
action: "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
|
|
2219
|
+
confidence: 0.9,
|
|
2220
|
+
}));
|
|
2221
|
+
});
|
|
2222
|
+
if (expectedEvidenceApplies &&
|
|
2223
|
+
expectedEvidence?.minSteps !== undefined &&
|
|
2224
|
+
currentMeaningfulSteps < expectedEvidence.minSteps) {
|
|
2225
|
+
entry.findings.push(createFinding({
|
|
2226
|
+
subject: entry.key,
|
|
2227
|
+
subjectType: "test",
|
|
2228
|
+
severity: "warning",
|
|
2229
|
+
impact: "iterate",
|
|
2230
|
+
category: "evidence",
|
|
2231
|
+
checkName: "insufficient-expected-steps",
|
|
2232
|
+
title: "Expected step count was not met",
|
|
2233
|
+
message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
|
|
2234
|
+
explanation: "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
|
|
2235
|
+
evidencePaths: expectations?.relativePath
|
|
2236
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2237
|
+
: [entry.relativePath],
|
|
2238
|
+
remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
|
|
2239
|
+
expectedReference: "evidence.min_steps",
|
|
2240
|
+
expected: { min_steps: expectedEvidence.minSteps },
|
|
2241
|
+
observed: { meaningful_steps: currentMeaningfulSteps },
|
|
2242
|
+
action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
|
|
2243
|
+
confidence: 0.9,
|
|
2244
|
+
}));
|
|
2245
|
+
}
|
|
2246
|
+
if (expectedEvidenceApplies &&
|
|
2247
|
+
expectedEvidence?.minAttachments !== undefined &&
|
|
2248
|
+
currentAttachments.length < expectedEvidence.minAttachments) {
|
|
2249
|
+
entry.findings.push(createFinding({
|
|
2250
|
+
subject: entry.key,
|
|
2251
|
+
subjectType: "test",
|
|
2252
|
+
severity: "warning",
|
|
2253
|
+
impact: "iterate",
|
|
2254
|
+
category: "evidence",
|
|
2255
|
+
checkName: "insufficient-expected-attachments",
|
|
2256
|
+
title: "Expected attachment count was not met",
|
|
2257
|
+
message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
|
|
2258
|
+
explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
|
|
2259
|
+
evidencePaths: expectations?.relativePath
|
|
2260
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2261
|
+
: [entry.relativePath],
|
|
2262
|
+
remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
|
|
2263
|
+
expectedReference: "evidence.min_attachments",
|
|
2264
|
+
expected: { min_attachments: expectedEvidence.minAttachments },
|
|
2265
|
+
observed: { attachments: currentAttachments.length },
|
|
2266
|
+
action: "Attach real runtime artifacts only when they are needed for debugging or review.",
|
|
2267
|
+
confidence: 0.9,
|
|
2268
|
+
}));
|
|
2269
|
+
}
|
|
2270
|
+
expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
|
|
2271
|
+
if (!expectedEvidenceApplies) {
|
|
2272
|
+
return;
|
|
2273
|
+
}
|
|
2274
|
+
if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
|
|
2275
|
+
return;
|
|
2276
|
+
}
|
|
2277
|
+
entry.findings.push(createFinding({
|
|
2278
|
+
subject: entry.key,
|
|
2279
|
+
subjectType: "test",
|
|
2280
|
+
severity: "warning",
|
|
2281
|
+
impact: "iterate",
|
|
2282
|
+
category: "evidence",
|
|
2283
|
+
checkName: "missing-expected-attachment",
|
|
2284
|
+
title: "Expected attachment was not observed",
|
|
2285
|
+
message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
|
|
2286
|
+
explanation: "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
|
|
2287
|
+
evidencePaths: expectations?.relativePath
|
|
2288
|
+
? [entry.relativePath, expectations.relativePath]
|
|
2289
|
+
: [entry.relativePath],
|
|
2290
|
+
remediationHint: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
|
|
2291
|
+
expectedReference: `evidence.attachments[${index}]`,
|
|
2292
|
+
expected: {
|
|
2293
|
+
attachment: {
|
|
2294
|
+
...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
|
|
2295
|
+
...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
|
|
2296
|
+
},
|
|
2297
|
+
},
|
|
2298
|
+
observed: {
|
|
2299
|
+
attachments: currentAttachments.map((attachment) => ({
|
|
2300
|
+
name: attachment.displayName,
|
|
2301
|
+
content_type: attachment.contentType ?? null,
|
|
2302
|
+
})),
|
|
2303
|
+
},
|
|
2304
|
+
action: "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
|
|
2305
|
+
confidence: 0.95,
|
|
2306
|
+
}));
|
|
2307
|
+
});
|
|
1643
2308
|
if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
|
|
1644
2309
|
entry.findings.push(createFinding({
|
|
1645
2310
|
subject: entry.key,
|
|
@@ -1928,11 +2593,17 @@ const appendJsonlLine = async (path, item) => {
|
|
|
1928
2593
|
await appendFile(path, `${JSON.stringify(item)}\n`, "utf-8");
|
|
1929
2594
|
};
|
|
1930
2595
|
const toRunManifest = (params) => {
|
|
1931
|
-
const { context, command, generatedAt, phase, expectations,
|
|
2596
|
+
const { context, command, agentContext, generatedAt, phase, expectations, snapshot } = params;
|
|
1932
2597
|
const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
|
|
1933
2598
|
const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
|
|
1934
2599
|
const originalExitCode = snapshot.globalExitCode?.original ?? null;
|
|
1935
2600
|
const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
|
|
2601
|
+
const expectationResult = buildExpectationResult({
|
|
2602
|
+
expectations,
|
|
2603
|
+
findings: snapshot.combinedAllFindings,
|
|
2604
|
+
observedTestCount: snapshot.entries.length,
|
|
2605
|
+
modelingSummary: snapshot.modelingSummary,
|
|
2606
|
+
});
|
|
1936
2607
|
return {
|
|
1937
2608
|
schema_version: AGENT_SCHEMA_VERSION,
|
|
1938
2609
|
report_uuid: context.reportUuid,
|
|
@@ -1966,25 +2637,26 @@ const toRunManifest = (params) => {
|
|
|
1966
2637
|
findings_manifest: "manifest/findings.jsonl",
|
|
1967
2638
|
test_events_manifest: "manifest/test-events.jsonl",
|
|
1968
2639
|
expected_manifest: expectations?.relativePath ?? null,
|
|
1969
|
-
project_guide: projectGuide?.relativePath ?? null,
|
|
1970
2640
|
process_logs: {
|
|
1971
2641
|
stdout: stdoutArtifact?.relativePath ?? null,
|
|
1972
2642
|
stderr: stderrArtifact?.relativePath ?? null,
|
|
1973
2643
|
},
|
|
1974
2644
|
},
|
|
1975
2645
|
expectations_present: Boolean(expectations),
|
|
2646
|
+
expectations: expectations ? toExpectationModel(expectations) : null,
|
|
2647
|
+
expectation_result: expectationResult,
|
|
1976
2648
|
check_summary: buildCheckSummary(snapshot.combinedAllFindings),
|
|
1977
2649
|
agent_context: {
|
|
1978
|
-
agent_name:
|
|
1979
|
-
loop_id:
|
|
1980
|
-
task_id:
|
|
1981
|
-
conversation_id:
|
|
2650
|
+
agent_name: agentContext.agentName ?? null,
|
|
2651
|
+
loop_id: agentContext.loopId ?? null,
|
|
2652
|
+
task_id: agentContext.taskId ?? expectations?.taskId ?? null,
|
|
2653
|
+
conversation_id: agentContext.conversationId ?? null,
|
|
1982
2654
|
},
|
|
1983
2655
|
};
|
|
1984
2656
|
};
|
|
1985
2657
|
const writeSnapshotFiles = async (params) => {
|
|
1986
2658
|
const { runtime, snapshot, phase } = params;
|
|
1987
|
-
const { outputDir, context, command, generatedAt, expectations
|
|
2659
|
+
const { outputDir, context, command, generatedAt, expectations } = runtime;
|
|
1988
2660
|
const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
|
|
1989
2661
|
const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
|
|
1990
2662
|
for (const stalePath of runtime.currentTestPaths) {
|
|
@@ -2010,10 +2682,10 @@ const writeSnapshotFiles = async (params) => {
|
|
|
2010
2682
|
writeJson(join(outputDir, "manifest", "run.json"), toRunManifest({
|
|
2011
2683
|
context,
|
|
2012
2684
|
command,
|
|
2685
|
+
agentContext: runtime.agentContext,
|
|
2013
2686
|
generatedAt,
|
|
2014
2687
|
phase,
|
|
2015
2688
|
expectations,
|
|
2016
|
-
projectGuide,
|
|
2017
2689
|
snapshot,
|
|
2018
2690
|
})),
|
|
2019
2691
|
writeJsonlSnapshot(join(outputDir, "manifest", "tests.jsonl"), snapshot.entries.map(toTestsManifestLine)),
|
|
@@ -2035,7 +2707,7 @@ const writeSnapshotFiles = async (params) => {
|
|
|
2035
2707
|
qualityGateResults: snapshot.qualityGateResults,
|
|
2036
2708
|
findings: snapshot.combinedAllFindings,
|
|
2037
2709
|
})),
|
|
2038
|
-
writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(
|
|
2710
|
+
writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
|
|
2039
2711
|
]);
|
|
2040
2712
|
};
|
|
2041
2713
|
const createBootstrapSnapshot = () => ({
|
|
@@ -2079,7 +2751,7 @@ const createBootstrapSnapshot = () => ({
|
|
|
2079
2751
|
combinedAllFindings: [],
|
|
2080
2752
|
});
|
|
2081
2753
|
const writeBootstrapFiles = async (runtime) => {
|
|
2082
|
-
await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(
|
|
2754
|
+
await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
|
|
2083
2755
|
await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
|
|
2084
2756
|
await writeSnapshotFiles({
|
|
2085
2757
|
runtime,
|
|
@@ -2104,19 +2776,54 @@ const toTestsManifestLine = (entry) => ({
|
|
|
2104
2776
|
markdown_path: entry.relativePath,
|
|
2105
2777
|
assets_dir: entry.relativeAssetDir,
|
|
2106
2778
|
});
|
|
2107
|
-
const
|
|
2108
|
-
|
|
2109
|
-
subject: finding.subject,
|
|
2110
|
-
severity: finding.severity,
|
|
2111
|
-
category: finding.category,
|
|
2112
|
-
check_name: finding.checkName,
|
|
2113
|
-
message: finding.message,
|
|
2114
|
-
explanation: finding.explanation,
|
|
2115
|
-
evidence_paths: finding.evidencePaths,
|
|
2116
|
-
remediation_hint: finding.remediationHint,
|
|
2117
|
-
expected_reference: finding.expectedReference,
|
|
2118
|
-
confidence: finding.confidence,
|
|
2779
|
+
/**
 * Builds the structured `subject` descriptor for a finding.
 *
 * The descriptor always carries `type` (copied from `finding.subjectType`).
 * Only when the subject type is exactly "test" does it also carry `id` and
 * `path`, both set to the same `finding.subject` value.
 *
 * @param {object} finding - Finding providing `subjectType` and `subject`.
 * @returns {{type: *, id?: *, path?: *}} Subject descriptor.
 */
const toFindingSubject = (finding) => {
    const descriptor = { type: finding.subjectType };
    if (finding.subjectType === "test") {
        // For test subjects the same reference doubles as identifier and path.
        descriptor.id = finding.subject;
        descriptor.path = finding.subject;
    }
    return descriptor;
};
|
|
2783
|
+
/**
 * Converts a finding into one findings-manifest record tagged
 * "allure-agent-finding/v2".
 *
 * Fallbacks applied while building the record:
 *   - `confidence` becomes 1 when the finding has none (null/undefined);
 *   - `title` falls back to `message`;
 *   - `expected` falls back to `{ reference: expectedReference }` when a truthy
 *     reference exists, otherwise to an empty object;
 *   - `observed` falls back to `{ detail: explanation }`;
 *   - `action` falls back to `remediationHint`.
 *
 * `source`, `limits` and `affected` are emitted only when truthy;
 * `more_count` is emitted whenever `moreCount` is not `undefined` (so 0 is kept).
 *
 * The flat, snake_case fields are written twice: nested under `legacy` and
 * again at the top level (where the raw subject string is named `subject_ref`
 * because `subject` holds the structured descriptor).
 * NOTE(review): presumably this duplication keeps readers of the earlier
 * record layout working - confirm before removing either copy.
 *
 * @param {object} finding - Finding to serialise.
 * @returns {object} Manifest record; key insertion order matches the order below.
 */
const toFindingManifestLine = (finding) => {
    const impact = defaultImpactForFinding(finding);
    const confidence = finding.confidence ?? 1;

    // Flat fields that appear under identical keys both inside `legacy` and at top level.
    const sharedFlatFields = {
        subject_type: finding.subjectType,
        check_name: finding.checkName,
        explanation: finding.explanation,
        evidence_paths: finding.evidencePaths,
        remediation_hint: finding.remediationHint,
        expected_reference: finding.expectedReference,
    };

    let expected = finding.expected;
    if (expected === undefined || expected === null) {
        expected = finding.expectedReference ? { reference: finding.expectedReference } : {};
    }

    const record = {
        schema_version: "allure-agent-finding/v2",
        check_id: finding.checkName,
        instance_id: finding.findingId,
        severity: finding.severity,
        impact,
        confidence,
        category: finding.category,
        title: finding.title ?? finding.message,
        message: finding.message,
        subject: toFindingSubject(finding),
        expected,
        observed: finding.observed ?? { detail: finding.explanation },
        evidence: {
            paths: finding.evidencePaths,
        },
        action: finding.action ?? finding.remediationHint,
    };

    // Optional sections are appended here so they sit between `action` and `legacy`.
    if (finding.source) {
        record.source = finding.source;
    }
    if (finding.limits) {
        record.limits = finding.limits;
    }
    if (finding.affected) {
        record.affected = finding.affected;
    }
    if (finding.moreCount !== undefined) {
        record.more_count = finding.moreCount;
    }

    record.legacy = {
        finding_id: finding.findingId,
        subject: finding.subject,
        ...sharedFlatFields,
    };
    record.finding_id = finding.findingId;
    record.subject_ref = finding.subject;
    return Object.assign(record, sharedFlatFields);
};
|
|
2120
2827
|
const queueRuntimeTask = (runtime, task) => {
|
|
2121
2828
|
runtime.queue = runtime.queue
|
|
2122
2829
|
.catch(() => undefined)
|
|
@@ -2238,18 +2945,22 @@ const createRuntimeState = async (params) => {
|
|
|
2238
2945
|
await cleanupManagedEntries(outputDir);
|
|
2239
2946
|
const generatedAt = new Date().toISOString();
|
|
2240
2947
|
const createFinding = createFindingFactory();
|
|
2241
|
-
const expectationLoadResult = await loadExpectations(outputDir, createFinding);
|
|
2242
|
-
const projectGuide = await loadProjectGuide(outputDir);
|
|
2948
|
+
const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
|
|
2243
2949
|
const runtime = {
|
|
2244
2950
|
outputDir,
|
|
2245
2951
|
context,
|
|
2246
2952
|
store,
|
|
2247
2953
|
generatedAt,
|
|
2248
|
-
command:
|
|
2954
|
+
command: options.command,
|
|
2955
|
+
agentContext: {
|
|
2956
|
+
agentName: options.agentName,
|
|
2957
|
+
loopId: options.loopId,
|
|
2958
|
+
taskId: options.taskId,
|
|
2959
|
+
conversationId: options.conversationId,
|
|
2960
|
+
},
|
|
2249
2961
|
createFinding,
|
|
2250
2962
|
expectations: expectationLoadResult.expectations,
|
|
2251
2963
|
expectationLoadFindings: expectationLoadResult.findings,
|
|
2252
|
-
projectGuide,
|
|
2253
2964
|
unsubscribers: [],
|
|
2254
2965
|
queue: Promise.resolve(),
|
|
2255
2966
|
seenLogicalKeys: new Set(),
|