@allurereport/plugin-agent 3.10.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -77
- package/dist/capabilities.d.ts +127 -0
- package/dist/capabilities.js +266 -0
- package/dist/errors.d.ts +9 -0
- package/dist/errors.js +15 -0
- package/dist/guidance.d.ts +4 -5
- package/dist/guidance.js +223 -60
- package/dist/harness.d.ts +72 -4
- package/dist/harness.js +49 -17
- package/dist/index.d.ts +9 -1
- package/dist/index.js +9 -0
- package/dist/inline-expectations.d.ts +23 -0
- package/dist/inline-expectations.js +186 -0
- package/dist/invalid-output.d.ts +58 -0
- package/dist/invalid-output.js +238 -0
- package/dist/model.d.ts +59 -0
- package/dist/model.js +8 -1
- package/dist/paths.d.ts +3 -0
- package/dist/paths.js +10 -0
- package/dist/plugin.js +916 -137
- package/dist/query.d.ts +195 -0
- package/dist/query.js +177 -0
- package/dist/selection.d.ts +42 -0
- package/dist/selection.js +141 -0
- package/dist/state.d.ts +56 -0
- package/dist/state.js +277 -0
- package/dist/utils.d.ts +17 -0
- package/dist/utils.js +171 -0
- package/package.json +6 -6
package/dist/guidance.js
CHANGED
|
@@ -1,10 +1,25 @@
|
|
|
1
1
|
export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
|
|
2
|
-
"
|
|
2
|
+
"expectations-invalid": {
|
|
3
3
|
category: "bootstrap-allure",
|
|
4
|
-
title: "Repair the expectations
|
|
5
|
-
guidance: "Regenerate a valid YAML
|
|
4
|
+
title: "Repair the expectations input",
|
|
5
|
+
guidance: "Regenerate valid inline expectations or a valid YAML/JSON expectations file before the next iteration.",
|
|
6
6
|
},
|
|
7
|
-
"
|
|
7
|
+
"expectations-empty": {
|
|
8
|
+
category: "narrow-test-scope",
|
|
9
|
+
title: "Add recognized expectation controls",
|
|
10
|
+
guidance: "Rerun with supported M1 expectation controls or omit expectations for an intentionally broad review.",
|
|
11
|
+
},
|
|
12
|
+
"expectations-unsupported-control": {
|
|
13
|
+
category: "review-manually",
|
|
14
|
+
title: "Use supported expectation controls",
|
|
15
|
+
guidance: "Replace unsupported controls with supported M1 flags or report weaker checking explicitly.",
|
|
16
|
+
},
|
|
17
|
+
"expectations-weak-goal": {
|
|
18
|
+
category: "review-manually",
|
|
19
|
+
title: "Use a more specific goal next time",
|
|
20
|
+
guidance: "Base conclusions on observed evidence and rerun with a specific goal when expectation precision matters.",
|
|
21
|
+
},
|
|
22
|
+
"no-tests-observed": {
|
|
8
23
|
category: "bootstrap-allure",
|
|
9
24
|
title: "Restore Allure result generation",
|
|
10
25
|
guidance: "Make sure the test command emits Allure results before rerunning the enrichment loop.",
|
|
@@ -24,22 +39,27 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
|
|
|
24
39
|
title: "Call out partial runtime modeling",
|
|
25
40
|
guidance: "Compare run statistics with the logical test files and document any skipped or non-passed results that were not rendered.",
|
|
26
41
|
},
|
|
27
|
-
"
|
|
42
|
+
"expected-test-missing": {
|
|
28
43
|
category: "narrow-test-scope",
|
|
29
44
|
title: "Bring the intended test back into scope",
|
|
30
45
|
guidance: "Regenerate expectations and rerun only the planned tests or selectors.",
|
|
31
46
|
},
|
|
32
|
-
"
|
|
47
|
+
"expected-count-mismatch": {
|
|
48
|
+
category: "narrow-test-scope",
|
|
49
|
+
title: "Restore the expected visible test count",
|
|
50
|
+
guidance: "Check the command, selectors, and agent modeling before accepting the run.",
|
|
51
|
+
},
|
|
52
|
+
"expected-prefix-missing": {
|
|
33
53
|
category: "narrow-test-scope",
|
|
34
54
|
title: "Restore the intended name-prefix scope",
|
|
35
55
|
guidance: "Check the selector and rerun only the feature slice that should have matched it.",
|
|
36
56
|
},
|
|
37
|
-
"
|
|
57
|
+
"expected-environment-missing": {
|
|
38
58
|
category: "narrow-test-scope",
|
|
39
59
|
title: "Rerun the intended environment",
|
|
40
60
|
guidance: "Constrain the rerun to the expected environment before accepting the result.",
|
|
41
61
|
},
|
|
42
|
-
"
|
|
62
|
+
"expected-label-missing": {
|
|
43
63
|
category: "repair-test-metadata",
|
|
44
64
|
title: "Add the minimal missing scope label",
|
|
45
65
|
guidance: "Only add the labels required by the expectations selector; do not inflate metadata.",
|
|
@@ -49,6 +69,11 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
|
|
|
49
69
|
title: "Remove unrelated environments from the rerun",
|
|
50
70
|
guidance: "Tighten the rerun selector so unrelated environments do not appear in agent output.",
|
|
51
71
|
},
|
|
72
|
+
"forbidden-label-observed": {
|
|
73
|
+
category: "narrow-test-scope",
|
|
74
|
+
title: "Stop forbidden labeled tests from running",
|
|
75
|
+
guidance: "Reject the run, narrow the rerun scope, and keep the forbidden label expectation.",
|
|
76
|
+
},
|
|
52
77
|
"forbidden-selector-match": {
|
|
53
78
|
category: "narrow-test-scope",
|
|
54
79
|
title: "Stop forbidden tests from running",
|
|
@@ -74,6 +99,26 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
|
|
|
74
99
|
title: "Add meaningful setup, action, and assertion steps",
|
|
75
100
|
guidance: "Wrap only real actions, state transitions, and checks in Allure steps before rerunning.",
|
|
76
101
|
},
|
|
102
|
+
"expected-step-containing-missing": {
|
|
103
|
+
category: "add-meaningful-steps",
|
|
104
|
+
title: "Add or correct the expected step text",
|
|
105
|
+
guidance: "Expose the expected runtime check as a test-scoped Allure step, or correct the expectation wording.",
|
|
106
|
+
},
|
|
107
|
+
"insufficient-expected-steps": {
|
|
108
|
+
category: "add-meaningful-steps",
|
|
109
|
+
title: "Add the expected meaningful steps",
|
|
110
|
+
guidance: "Expose real setup, action, state transition, and assertion steps without adding filler.",
|
|
111
|
+
},
|
|
112
|
+
"insufficient-expected-attachments": {
|
|
113
|
+
category: "add-test-attachments",
|
|
114
|
+
title: "Add the expected runtime attachments",
|
|
115
|
+
guidance: "Attach focused runtime evidence such as payloads, logs, screenshots, diffs, or traces.",
|
|
116
|
+
},
|
|
117
|
+
"missing-expected-attachment": {
|
|
118
|
+
category: "add-test-attachments",
|
|
119
|
+
title: "Add the required attachment",
|
|
120
|
+
guidance: "Attach the requested runtime artifact near the relevant action or assertion.",
|
|
121
|
+
},
|
|
77
122
|
"failed-without-attachments": {
|
|
78
123
|
category: "add-test-attachments",
|
|
79
124
|
title: "Attach focused runtime evidence near the failure",
|
|
@@ -110,18 +155,150 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
|
|
|
110
155
|
guidance: "Add a few real verification steps or attachments so the passing test shows what it proved.",
|
|
111
156
|
},
|
|
112
157
|
};
|
|
113
|
-
export const
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
158
|
+
export const AGENT_WORKFLOWS_MARKDOWN = `Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done.
|
|
159
|
+
|
|
160
|
+
### Validate A Change
|
|
161
|
+
|
|
162
|
+
Use when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.
|
|
163
|
+
|
|
164
|
+
Commands:
|
|
165
|
+
|
|
166
|
+
- \`allure agent --goal <text> --expect-* -- <command>\`
|
|
167
|
+
|
|
168
|
+
Done when:
|
|
169
|
+
|
|
170
|
+
- the expected scope ran and no forbidden scope appeared
|
|
171
|
+
- \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
|
|
172
|
+
- the \`index.md\` path was reported
|
|
173
|
+
- the changed package build and required static checks passed when this repository guide requires them
|
|
174
|
+
|
|
175
|
+
### Add Or Update Tests
|
|
176
|
+
|
|
177
|
+
Use when creating or changing tests for a feature, fix, or behavior gap.
|
|
178
|
+
|
|
179
|
+
Commands:
|
|
180
|
+
|
|
181
|
+
- \`allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>" --expect-label name=value --expect-step-containing <text> -- <command>\`
|
|
182
|
+
|
|
183
|
+
Done when:
|
|
184
|
+
|
|
185
|
+
- the tests prove the intended behavior rather than only touching the code path
|
|
186
|
+
- scope expectations match the intended feature, issue, or package slice
|
|
187
|
+
- each expected test has enough steps or attachments for a reviewer to understand what happened
|
|
188
|
+
- weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope
|
|
189
|
+
|
|
190
|
+
### Review Existing Coverage
|
|
191
|
+
|
|
192
|
+
Use when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first.
|
|
193
|
+
|
|
194
|
+
Commands:
|
|
195
|
+
|
|
196
|
+
- one scoped \`allure agent --goal <text> --expect-* -- <command>\` run per review group
|
|
197
|
+
- \`allure agent inspect --goal <text> --expect-* <allure-results-dir-or-glob>\` or \`--dump <archive-or-glob>\` when the evidence already exists as local results or CI dump artifacts
|
|
198
|
+
|
|
199
|
+
Done when:
|
|
200
|
+
|
|
201
|
+
- the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run
|
|
202
|
+
- each group has expectations that describe the intended scope
|
|
203
|
+
- runtime artifacts are reviewed before source-only coverage conclusions
|
|
204
|
+
- uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary
|
|
205
|
+
|
|
206
|
+
### Review Existing Evidence
|
|
207
|
+
|
|
208
|
+
Use when CI has already produced dump archives or local Allure results already exist and you need agent-readable review artifacts without rerunning tests locally.
|
|
209
|
+
|
|
210
|
+
Commands:
|
|
211
|
+
|
|
212
|
+
- \`allure agent inspect <allure-results-dir-or-glob>\`
|
|
213
|
+
- \`allure agent inspect --dump <archive-or-glob>\`
|
|
214
|
+
- \`allure agent inspect --dump <linux.zip> --dump <macos.zip>\`
|
|
215
|
+
- \`allure agent inspect --goal <text> --expect-* --dump <archive-or-glob>\`
|
|
216
|
+
|
|
217
|
+
Done when:
|
|
218
|
+
|
|
219
|
+
- all intended result directories or dump artifacts were downloaded or present and matched by the command
|
|
220
|
+
- \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
|
|
221
|
+
- the review calls out that inspect-derived output cannot add missing live process logs or rerun-time evidence unless those artifacts were captured in the results or dumps
|
|
222
|
+
- any environment-specific gaps between CI jobs are explicit
|
|
223
|
+
|
|
224
|
+
### Triage Failures
|
|
225
|
+
|
|
226
|
+
Use when tests failed, broke, or runner output does not match agent artifacts.
|
|
227
|
+
|
|
228
|
+
Commands:
|
|
229
|
+
|
|
230
|
+
- \`allure agent latest\`
|
|
231
|
+
- \`allure agent --rerun-latest --rerun-preset failed -- <command>\`
|
|
232
|
+
- \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\`
|
|
233
|
+
|
|
234
|
+
Done when:
|
|
235
|
+
|
|
236
|
+
- failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly
|
|
237
|
+
- \`artifacts/global/stderr.txt\` and global errors were checked when failures are missing from \`manifest/tests.jsonl\`
|
|
238
|
+
- reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan
|
|
239
|
+
|
|
240
|
+
### Rerun A Prior Scope
|
|
241
|
+
|
|
242
|
+
Use when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused.
|
|
243
|
+
|
|
244
|
+
Commands:
|
|
245
|
+
|
|
246
|
+
- \`allure agent select --latest [--preset review|failed|unsuccessful|all]\`
|
|
247
|
+
- \`allure agent select --from <output-dir> [--environment <id>] [--label name=value]\`
|
|
248
|
+
- \`allure agent --rerun-latest -- <command>\`
|
|
249
|
+
- \`allure agent --rerun-from <output-dir> -- <command>\`
|
|
250
|
+
|
|
251
|
+
Done when:
|
|
252
|
+
|
|
253
|
+
- the rerun scope comes from Allure testplan support
|
|
254
|
+
- \`--rerun-preset\`, \`--rerun-environment\`, or \`--rerun-label\` filters explain any narrowed selection
|
|
255
|
+
- manual test names are used only as a fallback when testplan support is unavailable
|
|
256
|
+
- the rerun output is reviewed before making a new conclusion
|
|
257
|
+
|
|
258
|
+
### Improve Evidence Quality
|
|
259
|
+
|
|
260
|
+
Use when tests pass or fail but the runtime story is too weak to review.
|
|
261
|
+
|
|
262
|
+
Commands:
|
|
263
|
+
|
|
264
|
+
- \`allure agent --expect-step-containing <text> --expect-steps <count> --expect-attachments <count> -- <command>\`
|
|
265
|
+
- \`allure agent --expect-attachment <name|name=value|content-type=value> -- <command>\`
|
|
266
|
+
|
|
267
|
+
Done when:
|
|
268
|
+
|
|
269
|
+
- steps describe real setup, actions, state transitions, or assertions
|
|
270
|
+
- attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces
|
|
271
|
+
- placeholder steps, generic \`"passed"\` attachments, and other dummy evidence are removed
|
|
272
|
+
- the same intended scope was rerun and no high-confidence evidence findings remain
|
|
273
|
+
|
|
274
|
+
### Recover Or Diagnose Agent Mode
|
|
275
|
+
|
|
276
|
+
Use when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox.
|
|
277
|
+
|
|
278
|
+
Commands:
|
|
279
|
+
|
|
280
|
+
- \`allure --version\`
|
|
281
|
+
- \`allure agent capabilities --json\`
|
|
282
|
+
- \`allure agent --help\`
|
|
283
|
+
- \`allure agent latest\`
|
|
284
|
+
- \`allure agent state-dir\`
|
|
285
|
+
- \`ALLURE_AGENT_STATE_DIR=<dir>\`
|
|
286
|
+
|
|
287
|
+
Done when:
|
|
288
|
+
|
|
289
|
+
- supported local commands and flags are known from capabilities or help output
|
|
290
|
+
- the output directory, \`index.md\` path, or state directory is identified, or the reason it is unavailable is documented
|
|
291
|
+
- console-only conclusions stay provisional until agent-mode artifacts are available`;
|
|
292
|
+
export const AGENT_COMMAND_TASK_MAP = [
|
|
293
|
+
"`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.",
|
|
294
|
+
"`allure agent --goal ... -- <command>`: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone. The default `--report auto` may also write a human-readable `awesome/index.html` for small runs.",
|
|
295
|
+
"`allure agent inspect <allure-results-dir-or-glob>` / `allure agent inspect --dump <archive-or-glob>`: existing evidence review loop. Use after downloading one or more dump archives or when Allure results already exist and you need agent-readable markdown, manifests, and optional human report output without rerunning tests locally. Repeat `--dump` to merge multiple environments or jobs.",
|
|
296
|
+
"`allure agent latest`: output recovery loop. Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, a user asks for the human-readable report from the last run, or a follow-up task needs prior output before selecting or rerunning tests.",
|
|
297
|
+
"`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where per-project run registries are stored.",
|
|
298
|
+
"`allure agent query --latest summary|tests|findings|test` / `allure agent query --from <output-dir> ...`: output inspection loop. Use when you need a focused JSON summary, human-report status, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.",
|
|
299
|
+
"`allure agent select --latest` / `allure agent select --from <output-dir>`: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.",
|
|
300
|
+
"`allure agent --rerun-latest` / `allure agent --rerun-from <output-dir>`: focused retry loop. Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.",
|
|
301
|
+
"`ALLURE_AGENT_STATE_DIR=<dir>`: CI and sandbox state-control loop. Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable.",
|
|
125
302
|
];
|
|
126
303
|
export const AGENT_VERIFICATION_RULES = [
|
|
127
304
|
"If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.",
|
|
@@ -130,16 +307,6 @@ export const AGENT_VERIFICATION_RULES = [
|
|
|
130
307
|
"After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace <package-name> build`).",
|
|
131
308
|
"After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.",
|
|
132
309
|
];
|
|
133
|
-
export const AGENT_SMALL_TEST_CHANGE_WORKFLOW = [
|
|
134
|
-
"Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.",
|
|
135
|
-
"Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.",
|
|
136
|
-
"Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim.",
|
|
137
|
-
];
|
|
138
|
-
export const AGENT_COVERAGE_REVIEW_WORKFLOW = [
|
|
139
|
-
"Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.",
|
|
140
|
-
"Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.",
|
|
141
|
-
"Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad.",
|
|
142
|
-
];
|
|
143
310
|
export const AGENT_TEST_ENRICHMENT_BEST_PRACTICES = [
|
|
144
311
|
"Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.",
|
|
145
312
|
"Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.",
|
|
@@ -174,14 +341,20 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
|
|
|
174
341
|
- Use \`allure agent\` for smoke checks too, even when the change is small or mechanical.
|
|
175
342
|
- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
|
|
176
343
|
- After each agent-mode test run, print the \`index.md\` path from that run's output directory so users can open the run overview quickly.
|
|
177
|
-
- Use \`
|
|
178
|
-
-
|
|
179
|
-
- Use \`allure agent
|
|
180
|
-
- Use \`allure agent
|
|
344
|
+
- Use \`allure agent latest\` to print the newest output directory and \`index.md\` path when \`--output\` was omitted.
|
|
345
|
+
- When a user asks for the human-readable report from the last run, use \`allure agent latest\` first if the output directory is unknown, then check \`manifest/human-report.json\`; when its status is \`generated\`, use the path recorded there, usually \`awesome/index.html\`.
|
|
346
|
+
- Use \`allure agent capabilities --json\` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text.
|
|
347
|
+
- Use \`allure agent state-dir\` to inspect the shared state directory that stores per-project run registries.
|
|
348
|
+
- Use \`allure agent latest\`, \`state-dir\`, \`query\`, \`select\`, and \`--rerun-*\` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.
|
|
349
|
+
- Use \`allure agent inspect <allure-results-dir-or-glob>\` or \`allure agent inspect --dump <archive-or-glob>\` when you need agent-readable markdown and manifests from existing Allure results without rerunning tests locally; repeat \`--dump\` for multiple CI jobs or environments.
|
|
350
|
+
- Use \`--report auto|off|awesome|config\` to control human report output. The default \`auto\` mode writes \`awesome/index.html\` for 1000 or fewer stored visible logical results and records generated, skipped, disabled, or failed status in \`manifest/human-report.json\`.
|
|
351
|
+
- Use \`allure agent query --latest summary|tests|findings|test\` or \`allure agent query --from <output-dir> ...\` to inspect prior output as focused JSON before manually opening raw manifests.
|
|
352
|
+
- Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning; add \`--output <file>\` when you want the CLI to write the plan and print a short selection summary.
|
|
181
353
|
- Use \`allure agent --rerun-latest -- <command>\` or \`allure agent --rerun-from <output-dir> -- <command>\` to rerun only the selected tests.
|
|
354
|
+
- When rerunning previous failures, use \`allure agent --rerun-latest --rerun-preset failed -- <command>\` or \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\` instead of manually rebuilding runner-specific test names.
|
|
182
355
|
- Use \`--rerun-preset review|failed|unsuccessful|all\`, repeated \`--rerun-environment <id>\`, and repeated \`--rerun-label name=value\` when you need a narrower rerun selection from the previous output.
|
|
183
|
-
- Use \`ALLURE_AGENT_STATE_DIR\` when you need to override
|
|
184
|
-
-
|
|
356
|
+
- Use \`ALLURE_AGENT_STATE_DIR\` when you need to override the shared agent state directory for \`latest\`, \`state-dir\`, or \`--rerun-latest\`.
|
|
357
|
+
- Prefer inline \`allure agent\` expectation flags such as \`--goal\`, \`--expect-tests\`, \`--expect-test\`, \`--expect-label\`, and \`--expect-step-containing\`; use \`--expectations <file>\` only when flags become awkward.
|
|
185
358
|
- Run tests with \`allure agent\` and review \`manifest/run.json\`, \`manifest/test-events.jsonl\`, \`index.md\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\`.
|
|
186
359
|
- Enrich only the intended tests. Add real steps for real setup, actions, and assertions.
|
|
187
360
|
- Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.
|
|
@@ -189,26 +362,19 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
|
|
|
189
362
|
- Instrument stable helpers when several call sites need the same evidence. For example, teach \`runCommand\` to emit a step instead of wrapping every caller.
|
|
190
363
|
- Reject the rerun if scope drifts, evidence stays weak, or high-confidence noop-style findings remain.`;
|
|
191
364
|
const renderBullets = (items) => items.map((item) => `- ${item}`).join("\n");
|
|
192
|
-
const renderNumbered = (items) => items.map((item, index) => `${index + 1}. ${item}`).join("\n");
|
|
193
365
|
const renderRemediationGuide = () => Object.entries(ENRICHMENT_ACTIONS_BY_CHECK_NAME)
|
|
194
366
|
.map(([checkName, action]) => `- \`${checkName}\`: ${action.title}. ${action.guidance}`)
|
|
195
367
|
.join("\n");
|
|
196
|
-
export const renderAgentsGuide = (
|
|
368
|
+
export const renderAgentsGuide = () => `# AGENTS Guide
|
|
197
369
|
|
|
198
370
|
## Reading Order
|
|
199
371
|
|
|
200
|
-
|
|
201
|
-
? `1. Read [project guidance](${projectGuidePath}) first for repo-specific testing conventions and loop expectations.
|
|
202
|
-
2. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
|
|
203
|
-
3. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
|
|
204
|
-
4. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
|
|
205
|
-
5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
|
|
206
|
-
6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`
|
|
207
|
-
: `1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
|
|
372
|
+
1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
|
|
208
373
|
2. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
|
|
209
374
|
3. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
|
|
210
|
-
4.
|
|
211
|
-
5.
|
|
375
|
+
4. If a human-readable report is needed, read \`manifest/human-report.json\`; when status is \`generated\`, open the recorded path such as \`awesome/index.html\`.
|
|
376
|
+
5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
|
|
377
|
+
6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.
|
|
212
378
|
|
|
213
379
|
## Directory Contract
|
|
214
380
|
|
|
@@ -217,28 +383,25 @@ ${projectGuidePath
|
|
|
217
383
|
- \`manifest/test-events.jsonl\` is the append-only live event stream for machine consumers during the run.
|
|
218
384
|
- \`manifest/tests.jsonl\` contains one logical test summary per line.
|
|
219
385
|
- \`manifest/findings.jsonl\` contains one advisory finding per line.
|
|
220
|
-
- \`manifest/expected.json\`
|
|
221
|
-
- \`
|
|
386
|
+
- \`manifest/expected.json\` contains normalized expectations from inline flags or \`--expectations <file>\` when provided.
|
|
387
|
+
- \`manifest/human-report.json\` records whether a human-readable report was generated, skipped, disabled, or failed.
|
|
388
|
+
- \`awesome/index.html\` is the default single-file human report path when \`--report auto\` or \`--report awesome\` generates it.
|
|
222
389
|
- \`tests/<environment>/<slug>.md\` contains one logical test per file.
|
|
223
390
|
- Retries from the same run are nested inside the same logical test file.
|
|
224
391
|
- \`tests/<environment>/<slug>.assets/\` contains copied attachments for that logical test.
|
|
225
392
|
- \`artifacts/global/\` contains copied global artifacts for the whole run.
|
|
226
393
|
|
|
227
|
-
##
|
|
394
|
+
## Command Task Map
|
|
228
395
|
|
|
229
|
-
${
|
|
396
|
+
${renderBullets(AGENT_COMMAND_TASK_MAP)}
|
|
230
397
|
|
|
231
|
-
##
|
|
398
|
+
## Agent Workflows
|
|
232
399
|
|
|
233
|
-
${
|
|
400
|
+
${AGENT_WORKFLOWS_MARKDOWN}
|
|
234
401
|
|
|
235
|
-
##
|
|
236
|
-
|
|
237
|
-
${renderNumbered(AGENT_SMALL_TEST_CHANGE_WORKFLOW)}
|
|
238
|
-
|
|
239
|
-
## Coverage Review Workflow
|
|
402
|
+
## Verification Standard
|
|
240
403
|
|
|
241
|
-
${
|
|
404
|
+
${renderBullets(AGENT_VERIFICATION_RULES)}
|
|
242
405
|
|
|
243
406
|
## Test Enrichment Best Practices
|
|
244
407
|
|
package/dist/harness.d.ts
CHANGED
|
@@ -1,22 +1,37 @@
|
|
|
1
1
|
import type { Statistic, TestLabel, TestStatus } from "@allurereport/core-api";
|
|
2
2
|
import { type EnrichmentActionCategory } from "./guidance.js";
|
|
3
|
+
import type { AgentHumanReportStatus } from "./model.js";
|
|
3
4
|
export type AgentFindingSeverity = "info" | "warning" | "high";
|
|
4
5
|
export type AgentFindingCategory = "bootstrap" | "scope" | "metadata" | "evidence" | "smells";
|
|
5
6
|
export type AgentScopeMatch = "match" | "unexpected" | "forbidden" | "unknown";
|
|
6
7
|
export type AgentAcceptanceStatus = "accept" | "iterate" | "reject";
|
|
7
8
|
export type AgentAcceptanceImpact = "advisory" | "iterate" | "reject";
|
|
9
|
+
export type AgentExpectationResultStatus = "matched" | "failed" | "partial" | "degraded" | "unsupported" | "unavailable" | "not_requested";
|
|
10
|
+
export type AgentExpectationResultImpact = "accept" | "reject" | "iterate" | "advisory";
|
|
8
11
|
export type AgentEnrichmentActionCategory = EnrichmentActionCategory;
|
|
9
12
|
export type AgentExpectationSelector = {
|
|
10
13
|
environments?: string[];
|
|
11
14
|
full_names?: string[];
|
|
12
15
|
full_name_prefixes?: string[];
|
|
13
16
|
label_values?: Record<string, string | string[]>;
|
|
17
|
+
test_count?: number;
|
|
18
|
+
};
|
|
19
|
+
export type AgentEvidenceExpectations = {
|
|
20
|
+
required?: boolean;
|
|
21
|
+
min_steps?: number;
|
|
22
|
+
min_attachments?: number;
|
|
23
|
+
step_name_contains?: string[];
|
|
24
|
+
attachments?: Array<{
|
|
25
|
+
name?: string;
|
|
26
|
+
content_type?: string;
|
|
27
|
+
}>;
|
|
14
28
|
};
|
|
15
29
|
export type AgentExpectations = {
|
|
16
30
|
goal?: string;
|
|
17
31
|
task_id?: string;
|
|
18
32
|
expected?: AgentExpectationSelector;
|
|
19
33
|
forbidden?: AgentExpectationSelector;
|
|
34
|
+
evidence?: AgentEvidenceExpectations;
|
|
20
35
|
notes?: string[];
|
|
21
36
|
};
|
|
22
37
|
export type AgentHarnessScopeInput = {
|
|
@@ -97,12 +112,13 @@ export type AgentRunManifest = {
|
|
|
97
112
|
findings_manifest: string;
|
|
98
113
|
test_events_manifest?: string;
|
|
99
114
|
expected_manifest: string | null;
|
|
100
|
-
|
|
115
|
+
human_report_manifest?: string | null;
|
|
101
116
|
process_logs: {
|
|
102
117
|
stdout: string | null;
|
|
103
118
|
stderr: string | null;
|
|
104
119
|
};
|
|
105
120
|
};
|
|
121
|
+
human_report?: AgentHumanReportStatus | null;
|
|
106
122
|
modeling?: {
|
|
107
123
|
completeness: "complete" | "partial";
|
|
108
124
|
reasons: string[];
|
|
@@ -148,6 +164,28 @@ export type AgentRunManifest = {
|
|
|
148
164
|
};
|
|
149
165
|
};
|
|
150
166
|
expectations_present: boolean;
|
|
167
|
+
expectations: AgentExpectations | null;
|
|
168
|
+
expectation_result: {
|
|
169
|
+
schema_version: "allure-agent-expectation-result/v1";
|
|
170
|
+
status: AgentExpectationResultStatus;
|
|
171
|
+
impact: AgentExpectationResultImpact;
|
|
172
|
+
source: {
|
|
173
|
+
kind: "inline" | "file" | "none";
|
|
174
|
+
path: string | null;
|
|
175
|
+
};
|
|
176
|
+
recognized_control_count: number;
|
|
177
|
+
unsupported_controls: string[];
|
|
178
|
+
degraded_controls: string[];
|
|
179
|
+
summary: {
|
|
180
|
+
expected_tests: number;
|
|
181
|
+
observed_tests: number;
|
|
182
|
+
missing_expected: number;
|
|
183
|
+
forbidden_observed: number;
|
|
184
|
+
unexpected_observed: number;
|
|
185
|
+
evidence_mismatches: number;
|
|
186
|
+
};
|
|
187
|
+
finding_ids: string[];
|
|
188
|
+
};
|
|
151
189
|
check_summary: {
|
|
152
190
|
total: number;
|
|
153
191
|
countsBySeverity: Record<AgentFindingSeverity, number>;
|
|
@@ -183,17 +221,46 @@ export type AgentTestManifestLine = {
|
|
|
183
221
|
assets_dir: string;
|
|
184
222
|
};
|
|
185
223
|
export type AgentFindingManifestLine = {
|
|
224
|
+
schema_version?: "allure-agent-finding/v2";
|
|
225
|
+
check_id?: string;
|
|
226
|
+
instance_id?: string;
|
|
186
227
|
finding_id: string;
|
|
187
|
-
subject: string
|
|
228
|
+
subject: string | {
|
|
229
|
+
type: "run" | "test" | "environment" | "attachment" | "global";
|
|
230
|
+
id?: string;
|
|
231
|
+
path?: string;
|
|
232
|
+
full_name?: string;
|
|
233
|
+
environment?: string;
|
|
234
|
+
};
|
|
235
|
+
subject_ref?: string;
|
|
236
|
+
subject_type?: "run" | "test";
|
|
188
237
|
severity: AgentFindingSeverity;
|
|
238
|
+
impact?: AgentAcceptanceImpact;
|
|
189
239
|
category: AgentFindingCategory;
|
|
190
240
|
check_name: string;
|
|
241
|
+
title?: string;
|
|
191
242
|
message: string;
|
|
192
243
|
explanation: string;
|
|
193
244
|
evidence_paths: string[];
|
|
194
245
|
remediation_hint: string;
|
|
195
246
|
expected_reference?: string;
|
|
196
247
|
confidence?: number;
|
|
248
|
+
expected?: Record<string, unknown>;
|
|
249
|
+
observed?: Record<string, unknown>;
|
|
250
|
+
evidence?: {
|
|
251
|
+
paths?: string[];
|
|
252
|
+
};
|
|
253
|
+
action?: string;
|
|
254
|
+
legacy?: {
|
|
255
|
+
finding_id: string;
|
|
256
|
+
subject: string;
|
|
257
|
+
subject_type?: "run" | "test";
|
|
258
|
+
check_name: string;
|
|
259
|
+
explanation?: string;
|
|
260
|
+
evidence_paths?: string[];
|
|
261
|
+
remediation_hint: string;
|
|
262
|
+
expected_reference?: string;
|
|
263
|
+
};
|
|
197
264
|
};
|
|
198
265
|
export type AgentOutputBundle = {
|
|
199
266
|
outputDir: string;
|
|
@@ -201,6 +268,7 @@ export type AgentOutputBundle = {
|
|
|
201
268
|
tests: AgentTestManifestLine[];
|
|
202
269
|
findings: AgentFindingManifestLine[];
|
|
203
270
|
expected?: AgentExpectations;
|
|
271
|
+
humanReport?: AgentHumanReportStatus;
|
|
204
272
|
};
|
|
205
273
|
export type AgentEnrichmentAction = {
|
|
206
274
|
checkName: string;
|
|
@@ -254,8 +322,8 @@ export type AgentReviewOptions = {
|
|
|
254
322
|
};
|
|
255
323
|
export declare const DEFAULT_ANTI_DUMMY_CONFIDENCE = 0.75;
|
|
256
324
|
export declare const AGENT_ENRICHMENT_ACTIONS: Record<string, AgentEnrichmentAction>;
|
|
257
|
-
export declare const SCOPE_REJECTING_CHECKS: readonly ["
|
|
258
|
-
export declare const ITERATION_REQUIRED_CHECKS: readonly ["invalid
|
|
325
|
+
export declare const SCOPE_REJECTING_CHECKS: readonly ["expected-test-missing", "expected-count-mismatch", "expected-prefix-missing", "expected-label-missing", "expected-environment-missing", "no-tests-observed", "unexpected-environment", "forbidden-label-observed", "unexpected-test"];
|
|
326
|
+
export declare const ITERATION_REQUIRED_CHECKS: readonly ["expectations-invalid", "expectations-empty", "expectations-unsupported-control", "runner-failures-outside-logical-results", "metadata-mismatch", "history-id-collision", "expected-step-containing-missing", "insufficient-expected-steps", "insufficient-expected-attachments", "missing-expected-attachment", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", "retries-without-new-evidence", "passed-without-observable-evidence"];
|
|
259
327
|
export declare const ANTI_DUMMY_CHECKS: readonly ["noop-dominated-steps"];
|
|
260
328
|
export declare const buildAgentExpectations: (input: AgentHarnessRequest) => AgentExpectations;
|
|
261
329
|
export declare const mapFindingToEnrichmentAction: (finding: AgentFindingManifestLine | string) => AgentEnrichmentAction;
|