@allurereport/plugin-agent 3.10.0 → 3.12.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/guidance.js CHANGED
@@ -1,10 +1,25 @@
1
1
  export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
2
- "invalid-expectations-file": {
2
+ "expectations-invalid": {
3
3
  category: "bootstrap-allure",
4
- title: "Repair the expectations file",
5
- guidance: "Regenerate a valid YAML or JSON expectations file before the next enrichment iteration.",
4
+ title: "Repair the expectations input",
5
+ guidance: "Regenerate valid inline expectations or a valid YAML/JSON expectations file before the next iteration.",
6
6
  },
7
- "no-visible-tests": {
7
+ "expectations-empty": {
8
+ category: "narrow-test-scope",
9
+ title: "Add recognized expectation controls",
10
+ guidance: "Rerun with supported M1 expectation controls or omit expectations for an intentionally broad review.",
11
+ },
12
+ "expectations-unsupported-control": {
13
+ category: "review-manually",
14
+ title: "Use supported expectation controls",
15
+ guidance: "Replace unsupported controls with supported M1 flags or report weaker checking explicitly.",
16
+ },
17
+ "expectations-weak-goal": {
18
+ category: "review-manually",
19
+ title: "Use a more specific goal next time",
20
+ guidance: "Base conclusions on observed evidence and rerun with a specific goal when expectation precision matters.",
21
+ },
22
+ "no-tests-observed": {
8
23
  category: "bootstrap-allure",
9
24
  title: "Restore Allure result generation",
10
25
  guidance: "Make sure the test command emits Allure results before rerunning the enrichment loop.",
@@ -24,22 +39,27 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
24
39
  title: "Call out partial runtime modeling",
25
40
  guidance: "Compare run statistics with the logical test files and document any skipped or non-passed results that were not rendered.",
26
41
  },
27
- "missing-expected-test": {
42
+ "expected-test-missing": {
28
43
  category: "narrow-test-scope",
29
44
  title: "Bring the intended test back into scope",
30
45
  guidance: "Regenerate expectations and rerun only the planned tests or selectors.",
31
46
  },
32
- "missing-expected-prefix": {
47
+ "expected-count-mismatch": {
48
+ category: "narrow-test-scope",
49
+ title: "Restore the expected visible test count",
50
+ guidance: "Check the command, selectors, and agent modeling before accepting the run.",
51
+ },
52
+ "expected-prefix-missing": {
33
53
  category: "narrow-test-scope",
34
54
  title: "Restore the intended name-prefix scope",
35
55
  guidance: "Check the selector and rerun only the feature slice that should have matched it.",
36
56
  },
37
- "missing-expected-environment": {
57
+ "expected-environment-missing": {
38
58
  category: "narrow-test-scope",
39
59
  title: "Rerun the intended environment",
40
60
  guidance: "Constrain the rerun to the expected environment before accepting the result.",
41
61
  },
42
- "missing-expected-label-selector": {
62
+ "expected-label-missing": {
43
63
  category: "repair-test-metadata",
44
64
  title: "Add the minimal missing scope label",
45
65
  guidance: "Only add the labels required by the expectations selector; do not inflate metadata.",
@@ -49,6 +69,11 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
49
69
  title: "Remove unrelated environments from the rerun",
50
70
  guidance: "Tighten the rerun selector so unrelated environments do not appear in agent output.",
51
71
  },
72
+ "forbidden-label-observed": {
73
+ category: "narrow-test-scope",
74
+ title: "Stop forbidden labeled tests from running",
75
+ guidance: "Reject the run, narrow the rerun scope, and keep the forbidden label expectation.",
76
+ },
52
77
  "forbidden-selector-match": {
53
78
  category: "narrow-test-scope",
54
79
  title: "Stop forbidden tests from running",
@@ -74,6 +99,26 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
74
99
  title: "Add meaningful setup, action, and assertion steps",
75
100
  guidance: "Wrap only real actions, state transitions, and checks in Allure steps before rerunning.",
76
101
  },
102
+ "expected-step-containing-missing": {
103
+ category: "add-meaningful-steps",
104
+ title: "Add or correct the expected step text",
105
+ guidance: "Expose the expected runtime check as a test-scoped Allure step, or correct the expectation wording.",
106
+ },
107
+ "insufficient-expected-steps": {
108
+ category: "add-meaningful-steps",
109
+ title: "Add the expected meaningful steps",
110
+ guidance: "Expose real setup, action, state transition, and assertion steps without adding filler.",
111
+ },
112
+ "insufficient-expected-attachments": {
113
+ category: "add-test-attachments",
114
+ title: "Add the expected runtime attachments",
115
+ guidance: "Attach focused runtime evidence such as payloads, logs, screenshots, diffs, or traces.",
116
+ },
117
+ "missing-expected-attachment": {
118
+ category: "add-test-attachments",
119
+ title: "Add the required attachment",
120
+ guidance: "Attach the requested runtime artifact near the relevant action or assertion.",
121
+ },
77
122
  "failed-without-attachments": {
78
123
  category: "add-test-attachments",
79
124
  title: "Attach focused runtime evidence near the failure",
@@ -110,18 +155,150 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
110
155
  guidance: "Add a few real verification steps or attachments so the passing test shows what it proved.",
111
156
  },
112
157
  };
113
- export const AGENT_ENRICHMENT_WORKFLOW = [
114
- "Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted enrichment iteration.",
115
- "Run tests with `allure agent --output <dir> --expectations <file> -- <command>`.",
116
- "After each test run, print the `index.md` path from that output directory so collaborators can open the run overview quickly.",
117
- "Use `allure agent latest` to recover the newest output directory when a prior run omitted `--output`.",
118
- "Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.",
119
- "Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.",
120
- "Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.",
121
- "Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests through Allure testplan support. Add `--rerun-preset`, repeated `--rerun-environment`, or repeated `--rerun-label name=value` filters when you need a narrower rerun slice.",
122
- "Inspect `manifest/run.json`, tail `manifest/test-events.jsonl`, then review `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before editing tests.",
123
- "Enrich only the intended tests, rerun the same scope, and compare the rerun against `manifest/expected.json` when present.",
124
- "Accept the rerun only when scope is clean, evidence is strong enough to review, and no high-confidence dummy findings remain.",
158
+ export const AGENT_WORKFLOWS_MARKDOWN = `Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done.
159
+
160
+ ### Validate A Change
161
+
162
+ Use when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.
163
+
164
+ Commands:
165
+
166
+ - \`allure agent --goal <text> --expect-* -- <command>\`
167
+
168
+ Done when:
169
+
170
+ - the expected scope ran and no forbidden scope appeared
171
+ - \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
172
+ - the \`index.md\` path was reported
173
+ - the changed package build and required static checks passed when this repository guide requires them
174
+
175
+ ### Add Or Update Tests
176
+
177
+ Use when creating or changing tests for a feature, fix, or behavior gap.
178
+
179
+ Commands:
180
+
181
+ - \`allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>" --expect-label name=value --expect-step-containing <text> -- <command>\`
182
+
183
+ Done when:
184
+
185
+ - the tests prove the intended behavior rather than only touching the code path
186
+ - scope expectations match the intended feature, issue, or package slice
187
+ - each expected test has enough steps or attachments for a reviewer to understand what happened
188
+ - weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope
189
+
190
+ ### Review Existing Coverage
191
+
192
+ Use when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first.
193
+
194
+ Commands:
195
+
196
+ - one scoped \`allure agent --goal <text> --expect-* -- <command>\` run per review group
197
+ - \`allure agent inspect --goal <text> --expect-* <allure-results-dir-or-glob>\` or \`--dump <archive-or-glob>\` when the evidence already exists as local results or CI dump artifacts
198
+
199
+ Done when:
200
+
201
+ - the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run
202
+ - each group has expectations that describe the intended scope
203
+ - runtime artifacts are reviewed before source-only coverage conclusions
204
+ - uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary
205
+
206
+ ### Review Existing Evidence
207
+
208
+ Use when CI has already produced dump archives or local Allure results already exist and you need agent-readable review artifacts without rerunning tests locally.
209
+
210
+ Commands:
211
+
212
+ - \`allure agent inspect <allure-results-dir-or-glob>\`
213
+ - \`allure agent inspect --dump <archive-or-glob>\`
214
+ - \`allure agent inspect --dump <linux.zip> --dump <macos.zip>\`
215
+ - \`allure agent inspect --goal <text> --expect-* --dump <archive-or-glob>\`
216
+
217
+ Done when:
218
+
219
+ - all intended result directories or dump artifacts were downloaded or present and matched by the command
220
+ - \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
221
+ - the review calls out that inspect-derived output cannot add missing live process logs or rerun-time evidence unless those artifacts were captured in the results or dumps
222
+ - any environment-specific gaps between CI jobs are explicit
223
+
224
+ ### Triage Failures
225
+
226
+ Use when tests failed, broke, or runner output does not match agent artifacts.
227
+
228
+ Commands:
229
+
230
+ - \`allure agent latest\`
231
+ - \`allure agent --rerun-latest --rerun-preset failed -- <command>\`
232
+ - \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\`
233
+
234
+ Done when:
235
+
236
+ - failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly
237
+ - \`artifacts/global/stderr.txt\` and global errors were checked when failures are missing from \`manifest/tests.jsonl\`
238
+ - reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan
239
+
240
+ ### Rerun A Prior Scope
241
+
242
+ Use when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused.
243
+
244
+ Commands:
245
+
246
+ - \`allure agent select --latest [--preset review|failed|unsuccessful|all]\`
247
+ - \`allure agent select --from <output-dir> [--environment <id>] [--label name=value]\`
248
+ - \`allure agent --rerun-latest -- <command>\`
249
+ - \`allure agent --rerun-from <output-dir> -- <command>\`
250
+
251
+ Done when:
252
+
253
+ - the rerun scope comes from Allure testplan support
254
+ - \`--rerun-preset\`, \`--rerun-environment\`, or \`--rerun-label\` filters explain any narrowed selection
255
+ - manual test names are used only as a fallback when testplan support is unavailable
256
+ - the rerun output is reviewed before making a new conclusion
257
+
258
+ ### Improve Evidence Quality
259
+
260
+ Use when tests pass or fail but the runtime story is too weak to review.
261
+
262
+ Commands:
263
+
264
+ - \`allure agent --expect-step-containing <text> --expect-steps <count> --expect-attachments <count> -- <command>\`
265
+ - \`allure agent --expect-attachment <name|name=value|content-type=value> -- <command>\`
266
+
267
+ Done when:
268
+
269
+ - steps describe real setup, actions, state transitions, or assertions
270
+ - attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces
271
+ - placeholder steps, generic \`"passed"\` attachments, and other dummy evidence are removed
272
+ - the same intended scope was rerun and no high-confidence evidence findings remain
273
+
274
+ ### Recover Or Diagnose Agent Mode
275
+
276
+ Use when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox.
277
+
278
+ Commands:
279
+
280
+ - \`allure --version\`
281
+ - \`allure agent capabilities --json\`
282
+ - \`allure agent --help\`
283
+ - \`allure agent latest\`
284
+ - \`allure agent state-dir\`
285
+ - \`ALLURE_AGENT_STATE_DIR=<dir>\`
286
+
287
+ Done when:
288
+
289
+ - supported local commands and flags are known from capabilities or help output
290
+ - the output directory, \`index.md\` path, or state directory is identified, or the reason it is unavailable is documented
291
+ - console-only conclusions stay provisional until agent-mode artifacts are available`;
292
+ export const AGENT_COMMAND_TASK_MAP = [
293
+ "`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.",
294
+ "`allure agent --goal ... -- <command>`: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone. The default `--report auto` may also write a human-readable `awesome/index.html` for small runs.",
295
+ "`allure agent inspect <allure-results-dir-or-glob>` / `allure agent inspect --dump <archive-or-glob>`: existing evidence review loop. Use after downloading one or more dump archives or when Allure results already exist and you need agent-readable markdown, manifests, and optional human report output without rerunning tests locally. Repeat `--dump` to merge multiple environments or jobs.",
296
+ "`allure agent latest`: output recovery loop. Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, a user asks for the human-readable report from the last run, or a follow-up task needs prior output before selecting or rerunning tests.",
297
+ "`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where per-project run registries are stored.",
298
+ "`allure agent query --latest summary|tests|findings|test` / `allure agent query --from <output-dir> ...`: output inspection loop. Use when you need a focused JSON summary, human-report status, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.",
299
+ "`allure agent select --latest` / `allure agent select --from <output-dir>`: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.",
300
+ "`allure agent --rerun-latest` / `allure agent --rerun-from <output-dir>`: focused retry loop. Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.",
301
+ "`ALLURE_AGENT_STATE_DIR=<dir>`: CI and sandbox state-control loop. Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable.",
125
302
  ];
126
303
  export const AGENT_VERIFICATION_RULES = [
127
304
  "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.",
@@ -130,16 +307,6 @@ export const AGENT_VERIFICATION_RULES = [
130
307
  "After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace <package-name> build`).",
131
308
  "After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.",
132
309
  ];
133
- export const AGENT_SMALL_TEST_CHANGE_WORKFLOW = [
134
- "Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.",
135
- "Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.",
136
- "Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim.",
137
- ];
138
- export const AGENT_COVERAGE_REVIEW_WORKFLOW = [
139
- "Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.",
140
- "Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.",
141
- "Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad.",
142
- ];
143
310
  export const AGENT_TEST_ENRICHMENT_BEST_PRACTICES = [
144
311
  "Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.",
145
312
  "Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.",
@@ -174,14 +341,20 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
174
341
  - Use \`allure agent\` for smoke checks too, even when the change is small or mechanical.
175
342
  - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
176
343
  - After each agent-mode test run, print the \`index.md\` path from that run's output directory so users can open the run overview quickly.
177
- - Use \`ALLURE_AGENT_*\` with \`allure run\` only as the lower-level fallback when you need direct environment control.
178
- - Use \`allure agent latest\` to reopen the newest run when \`--output\` was omitted.
179
- - Use \`allure agent state-dir\` to inspect where the current project stores its latest-agent state.
180
- - Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning.
344
+ - Use \`allure agent latest\` to print the newest output directory and \`index.md\` path when \`--output\` was omitted.
345
+ - When a user asks for the human-readable report from the last run, use \`allure agent latest\` first if the output directory is unknown, then check \`manifest/human-report.json\`; when its status is \`generated\`, use the path recorded there, usually \`awesome/index.html\`.
346
+ - Use \`allure agent capabilities --json\` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text.
347
+ - Use \`allure agent state-dir\` to inspect the shared state directory that stores per-project run registries.
348
+ - Use \`allure agent latest\`, \`state-dir\`, \`query\`, \`select\`, and \`--rerun-*\` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.
349
+ - Use \`allure agent inspect <allure-results-dir-or-glob>\` or \`allure agent inspect --dump <archive-or-glob>\` when you need agent-readable markdown and manifests from existing Allure results without rerunning tests locally; repeat \`--dump\` for multiple CI jobs or environments.
350
+ - Use \`--report auto|off|awesome|config\` to control human report output. The default \`auto\` mode writes \`awesome/index.html\` for 1000 or fewer stored visible logical results and records generated, skipped, disabled, or failed status in \`manifest/human-report.json\`.
351
+ - Use \`allure agent query --latest summary|tests|findings|test\` or \`allure agent query --from <output-dir> ...\` to inspect prior output as focused JSON before manually opening raw manifests.
352
+ - Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning; add \`--output <file>\` when you want the CLI to write the plan and print a short selection summary.
181
353
  - Use \`allure agent --rerun-latest -- <command>\` or \`allure agent --rerun-from <output-dir> -- <command>\` to rerun only the selected tests.
354
+ - When rerunning previous failures, use \`allure agent --rerun-latest --rerun-preset failed -- <command>\` or \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\` instead of manually rebuilding runner-specific test names.
182
355
  - Use \`--rerun-preset review|failed|unsuccessful|all\`, repeated \`--rerun-environment <id>\`, and repeated \`--rerun-label name=value\` when you need a narrower rerun selection from the previous output.
183
- - Use \`ALLURE_AGENT_STATE_DIR\` when you need to override where the current project stores latest-agent state for \`latest\`, \`state-dir\`, or \`--rerun-latest\`.
184
- - Generate or refresh \`ALLURE_AGENT_EXPECTATIONS\` before each targeted rerun.
356
+ - Use \`ALLURE_AGENT_STATE_DIR\` when you need to override the shared agent state directory for \`latest\`, \`state-dir\`, or \`--rerun-latest\`.
357
+ - Prefer inline \`allure agent\` expectation flags such as \`--goal\`, \`--expect-tests\`, \`--expect-test\`, \`--expect-label\`, and \`--expect-step-containing\`; use \`--expectations <file>\` only when flags become awkward.
185
358
  - Run tests with \`allure agent\` and review \`manifest/run.json\`, \`manifest/test-events.jsonl\`, \`index.md\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\`.
186
359
  - Enrich only the intended tests. Add real steps for real setup, actions, and assertions.
187
360
  - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.
@@ -189,26 +362,19 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
189
362
  - Instrument stable helpers when several call sites need the same evidence. For example, teach \`runCommand\` to emit a step instead of wrapping every caller.
190
363
  - Reject the rerun if scope drifts, evidence stays weak, or high-confidence noop-style findings remain.`;
191
364
  const renderBullets = (items) => items.map((item) => `- ${item}`).join("\n");
192
- const renderNumbered = (items) => items.map((item, index) => `${index + 1}. ${item}`).join("\n");
193
365
  const renderRemediationGuide = () => Object.entries(ENRICHMENT_ACTIONS_BY_CHECK_NAME)
194
366
  .map(([checkName, action]) => `- \`${checkName}\`: ${action.title}. ${action.guidance}`)
195
367
  .join("\n");
196
- export const renderAgentsGuide = (projectGuidePath) => `# AGENTS Guide
368
+ export const renderAgentsGuide = () => `# AGENTS Guide
197
369
 
198
370
  ## Reading Order
199
371
 
200
- ${projectGuidePath
201
- ? `1. Read [project guidance](${projectGuidePath}) first for repo-specific testing conventions and loop expectations.
202
- 2. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
203
- 3. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
204
- 4. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
205
- 5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
206
- 6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`
207
- : `1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
372
+ 1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
208
373
  2. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
209
374
  3. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
210
- 4. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
211
- 5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`}
375
+ 4. If a human-readable report is needed, read \`manifest/human-report.json\`; when status is \`generated\`, open the recorded path such as \`awesome/index.html\`.
376
+ 5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
377
+ 6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.
212
378
 
213
379
  ## Directory Contract
214
380
 
@@ -217,28 +383,25 @@ ${projectGuidePath
217
383
  - \`manifest/test-events.jsonl\` is the append-only live event stream for machine consumers during the run.
218
384
  - \`manifest/tests.jsonl\` contains one logical test summary per line.
219
385
  - \`manifest/findings.jsonl\` contains one advisory finding per line.
220
- - \`manifest/expected.json\` is copied from \`ALLURE_AGENT_EXPECTATIONS\` when provided.
221
- - \`project/docs/allure-agent-mode.md\` is copied from the project when available so each run keeps the guide used for that execution.
386
+ - \`manifest/expected.json\` contains normalized expectations from inline flags or \`--expectations <file>\` when provided.
387
+ - \`manifest/human-report.json\` records whether a human-readable report was generated, skipped, disabled, or failed.
388
+ - \`awesome/index.html\` is the default single-file human report path when \`--report auto\` or \`--report awesome\` generates it.
222
389
  - \`tests/<environment>/<slug>.md\` contains one logical test per file.
223
390
  - Retries from the same run are nested inside the same logical test file.
224
391
  - \`tests/<environment>/<slug>.assets/\` contains copied attachments for that logical test.
225
392
  - \`artifacts/global/\` contains copied global artifacts for the whole run.
226
393
 
227
- ## Enrichment Loop Workflow
394
+ ## Command Task Map
228
395
 
229
- ${renderNumbered(AGENT_ENRICHMENT_WORKFLOW)}
396
+ ${renderBullets(AGENT_COMMAND_TASK_MAP)}
230
397
 
231
- ## Verification Standard
398
+ ## Agent Workflows
232
399
 
233
- ${renderBullets(AGENT_VERIFICATION_RULES)}
400
+ ${AGENT_WORKFLOWS_MARKDOWN}
234
401
 
235
- ## Small Test Change Workflow
236
-
237
- ${renderNumbered(AGENT_SMALL_TEST_CHANGE_WORKFLOW)}
238
-
239
- ## Coverage Review Workflow
402
+ ## Verification Standard
240
403
 
241
- ${renderNumbered(AGENT_COVERAGE_REVIEW_WORKFLOW)}
404
+ ${renderBullets(AGENT_VERIFICATION_RULES)}
242
405
 
243
406
  ## Test Enrichment Best Practices
244
407
 
package/dist/harness.d.ts CHANGED
@@ -1,22 +1,37 @@
1
1
  import type { Statistic, TestLabel, TestStatus } from "@allurereport/core-api";
2
2
  import { type EnrichmentActionCategory } from "./guidance.js";
3
+ import type { AgentHumanReportStatus } from "./model.js";
3
4
  export type AgentFindingSeverity = "info" | "warning" | "high";
4
5
  export type AgentFindingCategory = "bootstrap" | "scope" | "metadata" | "evidence" | "smells";
5
6
  export type AgentScopeMatch = "match" | "unexpected" | "forbidden" | "unknown";
6
7
  export type AgentAcceptanceStatus = "accept" | "iterate" | "reject";
7
8
  export type AgentAcceptanceImpact = "advisory" | "iterate" | "reject";
9
+ export type AgentExpectationResultStatus = "matched" | "failed" | "partial" | "degraded" | "unsupported" | "unavailable" | "not_requested";
10
+ export type AgentExpectationResultImpact = "accept" | "reject" | "iterate" | "advisory";
8
11
  export type AgentEnrichmentActionCategory = EnrichmentActionCategory;
9
12
  export type AgentExpectationSelector = {
10
13
  environments?: string[];
11
14
  full_names?: string[];
12
15
  full_name_prefixes?: string[];
13
16
  label_values?: Record<string, string | string[]>;
17
+ test_count?: number;
18
+ };
19
+ export type AgentEvidenceExpectations = {
20
+ required?: boolean;
21
+ min_steps?: number;
22
+ min_attachments?: number;
23
+ step_name_contains?: string[];
24
+ attachments?: Array<{
25
+ name?: string;
26
+ content_type?: string;
27
+ }>;
14
28
  };
15
29
  export type AgentExpectations = {
16
30
  goal?: string;
17
31
  task_id?: string;
18
32
  expected?: AgentExpectationSelector;
19
33
  forbidden?: AgentExpectationSelector;
34
+ evidence?: AgentEvidenceExpectations;
20
35
  notes?: string[];
21
36
  };
22
37
  export type AgentHarnessScopeInput = {
@@ -97,12 +112,13 @@ export type AgentRunManifest = {
97
112
  findings_manifest: string;
98
113
  test_events_manifest?: string;
99
114
  expected_manifest: string | null;
100
- project_guide: string | null;
115
+ human_report_manifest?: string | null;
101
116
  process_logs: {
102
117
  stdout: string | null;
103
118
  stderr: string | null;
104
119
  };
105
120
  };
121
+ human_report?: AgentHumanReportStatus | null;
106
122
  modeling?: {
107
123
  completeness: "complete" | "partial";
108
124
  reasons: string[];
@@ -148,6 +164,28 @@ export type AgentRunManifest = {
148
164
  };
149
165
  };
150
166
  expectations_present: boolean;
167
+ expectations: AgentExpectations | null;
168
+ expectation_result: {
169
+ schema_version: "allure-agent-expectation-result/v1";
170
+ status: AgentExpectationResultStatus;
171
+ impact: AgentExpectationResultImpact;
172
+ source: {
173
+ kind: "inline" | "file" | "none";
174
+ path: string | null;
175
+ };
176
+ recognized_control_count: number;
177
+ unsupported_controls: string[];
178
+ degraded_controls: string[];
179
+ summary: {
180
+ expected_tests: number;
181
+ observed_tests: number;
182
+ missing_expected: number;
183
+ forbidden_observed: number;
184
+ unexpected_observed: number;
185
+ evidence_mismatches: number;
186
+ };
187
+ finding_ids: string[];
188
+ };
151
189
  check_summary: {
152
190
  total: number;
153
191
  countsBySeverity: Record<AgentFindingSeverity, number>;
@@ -183,17 +221,46 @@ export type AgentTestManifestLine = {
183
221
  assets_dir: string;
184
222
  };
185
223
  export type AgentFindingManifestLine = {
224
+ schema_version?: "allure-agent-finding/v2";
225
+ check_id?: string;
226
+ instance_id?: string;
186
227
  finding_id: string;
187
- subject: string;
228
+ subject: string | {
229
+ type: "run" | "test" | "environment" | "attachment" | "global";
230
+ id?: string;
231
+ path?: string;
232
+ full_name?: string;
233
+ environment?: string;
234
+ };
235
+ subject_ref?: string;
236
+ subject_type?: "run" | "test";
188
237
  severity: AgentFindingSeverity;
238
+ impact?: AgentAcceptanceImpact;
189
239
  category: AgentFindingCategory;
190
240
  check_name: string;
241
+ title?: string;
191
242
  message: string;
192
243
  explanation: string;
193
244
  evidence_paths: string[];
194
245
  remediation_hint: string;
195
246
  expected_reference?: string;
196
247
  confidence?: number;
248
+ expected?: Record<string, unknown>;
249
+ observed?: Record<string, unknown>;
250
+ evidence?: {
251
+ paths?: string[];
252
+ };
253
+ action?: string;
254
+ legacy?: {
255
+ finding_id: string;
256
+ subject: string;
257
+ subject_type?: "run" | "test";
258
+ check_name: string;
259
+ explanation?: string;
260
+ evidence_paths?: string[];
261
+ remediation_hint: string;
262
+ expected_reference?: string;
263
+ };
197
264
  };
198
265
  export type AgentOutputBundle = {
199
266
  outputDir: string;
@@ -201,6 +268,7 @@ export type AgentOutputBundle = {
201
268
  tests: AgentTestManifestLine[];
202
269
  findings: AgentFindingManifestLine[];
203
270
  expected?: AgentExpectations;
271
+ humanReport?: AgentHumanReportStatus;
204
272
  };
205
273
  export type AgentEnrichmentAction = {
206
274
  checkName: string;
@@ -254,8 +322,8 @@ export type AgentReviewOptions = {
254
322
  };
255
323
  export declare const DEFAULT_ANTI_DUMMY_CONFIDENCE = 0.75;
256
324
  export declare const AGENT_ENRICHMENT_ACTIONS: Record<string, AgentEnrichmentAction>;
257
- export declare const SCOPE_REJECTING_CHECKS: readonly ["missing-expected-test", "missing-expected-prefix", "missing-expected-environment", "unexpected-environment", "forbidden-selector-match", "unexpected-test"];
258
- export declare const ITERATION_REQUIRED_CHECKS: readonly ["invalid-expectations-file", "no-visible-tests", "runner-failures-outside-logical-results", "missing-expected-label-selector", "metadata-mismatch", "history-id-collision", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", "retries-without-new-evidence", "passed-without-observable-evidence"];
325
+ export declare const SCOPE_REJECTING_CHECKS: readonly ["expected-test-missing", "expected-count-mismatch", "expected-prefix-missing", "expected-label-missing", "expected-environment-missing", "no-tests-observed", "unexpected-environment", "forbidden-label-observed", "unexpected-test"];
326
+ export declare const ITERATION_REQUIRED_CHECKS: readonly ["expectations-invalid", "expectations-empty", "expectations-unsupported-control", "runner-failures-outside-logical-results", "metadata-mismatch", "history-id-collision", "expected-step-containing-missing", "insufficient-expected-steps", "insufficient-expected-attachments", "missing-expected-attachment", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", "retries-without-new-evidence", "passed-without-observable-evidence"];
259
327
  export declare const ANTI_DUMMY_CHECKS: readonly ["noop-dominated-steps"];
260
328
  export declare const buildAgentExpectations: (input: AgentHarnessRequest) => AgentExpectations;
261
329
  export declare const mapFindingToEnrichmentAction: (finding: AgentFindingManifestLine | string) => AgentEnrichmentAction;