npm - @allurereport/plugin-agent - Versions diffs - 3.10.0 → 3.11.0 - Mend

@allurereport/plugin-agent 3.10.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +85 -79
package/dist/capabilities.d.ts +99 -0
package/dist/capabilities.js +173 -0
package/dist/errors.d.ts +9 -0
package/dist/errors.js +15 -0
package/dist/guidance.d.ts +4 -5
package/dist/guidance.js +194 -57
package/dist/harness.d.ts +68 -4
package/dist/harness.js +45 -17
package/dist/index.d.ts +9 -1
package/dist/index.js +9 -0
package/dist/inline-expectations.d.ts +23 -0
package/dist/inline-expectations.js +186 -0
package/dist/invalid-output.d.ts +58 -0
package/dist/invalid-output.js +238 -0
package/dist/model.d.ts +34 -0
package/dist/model.js +8 -1
package/dist/paths.d.ts +3 -0
package/dist/paths.js +10 -0
package/dist/plugin.js +847 -136
package/dist/query.d.ts +193 -0
package/dist/query.js +175 -0
package/dist/selection.d.ts +42 -0
package/dist/selection.js +141 -0
package/dist/state.d.ts +15 -0
package/dist/state.js +83 -0
package/package.json +6 -6

package/dist/guidance.d.ts CHANGED

@@ -5,14 +5,13 @@ export type EnrichmentActionDefinition = {
     guidance: string;
 };
 export declare const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDefinition>;
-export declare const AGENT_ENRICHMENT_WORKFLOW: readonly ["Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted enrichment iteration.", "Run tests with `allure agent --output <dir> --expectations <file> -- <command>`.", "After each test run, print the `index.md` path from that output directory so collaborators can open the run overview quickly.", "Use `allure agent latest` to recover the newest output directory when a prior run omitted `--output`.", "Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.", "Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.", "Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.", "Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests through Allure testplan support. Add `--rerun-preset`, repeated `--rerun-environment`, or repeated `--rerun-label name=value` filters when you need a narrower rerun slice.", "Inspect `manifest/run.json`, tail `manifest/test-events.jsonl`, then review `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before editing tests.", "Enrich only the intended tests, rerun the same scope, and compare the rerun against `manifest/expected.json` when present.", "Accept the rerun only when scope is clean, evidence is strong enough to review, and no high-confidence dummy findings remain."];
+export declare const AGENT_WORKFLOWS_MARKDOWN = "Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done.\n\n### Validate A Change\n\nUse when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.\n\nCommands:\n\n- `allure agent --goal <text> --expect-* -- <command>`\n\nDone when:\n\n- the expected scope ran and no forbidden scope appeared\n- `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` were reviewed\n- the `index.md` path was reported\n- the changed package build and required static checks passed when this repository guide requires them\n\n### Add Or Update Tests\n\nUse when creating or changing tests for a feature, fix, or behavior gap.\n\nCommands:\n\n- `allure agent --goal <text> --expect-tests <count> --expect-test \"<fullName>\" --expect-label name=value --expect-step-containing <text> -- <command>`\n\nDone when:\n\n- the tests prove the intended behavior rather than only touching the code path\n- scope expectations match the intended feature, issue, or package slice\n- each expected test has enough steps or attachments for a reviewer to understand what happened\n- weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope\n\n### Review Existing Coverage\n\nUse when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first.\n\nCommands:\n\n- one scoped `allure agent --goal <text> --expect-* -- <command>` run per review group\n\nDone when:\n\n- the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run\n- each group has expectations that describe the intended scope\n- runtime artifacts are reviewed before source-only coverage 
conclusions\n- uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary\n\n### Triage Failures\n\nUse when tests failed, broke, or runner output does not match agent artifacts.\n\nCommands:\n\n- `allure agent latest`\n- `allure agent --rerun-latest --rerun-preset failed -- <command>`\n- `allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>`\n\nDone when:\n\n- failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly\n- `artifacts/global/stderr.txt` and global errors were checked when failures are missing from `manifest/tests.jsonl`\n- reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan\n\n### Rerun A Prior Scope\n\nUse when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused.\n\nCommands:\n\n- `allure agent select --latest [--preset review|failed|unsuccessful|all]`\n- `allure agent select --from <output-dir> [--environment <id>] [--label name=value]`\n- `allure agent --rerun-latest -- <command>`\n- `allure agent --rerun-from <output-dir> -- <command>`\n\nDone when:\n\n- the rerun scope comes from Allure testplan support\n- `--rerun-preset`, `--rerun-environment`, or `--rerun-label` filters explain any narrowed selection\n- manual test names are used only as a fallback when testplan support is unavailable\n- the rerun output is reviewed before making a new conclusion\n\n### Improve Evidence Quality\n\nUse when tests pass or fail but the runtime story is too weak to review.\n\nCommands:\n\n- `allure agent --expect-step-containing <text> --expect-steps <count> --expect-attachments <count> -- <command>`\n- `allure agent --expect-attachment <name|name=value|content-type=value> -- <command>`\n\nDone when:\n\n- steps describe real setup, actions, state transitions, or assertions\n- 
attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces\n- placeholder steps, generic `\"passed\"` attachments, and other dummy evidence are removed\n- the same intended scope was rerun and no high-confidence evidence findings remain\n\n### Recover Or Diagnose Agent Mode\n\nUse when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox.\n\nCommands:\n\n- `allure --version`\n- `allure agent capabilities --json`\n- `allure agent --help`\n- `allure agent latest`\n- `allure agent state-dir`\n- `ALLURE_AGENT_STATE_DIR=<dir>`\n\nDone when:\n\n- supported local commands and flags are known from capabilities or help output\n- the output directory, `index.md` path, or state directory is identified, or the reason it is unavailable is documented\n- console-only conclusions stay provisional until agent-mode artifacts are available";
+export declare const AGENT_COMMAND_TASK_MAP: readonly ["`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.", "`allure agent --goal ... -- <command>`: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone.", "`allure agent latest`: output recovery loop. Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, or a follow-up task needs prior output before selecting or rerunning tests.", "`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where project-scoped latest pointers are stored.", "`allure agent query --latest summary|tests|findings|test` / `allure agent query --from <output-dir> ...`: output inspection loop. Use when you need a focused JSON summary, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.", "`allure agent select --latest` / `allure agent select --from <output-dir>`: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.", "`allure agent --rerun-latest` / `allure agent --rerun-from <output-dir>`: focused retry loop. 
Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.", "`ALLURE_AGENT_STATE_DIR=<dir>`: CI and sandbox state-control loop. Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable."];
 export declare const AGENT_VERIFICATION_RULES: readonly ["If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.", "Use `allure agent` for smoke checks too, even when the change is small or mechanical.", "Only skip agent mode when it is impossible or when you are debugging agent mode itself.", "After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace <package-name> build`).", "After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly."];
-export declare const AGENT_SMALL_TEST_CHANGE_WORKFLOW: readonly ["Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.", "Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.", "Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim."];
-export declare const AGENT_COVERAGE_REVIEW_WORKFLOW: readonly ["Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.", "Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.", "Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad."];
 export declare const AGENT_TEST_ENRICHMENT_BEST_PRACTICES: readonly ["Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.", "Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.", "Add metadata only when it improves scope review, debugging, or downstream policy. Keep labels and parameters intentionally minimal.", "If multiple call sites need the same evidence, instrument the helper once. Example: teach `runCommand` to emit a step instead of wrapping every `runCommand(...)` call site with identical step blocks."];
 export declare const AGENT_ANTI_DUMMY_POLICY: readonly ["Do not add empty wrapper steps, placeholder attachments, or generic strings such as `passed`, `success`, or static boilerplate.", "Do not attach evidence that is not grounded in the current test execution. Synthetic summaries are fine only when they are derived from real runtime artifacts.", "Do not inflate metadata with labels, stories, or severities that no expectation, review step, or policy actually uses."];
 export declare const AGENT_ACCEPTANCE_CHECKLIST: readonly ["The rerun matches the intended scope and does not trigger forbidden or unexpected-test findings.", "Each touched test shows enough evidence to explain what happened and what was verified.", "Retries include per-attempt diagnostics when the same test reruns.", "No high-confidence anti-dummy findings remain, especially `noop-dominated-steps` or low-signal `step-spam` traces."];
 export declare const AGENT_REVIEW_COMPLETENESS_CHECKLIST: readonly ["The relevant scope was run with agent mode unless that was impossible.", "Scoped reviews created expectations before the final rerun, unless the task was an intentionally broad package-health audit.", "The final conclusion was based on `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` rather than console output alone.", "Any missing or partial runtime modeling was called out explicitly before the review was considered complete."];
 export declare const AGENT_PARTIAL_RUNTIME_REVIEW_RULES: readonly ["Suite-load, import, or setup failures may appear only in `artifacts/global/stderr.txt` or global errors instead of `manifest/tests.jsonl`.", "If runner-visible failures are not represented as logical tests, inspect global stderr and treat the run as a partial runtime review, not a clean result set.", "Console-only conclusions stay provisional when agent output is missing, incomplete, or partially modeled."];
-export declare const AGENT_INSTRUCTIONS_TEMPLATE = "## Allure Agent Mode Instructions\n\n- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.\n- Use `allure agent` for smoke checks too, even when the change is small or mechanical.\n- Only skip agent mode when it is impossible or when you are debugging agent mode itself.\n- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.\n- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control.\n- Use `allure agent latest` to reopen the newest run when `--output` was omitted.\n- Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.\n- Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.\n- Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests.\n- Use `--rerun-preset review|failed|unsuccessful|all`, repeated `--rerun-environment <id>`, and repeated `--rerun-label name=value` when you need a narrower rerun selection from the previous output.\n- Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.\n- Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted rerun.\n- Run tests with `allure agent` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.\n- Enrich only the intended tests. 
Add real steps for real setup, actions, and assertions.\n- Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.\n- Keep metadata minimal. Add labels or severity only when scope review, debugging, or quality policy uses them.\n- Instrument stable helpers when several call sites need the same evidence. For example, teach `runCommand` to emit a step instead of wrapping every caller.\n- Reject the rerun if scope drifts, evidence stays weak, or high-confidence noop-style findings remain.";
-export declare const renderAgentsGuide: (projectGuidePath?: string) => string;
+export declare const AGENT_INSTRUCTIONS_TEMPLATE = "## Allure Agent Mode Instructions\n\n- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.\n- Use `allure agent` for smoke checks too, even when the change is small or mechanical.\n- Only skip agent mode when it is impossible or when you are debugging agent mode itself.\n- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.\n- Use `allure agent latest` to print the newest output directory and `index.md` path when `--output` was omitted.\n- Use `allure agent capabilities --json` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text.\n- Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.\n- Use `allure agent latest`, `state-dir`, `query`, `select`, and `--rerun-*` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.\n- Use `allure agent query --latest summary|tests|findings|test` or `allure agent query --from <output-dir> ...` to inspect prior output as focused JSON before manually opening raw manifests.\n- Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning; add `--output <file>` when you want the CLI to write the plan and print a short selection summary.\n- Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests.\n- When rerunning previous failures, use `allure agent --rerun-latest --rerun-preset failed -- <command>` or 
`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>` instead of manually rebuilding runner-specific test names.\n- Use `--rerun-preset review|failed|unsuccessful|all`, repeated `--rerun-environment <id>`, and repeated `--rerun-label name=value` when you need a narrower rerun selection from the previous output.\n- Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.\n- Prefer inline `allure agent` expectation flags such as `--goal`, `--expect-tests`, `--expect-test`, `--expect-label`, and `--expect-step-containing`; use `--expectations <file>` only when flags become awkward.\n- Run tests with `allure agent` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.\n- Enrich only the intended tests. Add real steps for real setup, actions, and assertions.\n- Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.\n- Keep metadata minimal. Add labels or severity only when scope review, debugging, or quality policy uses them.\n- Instrument stable helpers when several call sites need the same evidence. For example, teach `runCommand` to emit a step instead of wrapping every caller.\n- Reject the rerun if scope drifts, evidence stays weak, or high-confidence noop-style findings remain.";
+export declare const renderAgentsGuide: () => string;

package/dist/guidance.js CHANGED

@@ -1,10 +1,25 @@
 export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
-    "invalid-expectations-file": {
+    "expectations-invalid": {
         category: "bootstrap-allure",
-        title: "Repair the expectations file",
-        guidance: "Regenerate a valid YAML or JSON expectations file before the next enrichment iteration.",
+        title: "Repair the expectations input",
+        guidance: "Regenerate valid inline expectations or a valid YAML/JSON expectations file before the next iteration.",
     },
-    "no-visible-tests": {
+    "expectations-empty": {
+        category: "narrow-test-scope",
+        title: "Add recognized expectation controls",
+        guidance: "Rerun with supported M1 expectation controls or omit expectations for an intentionally broad review.",
+    },
+    "expectations-unsupported-control": {
+        category: "review-manually",
+        title: "Use supported expectation controls",
+        guidance: "Replace unsupported controls with supported M1 flags or report weaker checking explicitly.",
+    },
+    "expectations-weak-goal": {
+        category: "review-manually",
+        title: "Use a more specific goal next time",
+        guidance: "Base conclusions on observed evidence and rerun with a specific goal when expectation precision matters.",
+    },
+    "no-tests-observed": {
         category: "bootstrap-allure",
         title: "Restore Allure result generation",
         guidance: "Make sure the test command emits Allure results before rerunning the enrichment loop.",
@@ -24,22 +39,27 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
         title: "Call out partial runtime modeling",
         guidance: "Compare run statistics with the logical test files and document any skipped or non-passed results that were not rendered.",
     },
-    "missing-expected-test": {
+    "expected-test-missing": {
         category: "narrow-test-scope",
         title: "Bring the intended test back into scope",
         guidance: "Regenerate expectations and rerun only the planned tests or selectors.",
     },
-    "missing-expected-prefix": {
+    "expected-count-mismatch": {
+        category: "narrow-test-scope",
+        title: "Restore the expected visible test count",
+        guidance: "Check the command, selectors, and agent modeling before accepting the run.",
+    },
+    "expected-prefix-missing": {
         category: "narrow-test-scope",
         title: "Restore the intended name-prefix scope",
         guidance: "Check the selector and rerun only the feature slice that should have matched it.",
     },
-    "missing-expected-environment": {
+    "expected-environment-missing": {
         category: "narrow-test-scope",
         title: "Rerun the intended environment",
         guidance: "Constrain the rerun to the expected environment before accepting the result.",
     },
-    "missing-expected-label-selector": {
+    "expected-label-missing": {
         category: "repair-test-metadata",
         title: "Add the minimal missing scope label",
         guidance: "Only add the labels required by the expectations selector; do not inflate metadata.",
@@ -49,6 +69,11 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
         title: "Remove unrelated environments from the rerun",
         guidance: "Tighten the rerun selector so unrelated environments do not appear in agent output.",
     },
+    "forbidden-label-observed": {
+        category: "narrow-test-scope",
+        title: "Stop forbidden labeled tests from running",
+        guidance: "Reject the run, narrow the rerun scope, and keep the forbidden label expectation.",
+    },
     "forbidden-selector-match": {
         category: "narrow-test-scope",
         title: "Stop forbidden tests from running",
@@ -74,6 +99,26 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
         title: "Add meaningful setup, action, and assertion steps",
         guidance: "Wrap only real actions, state transitions, and checks in Allure steps before rerunning.",
     },
+    "expected-step-containing-missing": {
+        category: "add-meaningful-steps",
+        title: "Add or correct the expected step text",
+        guidance: "Expose the expected runtime check as a test-scoped Allure step, or correct the expectation wording.",
+    },
+    "insufficient-expected-steps": {
+        category: "add-meaningful-steps",
+        title: "Add the expected meaningful steps",
+        guidance: "Expose real setup, action, state transition, and assertion steps without adding filler.",
+    },
+    "insufficient-expected-attachments": {
+        category: "add-test-attachments",
+        title: "Add the expected runtime attachments",
+        guidance: "Attach focused runtime evidence such as payloads, logs, screenshots, diffs, or traces.",
+    },
+    "missing-expected-attachment": {
+        category: "add-test-attachments",
+        title: "Add the required attachment",
+        guidance: "Attach the requested runtime artifact near the relevant action or assertion.",
+    },
     "failed-without-attachments": {
         category: "add-test-attachments",
         title: "Attach focused runtime evidence near the failure",
@@ -110,18 +155,130 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME = {
         guidance: "Add a few real verification steps or attachments so the passing test shows what it proved.",
     },
 };
-export const AGENT_ENRICHMENT_WORKFLOW = [
-    "Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted enrichment iteration.",
-    "Run tests with `allure agent --output <dir> --expectations <file> -- <command>`.",
-    "After each test run, print the `index.md` path from that output directory so collaborators can open the run overview quickly.",
-    "Use `allure agent latest` to recover the newest output directory when a prior run omitted `--output`.",
-    "Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.",
-    "Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.",
-    "Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.",
-    "Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests through Allure testplan support. Add `--rerun-preset`, repeated `--rerun-environment`, or repeated `--rerun-label name=value` filters when you need a narrower rerun slice.",
-    "Inspect `manifest/run.json`, tail `manifest/test-events.jsonl`, then review `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before editing tests.",
-    "Enrich only the intended tests, rerun the same scope, and compare the rerun against `manifest/expected.json` when present.",
-    "Accept the rerun only when scope is clean, evidence is strong enough to review, and no high-confidence dummy findings remain.",
+export const AGENT_WORKFLOWS_MARKDOWN = `Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done.
+### Validate A Change
+Use when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.
+Commands:
+- \`allure agent --goal <text> --expect-* -- <command>\`
+Done when:
+- the expected scope ran and no forbidden scope appeared
+- \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
+- the \`index.md\` path was reported
+- the changed package build and required static checks passed when this repository guide requires them
+### Add Or Update Tests
+Use when creating or changing tests for a feature, fix, or behavior gap.
+Commands:
+- \`allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>" --expect-label name=value --expect-step-containing <text> -- <command>\`
+Done when:
+- the tests prove the intended behavior rather than only touching the code path
+- scope expectations match the intended feature, issue, or package slice
+- each expected test has enough steps or attachments for a reviewer to understand what happened
+- weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope
+### Review Existing Coverage
+Use when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first.
+Commands:
+- one scoped \`allure agent --goal <text> --expect-* -- <command>\` run per review group
+Done when:
+- the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run
+- each group has expectations that describe the intended scope
+- runtime artifacts are reviewed before source-only coverage conclusions
+- uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary
+### Triage Failures
+Use when tests failed, broke, or runner output does not match agent artifacts.
+Commands:
+- \`allure agent latest\`
+- \`allure agent --rerun-latest --rerun-preset failed -- <command>\`
+- \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\`
+Done when:
+- failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly
+- \`artifacts/global/stderr.txt\` and global errors were checked when failures are missing from \`manifest/tests.jsonl\`
+- reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan
+### Rerun A Prior Scope
+Use when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused.
+Commands:
+- \`allure agent select --latest [--preset review|failed|unsuccessful|all]\`
+- \`allure agent select --from <output-dir> [--environment <id>] [--label name=value]\`
+- \`allure agent --rerun-latest -- <command>\`
+- \`allure agent --rerun-from <output-dir> -- <command>\`
+Done when:
+- the rerun scope comes from Allure testplan support
+- \`--rerun-preset\`, \`--rerun-environment\`, or \`--rerun-label\` filters explain any narrowed selection
+- manual test names are used only as a fallback when testplan support is unavailable
+- the rerun output is reviewed before making a new conclusion
+### Improve Evidence Quality
+Use when tests pass or fail but the runtime story is too weak to review.
+Commands:
+- \`allure agent --expect-step-containing <text> --expect-steps <count> --expect-attachments <count> -- <command>\`
+- \`allure agent --expect-attachment <name|name=value|content-type=value> -- <command>\`
+Done when:
+- steps describe real setup, actions, state transitions, or assertions
+- attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces
+- placeholder steps, generic \`"passed"\` attachments, and other dummy evidence are removed
+- the same intended scope was rerun and no high-confidence evidence findings remain
+### Recover Or Diagnose Agent Mode
+Use when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox.
+Commands:
+- \`allure --version\`
+- \`allure agent capabilities --json\`
+- \`allure agent --help\`
+- \`allure agent latest\`
+- \`allure agent state-dir\`
+- \`ALLURE_AGENT_STATE_DIR=<dir>\`
+Done when:
+- supported local commands and flags are known from capabilities or help output
+- the output directory, \`index.md\` path, or state directory is identified, or the reason it is unavailable is documented
+- console-only conclusions stay provisional until agent-mode artifacts are available`;
+export const AGENT_COMMAND_TASK_MAP = [
+    "`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.",
+    "`allure agent --goal ... -- <command>`: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone.",
+    "`allure agent latest`: output recovery loop. Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, or a follow-up task needs prior output before selecting or rerunning tests.",
+    "`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where project-scoped latest pointers are stored.",
+    "`allure agent query --latest summary|tests|findings|test` / `allure agent query --from <output-dir> ...`: output inspection loop. Use when you need a focused JSON summary, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.",
+    "`allure agent select --latest` / `allure agent select --from <output-dir>`: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.",
+    "`allure agent --rerun-latest` / `allure agent --rerun-from <output-dir>`: focused retry loop. Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.",
+    "`ALLURE_AGENT_STATE_DIR=<dir>`: CI and sandbox state-control loop. Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable.",
 ];
 export const AGENT_VERIFICATION_RULES = [
     "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.",
@@ -130,16 +287,6 @@ export const AGENT_VERIFICATION_RULES = [
     "After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace <package-name> build`).",
     "After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.",
 ];
-export const AGENT_SMALL_TEST_CHANGE_WORKFLOW = [
-    "Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.",
-    "Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.",
-    "Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim.",
-];
-export const AGENT_COVERAGE_REVIEW_WORKFLOW = [
-    "Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.",
-    "Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.",
-    "Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad.",
-];
 export const AGENT_TEST_ENRICHMENT_BEST_PRACTICES = [
     "Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.",
     "Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.",
@@ -174,14 +321,17 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
 - Use \`allure agent\` for smoke checks too, even when the change is small or mechanical.
 - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
 - After each agent-mode test run, print the \`index.md\` path from that run's output directory so users can open the run overview quickly.
-- Use \`ALLURE_AGENT_*\` with \`allure run\` only as the lower-level fallback when you need direct environment control.
-- Use \`allure agent latest\` to reopen the newest run when \`--output\` was omitted.
+- Use \`allure agent latest\` to print the newest output directory and \`index.md\` path when \`--output\` was omitted.
+- Use \`allure agent capabilities --json\` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text.
 - Use \`allure agent state-dir\` to inspect where the current project stores its latest-agent state.
-- Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning.
+- Use \`allure agent latest\`, \`state-dir\`, \`query\`, \`select\`, and \`--rerun-*\` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.
+- Use \`allure agent query --latest summary|tests|findings|test\` or \`allure agent query --from <output-dir> ...\` to inspect prior output as focused JSON before manually opening raw manifests.
+- Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning; add \`--output <file>\` when you want the CLI to write the plan and print a short selection summary.
 - Use \`allure agent --rerun-latest -- <command>\` or \`allure agent --rerun-from <output-dir> -- <command>\` to rerun only the selected tests.
+- When rerunning previous failures, use \`allure agent --rerun-latest --rerun-preset failed -- <command>\` or \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\` instead of manually rebuilding runner-specific test names.
 - Use \`--rerun-preset review|failed|unsuccessful|all\`, repeated \`--rerun-environment <id>\`, and repeated \`--rerun-label name=value\` when you need a narrower rerun selection from the previous output.
 - Use \`ALLURE_AGENT_STATE_DIR\` when you need to override where the current project stores latest-agent state for \`latest\`, \`state-dir\`, or \`--rerun-latest\`.
-- Generate or refresh \`ALLURE_AGENT_EXPECTATIONS\` before each targeted rerun.
+- Prefer inline \`allure agent\` expectation flags such as \`--goal\`, \`--expect-tests\`, \`--expect-test\`, \`--expect-label\`, and \`--expect-step-containing\`; use \`--expectations <file>\` only when flags become awkward.
 - Run tests with \`allure agent\` and review \`manifest/run.json\`, \`manifest/test-events.jsonl\`, \`index.md\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\`.
 - Enrich only the intended tests. Add real steps for real setup, actions, and assertions.
 - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.
@@ -189,26 +339,18 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
 - Instrument stable helpers when several call sites need the same evidence. For example, teach \`runCommand\` to emit a step instead of wrapping every caller.
 - Reject the rerun if scope drifts, evidence stays weak, or high-confidence noop-style findings remain.`;
 // Formats every entry as a "- " markdown bullet and joins the bullets with newlines.
 const renderBullets = (items) => {
     const toBullet = (entry) => `- ${entry}`;
     return items.map(toBullet).join("\n");
 };
-const renderNumbered = (items) => items.map((item, index) => `${index + 1}. ${item}`).join("\n");
 // Emits one markdown bullet per entry of ENRICHMENT_ACTIONS_BY_CHECK_NAME:
 // the check name in backticks, followed by the action's title and guidance.
 const renderRemediationGuide = () => {
     const bullets = [];
     for (const [name, entry] of Object.entries(ENRICHMENT_ACTIONS_BY_CHECK_NAME)) {
         bullets.push(`- \`${name}\`: ${entry.title}. ${entry.guidance}`);
     }
     return bullets.join("\n");
 };
-export const renderAgentsGuide = (projectGuidePath) => `# AGENTS Guide
+export const renderAgentsGuide = () => `# AGENTS Guide
 ## Reading Order
-${projectGuidePath
-    ? `1. Read [project guidance](${projectGuidePath}) first for repo-specific testing conventions and loop expectations.
-2. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
-3. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
-4. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
-5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
-6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`
-    : `1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
+1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
 2. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
 3. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
 4. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
-5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`}
+5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.
 ## Directory Contract
@@ -217,28 +359,23 @@ ${projectGuidePath
 - \`manifest/test-events.jsonl\` is the append-only live event stream for machine consumers during the run.
 - \`manifest/tests.jsonl\` contains one logical test summary per line.
 - \`manifest/findings.jsonl\` contains one advisory finding per line.
-- \`manifest/expected.json\` is copied from \`ALLURE_AGENT_EXPECTATIONS\` when provided.
-- \`project/docs/allure-agent-mode.md\` is copied from the project when available so each run keeps the guide used for that execution.
+- \`manifest/expected.json\` contains normalized expectations from inline flags or \`--expectations <file>\` when provided.
 - \`tests/<environment>/<slug>.md\` contains one logical test per file.
 - Retries from the same run are nested inside the same logical test file.
 - \`tests/<environment>/<slug>.assets/\` contains copied attachments for that logical test.
 - \`artifacts/global/\` contains copied global artifacts for the whole run.
-## Enrichment Loop Workflow
-${renderNumbered(AGENT_ENRICHMENT_WORKFLOW)}
-## Verification Standard
+## Command Task Map
-${renderBullets(AGENT_VERIFICATION_RULES)}
+${renderBullets(AGENT_COMMAND_TASK_MAP)}
-## Small Test Change Workflow
+## Agent Workflows
-${renderNumbered(AGENT_SMALL_TEST_CHANGE_WORKFLOW)}
+${AGENT_WORKFLOWS_MARKDOWN}
-## Coverage Review Workflow
+## Verification Standard
-${renderNumbered(AGENT_COVERAGE_REVIEW_WORKFLOW)}
+${renderBullets(AGENT_VERIFICATION_RULES)}
 ## Test Enrichment Best Practices

package/dist/harness.d.ts CHANGED Viewed

@@ -5,18 +5,32 @@ export type AgentFindingCategory = "bootstrap" | "scope" | "metadata" | "evidenc
 /** String-literal union: "match", "unexpected", "forbidden", or "unknown". Presumably classifies an observed test against the expected scope — confirm against usage. */
 export type AgentScopeMatch = "match" | "unexpected" | "forbidden" | "unknown";
 /** String-literal union naming an acceptance verdict: "accept", "iterate", or "reject". */
 export type AgentAcceptanceStatus = "accept" | "iterate" | "reject";
 /** String-literal union: "advisory", "iterate", or "reject". Used as the type of the optional `impact` field on `AgentFindingManifestLine`. */
 export type AgentAcceptanceImpact = "advisory" | "iterate" | "reject";
+export type AgentExpectationResultStatus = "matched" | "failed" | "partial" | "degraded" | "unsupported" | "unavailable" | "not_requested";
+export type AgentExpectationResultImpact = "accept" | "reject" | "iterate" | "advisory";
 /** Alias of `EnrichmentActionCategory`, re-exported under the `Agent*` naming used by this module. */
 export type AgentEnrichmentActionCategory = EnrichmentActionCategory;
 /**
  * Selector used by the `expected` and `forbidden` fields of `AgentExpectations`.
  * Every field is optional. `label_values` maps a string key to a single
  * value or a list of values; `test_count` is a plain number.
  * NOTE(review): exact matching semantics (exact vs. prefix, any-of vs. all-of)
  * are not visible in this declaration — confirm in the implementation.
  */
 export type AgentExpectationSelector = {
     environments?: string[];
     full_names?: string[];
     full_name_prefixes?: string[];
     label_values?: Record<string, string | string[]>;
+    test_count?: number;
+};
+export type AgentEvidenceExpectations = {
+    required?: boolean;
+    min_steps?: number;
+    min_attachments?: number;
+    step_name_contains?: string[];
+    attachments?: Array<{
+        name?: string;
+        content_type?: string;
+    }>;
 };
 /**
  * Expectations object for an agent run; this is the return type of
  * `buildAgentExpectations`. All fields are optional: a free-text `goal`,
  * a `task_id`, `expected` / `forbidden` selectors, `evidence` expectations,
  * and a list of free-text `notes`.
  */
 export type AgentExpectations = {
     goal?: string;
     task_id?: string;
     expected?: AgentExpectationSelector;
     forbidden?: AgentExpectationSelector;
+    evidence?: AgentEvidenceExpectations;
     notes?: string[];
 };
 export type AgentHarnessScopeInput = {
@@ -97,7 +111,6 @@ export type AgentRunManifest = {
         findings_manifest: string;
         test_events_manifest?: string;
         expected_manifest: string | null;
-        project_guide: string | null;
         process_logs: {
             stdout: string | null;
             stderr: string | null;
@@ -148,6 +161,28 @@ export type AgentRunManifest = {
         };
     };
     expectations_present: boolean;
+    expectations: AgentExpectations | null;
+    expectation_result: {
+        schema_version: "allure-agent-expectation-result/v1";
+        status: AgentExpectationResultStatus;
+        impact: AgentExpectationResultImpact;
+        source: {
+            kind: "inline" | "file" | "none";
+            path: string | null;
+        };
+        recognized_control_count: number;
+        unsupported_controls: string[];
+        degraded_controls: string[];
+        summary: {
+            expected_tests: number;
+            observed_tests: number;
+            missing_expected: number;
+            forbidden_observed: number;
+            unexpected_observed: number;
+            evidence_mismatches: number;
+        };
+        finding_ids: string[];
+    };
     check_summary: {
         total: number;
         countsBySeverity: Record<AgentFindingSeverity, number>;
@@ -183,17 +218,46 @@ export type AgentTestManifestLine = {
     assets_dir: string;
 };
 export type AgentFindingManifestLine = {
+    schema_version?: "allure-agent-finding/v2";
+    check_id?: string;
+    instance_id?: string;
     finding_id: string;
-    subject: string;
+    subject: string | {
+        type: "run" | "test" | "environment" | "attachment" | "global";
+        id?: string;
+        path?: string;
+        full_name?: string;
+        environment?: string;
+    };
+    subject_ref?: string;
+    subject_type?: "run" | "test";
     severity: AgentFindingSeverity;
+    impact?: AgentAcceptanceImpact;
     category: AgentFindingCategory;
     check_name: string;
+    title?: string;
     message: string;
     explanation: string;
     evidence_paths: string[];
     remediation_hint: string;
     expected_reference?: string;
     confidence?: number;
+    expected?: Record<string, unknown>;
+    observed?: Record<string, unknown>;
+    evidence?: {
+        paths?: string[];
+    };
+    action?: string;
+    legacy?: {
+        finding_id: string;
+        subject: string;
+        subject_type?: "run" | "test";
+        check_name: string;
+        explanation?: string;
+        evidence_paths?: string[];
+        remediation_hint: string;
+        expected_reference?: string;
+    };
 };
 export type AgentOutputBundle = {
     outputDir: string;
@@ -254,8 +318,8 @@ export type AgentReviewOptions = {
 };
 /** Default confidence constant, declared as the literal 0.75. Presumably the threshold applied to anti-dummy findings — confirm in harness.js. */
 export declare const DEFAULT_ANTI_DUMMY_CONFIDENCE = 0.75;
 /** Enrichment actions keyed by string. NOTE(review): keys look like check names — confirm against the implementation. */
 export declare const AGENT_ENRICHMENT_ACTIONS: Record<string, AgentEnrichmentAction>;
-export declare const SCOPE_REJECTING_CHECKS: readonly ["missing-expected-test", "missing-expected-prefix", "missing-expected-environment", "unexpected-environment", "forbidden-selector-match", "unexpected-test"];
-export declare const ITERATION_REQUIRED_CHECKS: readonly ["invalid-expectations-file", "no-visible-tests", "runner-failures-outside-logical-results", "missing-expected-label-selector", "metadata-mismatch", "history-id-collision", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", "retries-without-new-evidence", "passed-without-observable-evidence"];
+export declare const SCOPE_REJECTING_CHECKS: readonly ["expected-test-missing", "expected-count-mismatch", "expected-prefix-missing", "expected-label-missing", "expected-environment-missing", "no-tests-observed", "unexpected-environment", "forbidden-label-observed", "unexpected-test"];
+export declare const ITERATION_REQUIRED_CHECKS: readonly ["expectations-invalid", "expectations-empty", "expectations-unsupported-control", "runner-failures-outside-logical-results", "metadata-mismatch", "history-id-collision", "expected-step-containing-missing", "insufficient-expected-steps", "insufficient-expected-attachments", "missing-expected-attachment", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", "retries-without-new-evidence", "passed-without-observable-evidence"];
 /** Read-only tuple of check names in the anti-dummy group; currently a single entry, "noop-dominated-steps". */
 export declare const ANTI_DUMMY_CHECKS: readonly ["noop-dominated-steps"];
 /** Builds an `AgentExpectations` object from an `AgentHarnessRequest`. */
 export declare const buildAgentExpectations: (input: AgentHarnessRequest) => AgentExpectations;
 /** Maps a finding, given either as a full `AgentFindingManifestLine` or as a string, to an `AgentEnrichmentAction`. NOTE(review): the string form is presumably a check name — confirm. */
 export declare const mapFindingToEnrichmentAction: (finding: AgentFindingManifestLine | string) => AgentEnrichmentAction;