npm - prose-qa - Versions diffs - 0.1.0 - Mend

prose-qa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (427) hide show

package/LICENSE +21 -0
package/README.md +570 -0
package/dist/agent/bash.d.ts +52 -0
package/dist/agent/bash.d.ts.map +1 -0
package/dist/agent/bash.js +186 -0
package/dist/agent/bash.js.map +1 -0
package/dist/agent/bash.test.d.ts +2 -0
package/dist/agent/bash.test.d.ts.map +1 -0
package/dist/agent/bash.test.js +70 -0
package/dist/agent/bash.test.js.map +1 -0
package/dist/agent/llm-model.d.ts +5 -0
package/dist/agent/llm-model.d.ts.map +1 -0
package/dist/agent/llm-model.js +29 -0
package/dist/agent/llm-model.js.map +1 -0
package/dist/agent/llm-model.test.d.ts +2 -0
package/dist/agent/llm-model.test.d.ts.map +1 -0
package/dist/agent/llm-model.test.js +28 -0
package/dist/agent/llm-model.test.js.map +1 -0
package/dist/agent/prompt.d.ts +17 -0
package/dist/agent/prompt.d.ts.map +1 -0
package/dist/agent/prompt.js +97 -0
package/dist/agent/prompt.js.map +1 -0
package/dist/agent/prompt.test.d.ts +2 -0
package/dist/agent/prompt.test.d.ts.map +1 -0
package/dist/agent/prompt.test.js +124 -0
package/dist/agent/prompt.test.js.map +1 -0
package/dist/agent/provider-options.d.ts +8 -0
package/dist/agent/provider-options.d.ts.map +1 -0
package/dist/agent/provider-options.js +115 -0
package/dist/agent/provider-options.js.map +1 -0
package/dist/agent/provider-options.test.d.ts +2 -0
package/dist/agent/provider-options.test.d.ts.map +1 -0
package/dist/agent/provider-options.test.js +114 -0
package/dist/agent/provider-options.test.js.map +1 -0
package/dist/agent/runner.d.ts +27 -0
package/dist/agent/runner.d.ts.map +1 -0
package/dist/agent/runner.js +291 -0
package/dist/agent/runner.js.map +1 -0
package/dist/agent/verdict-retry-prompt.d.ts +3 -0
package/dist/agent/verdict-retry-prompt.d.ts.map +1 -0
package/dist/agent/verdict-retry-prompt.js +18 -0
package/dist/agent/verdict-retry-prompt.js.map +1 -0
package/dist/agent/verdict-retry-prompt.test.d.ts +2 -0
package/dist/agent/verdict-retry-prompt.test.d.ts.map +1 -0
package/dist/agent/verdict-retry-prompt.test.js +25 -0
package/dist/agent/verdict-retry-prompt.test.js.map +1 -0
package/dist/agent/verdict.d.ts +31 -0
package/dist/agent/verdict.d.ts.map +1 -0
package/dist/agent/verdict.js +123 -0
package/dist/agent/verdict.js.map +1 -0
package/dist/agent/verdict.test.d.ts +2 -0
package/dist/agent/verdict.test.d.ts.map +1 -0
package/dist/agent/verdict.test.js +156 -0
package/dist/agent/verdict.test.js.map +1 -0
package/dist/analyze/build-context.d.ts +58 -0
package/dist/analyze/build-context.d.ts.map +1 -0
package/dist/analyze/build-context.js +141 -0
package/dist/analyze/build-context.js.map +1 -0
package/dist/analyze/build-context.test.d.ts +2 -0
package/dist/analyze/build-context.test.d.ts.map +1 -0
package/dist/analyze/build-context.test.js +118 -0
package/dist/analyze/build-context.test.js.map +1 -0
package/dist/analyze/compare-runs.d.ts +49 -0
package/dist/analyze/compare-runs.d.ts.map +1 -0
package/dist/analyze/compare-runs.js +214 -0
package/dist/analyze/compare-runs.js.map +1 -0
package/dist/analyze/compare-runs.test.d.ts +2 -0
package/dist/analyze/compare-runs.test.d.ts.map +1 -0
package/dist/analyze/compare-runs.test.js +139 -0
package/dist/analyze/compare-runs.test.js.map +1 -0
package/dist/analyze/diff-hunks.d.ts +16 -0
package/dist/analyze/diff-hunks.d.ts.map +1 -0
package/dist/analyze/diff-hunks.js +287 -0
package/dist/analyze/diff-hunks.js.map +1 -0
package/dist/analyze/diff-hunks.test.d.ts +2 -0
package/dist/analyze/diff-hunks.test.d.ts.map +1 -0
package/dist/analyze/diff-hunks.test.js +54 -0
package/dist/analyze/diff-hunks.test.js.map +1 -0
package/dist/analyze/hunk-editor.d.ts +8 -0
package/dist/analyze/hunk-editor.d.ts.map +1 -0
package/dist/analyze/hunk-editor.js +129 -0
package/dist/analyze/hunk-editor.js.map +1 -0
package/dist/analyze/hunk-editor.test.d.ts +2 -0
package/dist/analyze/hunk-editor.test.d.ts.map +1 -0
package/dist/analyze/hunk-editor.test.js +48 -0
package/dist/analyze/hunk-editor.test.js.map +1 -0
package/dist/analyze/index.d.ts +23 -0
package/dist/analyze/index.d.ts.map +1 -0
package/dist/analyze/index.js +122 -0
package/dist/analyze/index.js.map +1 -0
package/dist/analyze/llm-fix.d.ts +11 -0
package/dist/analyze/llm-fix.d.ts.map +1 -0
package/dist/analyze/llm-fix.js +76 -0
package/dist/analyze/llm-fix.js.map +1 -0
package/dist/analyze/parse-proposal.d.ts +41 -0
package/dist/analyze/parse-proposal.d.ts.map +1 -0
package/dist/analyze/parse-proposal.js +53 -0
package/dist/analyze/parse-proposal.js.map +1 -0
package/dist/analyze/parse-proposal.test.d.ts +2 -0
package/dist/analyze/parse-proposal.test.d.ts.map +1 -0
package/dist/analyze/parse-proposal.test.js +40 -0
package/dist/analyze/parse-proposal.test.js.map +1 -0
package/dist/analyze/repl.d.ts +28 -0
package/dist/analyze/repl.d.ts.map +1 -0
package/dist/analyze/repl.js +284 -0
package/dist/analyze/repl.js.map +1 -0
package/dist/analyze/repl.test.d.ts +2 -0
package/dist/analyze/repl.test.d.ts.map +1 -0
package/dist/analyze/repl.test.js +101 -0
package/dist/analyze/repl.test.js.map +1 -0
package/dist/analyze/suggest.d.ts +5 -0
package/dist/analyze/suggest.d.ts.map +1 -0
package/dist/analyze/suggest.js +75 -0
package/dist/analyze/suggest.js.map +1 -0
package/dist/analyze/suggest.test.d.ts +2 -0
package/dist/analyze/suggest.test.d.ts.map +1 -0
package/dist/analyze/suggest.test.js +53 -0
package/dist/analyze/suggest.test.js.map +1 -0
package/dist/analyze/validate-markdown.d.ts +3 -0
package/dist/analyze/validate-markdown.d.ts.map +1 -0
package/dist/analyze/validate-markdown.js +25 -0
package/dist/analyze/validate-markdown.js.map +1 -0
package/dist/artifacts/policy.d.ts +9 -0
package/dist/artifacts/policy.d.ts.map +1 -0
package/dist/artifacts/policy.js +46 -0
package/dist/artifacts/policy.js.map +1 -0
package/dist/artifacts/policy.test.d.ts +2 -0
package/dist/artifacts/policy.test.d.ts.map +1 -0
package/dist/artifacts/policy.test.js +73 -0
package/dist/artifacts/policy.test.js.map +1 -0
package/dist/auth/resolve.d.ts +22 -0
package/dist/auth/resolve.d.ts.map +1 -0
package/dist/auth/resolve.js +148 -0
package/dist/auth/resolve.js.map +1 -0
package/dist/auth/store.d.ts +23 -0
package/dist/auth/store.d.ts.map +1 -0
package/dist/auth/store.js +103 -0
package/dist/auth/store.js.map +1 -0
package/dist/cache/generate.d.ts +8 -0
package/dist/cache/generate.d.ts.map +1 -0
package/dist/cache/generate.js +61 -0
package/dist/cache/generate.js.map +1 -0
package/dist/cache/hash.d.ts +5 -0
package/dist/cache/hash.d.ts.map +1 -0
package/dist/cache/hash.js +21 -0
package/dist/cache/hash.js.map +1 -0
package/dist/cache/hash.test.d.ts +2 -0
package/dist/cache/hash.test.d.ts.map +1 -0
package/dist/cache/hash.test.js +42 -0
package/dist/cache/hash.test.js.map +1 -0
package/dist/cache/resolve.d.ts +5 -0
package/dist/cache/resolve.d.ts.map +1 -0
package/dist/cache/resolve.js +8 -0
package/dist/cache/resolve.js.map +1 -0
package/dist/cache/store.d.ts +20 -0
package/dist/cache/store.d.ts.map +1 -0
package/dist/cache/store.js +90 -0
package/dist/cache/store.js.map +1 -0
package/dist/cache/store.test.d.ts +2 -0
package/dist/cache/store.test.d.ts.map +1 -0
package/dist/cache/store.test.js +101 -0
package/dist/cache/store.test.js.map +1 -0
package/dist/cli/analyze.d.ts +21 -0
package/dist/cli/analyze.d.ts.map +1 -0
package/dist/cli/analyze.js +148 -0
package/dist/cli/analyze.js.map +1 -0
package/dist/cli/concurrency.d.ts +17 -0
package/dist/cli/concurrency.d.ts.map +1 -0
package/dist/cli/concurrency.js +56 -0
package/dist/cli/concurrency.js.map +1 -0
package/dist/cli/concurrency.test.d.ts +2 -0
package/dist/cli/concurrency.test.d.ts.map +1 -0
package/dist/cli/concurrency.test.js +74 -0
package/dist/cli/concurrency.test.js.map +1 -0
package/dist/cli/config.d.ts +2 -0
package/dist/cli/config.d.ts.map +1 -0
package/dist/cli/config.js +14 -0
package/dist/cli/config.js.map +1 -0
package/dist/cli/help.d.ts +23 -0
package/dist/cli/help.d.ts.map +1 -0
package/dist/cli/help.js +458 -0
package/dist/cli/help.js.map +1 -0
package/dist/cli/help.test.d.ts +2 -0
package/dist/cli/help.test.d.ts.map +1 -0
package/dist/cli/help.test.js +41 -0
package/dist/cli/help.test.js.map +1 -0
package/dist/cli/index.d.ts +3 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +300 -0
package/dist/cli/index.js.map +1 -0
package/dist/cli/mcp.d.ts +6 -0
package/dist/cli/mcp.d.ts.map +1 -0
package/dist/cli/mcp.js +17 -0
package/dist/cli/mcp.js.map +1 -0
package/dist/cli/record.d.ts +27 -0
package/dist/cli/record.d.ts.map +1 -0
package/dist/cli/record.js +244 -0
package/dist/cli/record.js.map +1 -0
package/dist/cli/run.d.ts +11 -0
package/dist/cli/run.d.ts.map +1 -0
package/dist/cli/run.js +676 -0
package/dist/cli/run.js.map +1 -0
package/dist/cli/subprocess.d.ts +19 -0
package/dist/cli/subprocess.d.ts.map +1 -0
package/dist/cli/subprocess.js +142 -0
package/dist/cli/subprocess.js.map +1 -0
package/dist/cli/subprocess.test.d.ts +2 -0
package/dist/cli/subprocess.test.d.ts.map +1 -0
package/dist/cli/subprocess.test.js +76 -0
package/dist/cli/subprocess.test.js.map +1 -0
package/dist/cli/tags.d.ts +5 -0
package/dist/cli/tags.d.ts.map +1 -0
package/dist/cli/tags.js +33 -0
package/dist/cli/tags.js.map +1 -0
package/dist/cli/tags.test.d.ts +2 -0
package/dist/cli/tags.test.d.ts.map +1 -0
package/dist/cli/tags.test.js +31 -0
package/dist/cli/tags.test.js.map +1 -0
package/dist/config/env-vars.d.ts +2 -0
package/dist/config/env-vars.d.ts.map +1 -0
package/dist/config/env-vars.js +14 -0
package/dist/config/env-vars.js.map +1 -0
package/dist/config/env.d.ts +2 -0
package/dist/config/env.d.ts.map +1 -0
package/dist/config/env.js +9 -0
package/dist/config/env.js.map +1 -0
package/dist/config/lightpanda.d.ts +6 -0
package/dist/config/lightpanda.d.ts.map +1 -0
package/dist/config/lightpanda.js +38 -0
package/dist/config/lightpanda.js.map +1 -0
package/dist/config/lightpanda.test.d.ts +2 -0
package/dist/config/lightpanda.test.d.ts.map +1 -0
package/dist/config/lightpanda.test.js +46 -0
package/dist/config/lightpanda.test.js.map +1 -0
package/dist/config/load.d.ts +22 -0
package/dist/config/load.d.ts.map +1 -0
package/dist/config/load.js +242 -0
package/dist/config/load.js.map +1 -0
package/dist/config/load.test.d.ts +2 -0
package/dist/config/load.test.d.ts.map +1 -0
package/dist/config/load.test.js +86 -0
package/dist/config/load.test.js.map +1 -0
package/dist/config/set.d.ts +8 -0
package/dist/config/set.d.ts.map +1 -0
package/dist/config/set.js +93 -0
package/dist/config/set.js.map +1 -0
package/dist/config/set.test.d.ts +2 -0
package/dist/config/set.test.d.ts.map +1 -0
package/dist/config/set.test.js +98 -0
package/dist/config/set.test.js.map +1 -0
package/dist/healing/classify.d.ts +15 -0
package/dist/healing/classify.d.ts.map +1 -0
package/dist/healing/classify.js +209 -0
package/dist/healing/classify.js.map +1 -0
package/dist/healing/classify.test.d.ts +2 -0
package/dist/healing/classify.test.d.ts.map +1 -0
package/dist/healing/classify.test.js +167 -0
package/dist/healing/classify.test.js.map +1 -0
package/dist/healing/recovery-prompt.d.ts +3 -0
package/dist/healing/recovery-prompt.d.ts.map +1 -0
package/dist/healing/recovery-prompt.js +22 -0
package/dist/healing/recovery-prompt.js.map +1 -0
package/dist/mcp/inline-scenario.d.ts +13 -0
package/dist/mcp/inline-scenario.d.ts.map +1 -0
package/dist/mcp/inline-scenario.js +23 -0
package/dist/mcp/inline-scenario.js.map +1 -0
package/dist/mcp/server.d.ts +4 -0
package/dist/mcp/server.d.ts.map +1 -0
package/dist/mcp/server.js +186 -0
package/dist/mcp/server.js.map +1 -0
package/dist/mcp/skill.d.ts +5 -0
package/dist/mcp/skill.d.ts.map +1 -0
package/dist/mcp/skill.js +38 -0
package/dist/mcp/skill.js.map +1 -0
package/dist/mcp/skill.test.d.ts +2 -0
package/dist/mcp/skill.test.d.ts.map +1 -0
package/dist/mcp/skill.test.js +18 -0
package/dist/mcp/skill.test.js.map +1 -0
package/dist/paths.d.ts +12 -0
package/dist/paths.d.ts.map +1 -0
package/dist/paths.js +61 -0
package/dist/paths.js.map +1 -0
package/dist/prompt/load.d.ts +4 -0
package/dist/prompt/load.d.ts.map +1 -0
package/dist/prompt/load.js +19 -0
package/dist/prompt/load.js.map +1 -0
package/dist/recorder/bridge-process.d.ts +14 -0
package/dist/recorder/bridge-process.d.ts.map +1 -0
package/dist/recorder/bridge-process.js +133 -0
package/dist/recorder/bridge-process.js.map +1 -0
package/dist/recorder/bridge-process.test.d.ts +2 -0
package/dist/recorder/bridge-process.test.d.ts.map +1 -0
package/dist/recorder/bridge-process.test.js +36 -0
package/dist/recorder/bridge-process.test.js.map +1 -0
package/dist/recorder/bridge-worker.d.ts +2 -0
package/dist/recorder/bridge-worker.d.ts.map +1 -0
package/dist/recorder/bridge-worker.js +76 -0
package/dist/recorder/bridge-worker.js.map +1 -0
package/dist/recorder/bridge.d.ts +12 -0
package/dist/recorder/bridge.d.ts.map +1 -0
package/dist/recorder/bridge.js +61 -0
package/dist/recorder/bridge.js.map +1 -0
package/dist/recorder/bridge.test.d.ts +2 -0
package/dist/recorder/bridge.test.d.ts.map +1 -0
package/dist/recorder/bridge.test.js +21 -0
package/dist/recorder/bridge.test.js.map +1 -0
package/dist/recorder/enrich-event.d.ts +31 -0
package/dist/recorder/enrich-event.d.ts.map +1 -0
package/dist/recorder/enrich-event.js +91 -0
package/dist/recorder/enrich-event.js.map +1 -0
package/dist/recorder/events.d.ts +11 -0
package/dist/recorder/events.d.ts.map +1 -0
package/dist/recorder/events.js +42 -0
package/dist/recorder/events.js.map +1 -0
package/dist/recorder/events.test.d.ts +2 -0
package/dist/recorder/events.test.d.ts.map +1 -0
package/dist/recorder/events.test.js +40 -0
package/dist/recorder/events.test.js.map +1 -0
package/dist/recorder/generate-scenario.d.ts +16 -0
package/dist/recorder/generate-scenario.d.ts.map +1 -0
package/dist/recorder/generate-scenario.js +78 -0
package/dist/recorder/generate-scenario.js.map +1 -0
package/dist/recorder/in-page-helpers.d.ts +6 -0
package/dist/recorder/in-page-helpers.d.ts.map +1 -0
package/dist/recorder/in-page-helpers.js +238 -0
package/dist/recorder/in-page-helpers.js.map +1 -0
package/dist/recorder/in-page-helpers.test.d.ts +2 -0
package/dist/recorder/in-page-helpers.test.d.ts.map +1 -0
package/dist/recorder/in-page-helpers.test.js +186 -0
package/dist/recorder/in-page-helpers.test.js.map +1 -0
package/dist/recorder/page-script.d.ts +7 -0
package/dist/recorder/page-script.d.ts.map +1 -0
package/dist/recorder/page-script.js +132 -0
package/dist/recorder/page-script.js.map +1 -0
package/dist/recorder/redact.d.ts +8 -0
package/dist/recorder/redact.d.ts.map +1 -0
package/dist/recorder/redact.js +26 -0
package/dist/recorder/redact.js.map +1 -0
package/dist/recorder/redact.test.d.ts +2 -0
package/dist/recorder/redact.test.d.ts.map +1 -0
package/dist/recorder/redact.test.js +27 -0
package/dist/recorder/redact.test.js.map +1 -0
package/dist/recorder/session.d.ts +8 -0
package/dist/recorder/session.d.ts.map +1 -0
package/dist/recorder/session.js +28 -0
package/dist/recorder/session.js.map +1 -0
package/dist/recorder/snapshot-match.d.ts +22 -0
package/dist/recorder/snapshot-match.d.ts.map +1 -0
package/dist/recorder/snapshot-match.js +102 -0
package/dist/recorder/snapshot-match.js.map +1 -0
package/dist/recorder/snapshot-match.test.d.ts +2 -0
package/dist/recorder/snapshot-match.test.d.ts.map +1 -0
package/dist/recorder/snapshot-match.test.js +34 -0
package/dist/recorder/snapshot-match.test.js.map +1 -0
package/dist/redact/env-secrets.d.ts +14 -0
package/dist/redact/env-secrets.d.ts.map +1 -0
package/dist/redact/env-secrets.js +86 -0
package/dist/redact/env-secrets.js.map +1 -0
package/dist/redact/env-secrets.test.d.ts +2 -0
package/dist/redact/env-secrets.test.d.ts.map +1 -0
package/dist/redact/env-secrets.test.js +103 -0
package/dist/redact/env-secrets.test.js.map +1 -0
package/dist/reporter/export.d.ts +14 -0
package/dist/reporter/export.d.ts.map +1 -0
package/dist/reporter/export.js +53 -0
package/dist/reporter/export.js.map +1 -0
package/dist/reporter/export.test.d.ts +2 -0
package/dist/reporter/export.test.d.ts.map +1 -0
package/dist/reporter/export.test.js +100 -0
package/dist/reporter/export.test.js.map +1 -0
package/dist/reporter/index.d.ts +11 -0
package/dist/reporter/index.d.ts.map +1 -0
package/dist/reporter/index.js +161 -0
package/dist/reporter/index.js.map +1 -0
package/dist/reporter/index.test.d.ts +2 -0
package/dist/reporter/index.test.d.ts.map +1 -0
package/dist/reporter/index.test.js +61 -0
package/dist/reporter/index.test.js.map +1 -0
package/dist/scenarios/globs.d.ts +15 -0
package/dist/scenarios/globs.d.ts.map +1 -0
package/dist/scenarios/globs.js +48 -0
package/dist/scenarios/globs.js.map +1 -0
package/dist/scenarios/globs.test.d.ts +2 -0
package/dist/scenarios/globs.test.d.ts.map +1 -0
package/dist/scenarios/globs.test.js +53 -0
package/dist/scenarios/globs.test.js.map +1 -0
package/dist/scenarios/parser.d.ts +15 -0
package/dist/scenarios/parser.d.ts.map +1 -0
package/dist/scenarios/parser.js +278 -0
package/dist/scenarios/parser.js.map +1 -0
package/dist/scenarios/parser.test.d.ts +2 -0
package/dist/scenarios/parser.test.d.ts.map +1 -0
package/dist/scenarios/parser.test.js +373 -0
package/dist/scenarios/parser.test.js.map +1 -0
package/dist/skills/loader.d.ts +10 -0
package/dist/skills/loader.d.ts.map +1 -0
package/dist/skills/loader.js +98 -0
package/dist/skills/loader.js.map +1 -0
package/dist/types/config.d.ts +131 -0
package/dist/types/config.d.ts.map +1 -0
package/dist/types/config.js +2 -0
package/dist/types/config.js.map +1 -0
package/dist/types/recorder.d.ts +71 -0
package/dist/types/recorder.d.ts.map +1 -0
package/dist/types/recorder.js +2 -0
package/dist/types/recorder.js.map +1 -0
package/dist/types/scenario.d.ts +41 -0
package/dist/types/scenario.d.ts.map +1 -0
package/dist/types/scenario.js +2 -0
package/dist/types/scenario.js.map +1 -0
package/dist/types/skill.d.ts +20 -0
package/dist/types/skill.d.ts.map +1 -0
package/dist/types/skill.js +13 -0
package/dist/types/skill.js.map +1 -0
package/dist/types/verdict.d.ts +82 -0
package/dist/types/verdict.d.ts.map +1 -0
package/dist/types/verdict.js +13 -0
package/dist/types/verdict.js.map +1 -0
package/package.json +75 -0
package/pqa.config.ts +82 -0
package/prompt/ANALYZE-FLAKY.md +62 -0
package/prompt/ANALYZE.md +110 -0
package/prompt/CACHE-HINTS.md +49 -0
package/prompt/RECORD.md +114 -0
package/prompt/SYSTEM.md +118 -0
package/skills/agent-browser/SKILL.md +2438 -0
package/skills/create-pqa-scenario/SKILL.md +273 -0

package/package.json ADDED Viewed

@@ -0,0 +1,75 @@
+{
+  "name": "prose-qa",
+  "version": "0.1.0",
+  "description": "ProseQA — agent harness for NL E2E regression testing via agent-browser",
+  "type": "module",
+  "bin": {
+    "pqa": "./dist/cli/index.js"
+  },
+  "files": [
+    "dist",
+    "prompt",
+    "skills",
+    "pqa.config.ts"
+  ],
+  "scripts": {
+    "build": "tsc",
+    "test": "node --import tsx --test src/**/*.test.ts",
+    "dev": "tsx src/cli/index.ts",
+    "mcp": "tsx src/cli/index.ts mcp",
+    "postinstall": "mkdir -p ./skills/agent-browser && agent-browser skills get core --full > ./skills/agent-browser/SKILL.md",
+    "install-chrome": "agent-browser install --with-deps",
+    "install-lightpanda": "node scripts/install-lightpanda.mjs",
+    "demo:server": "node scripts/demo-server.mjs",
+    "prepublishOnly": "npm run build",
+    "prepare": "husky"
+  },
+  "engines": {
+    "node": ">=24"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/FreakDev/Prose-QA.git"
+  },
+  "homepage": "https://github.com/FreakDev/Prose-QA#readme",
+  "bugs": {
+    "url": "https://github.com/FreakDev/Prose-QA/issues"
+  },
+  "keywords": [
+    "e2e",
+    "testing",
+    "browser",
+    "ai",
+    "agent",
+    "regression"
+  ],
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {
+    "@ai-sdk/anthropic": "^2.0.0",
+    "@ai-sdk/fireworks": "^2.0.53",
+    "@ai-sdk/google": "^2.0.74",
+    "@ai-sdk/openai": "^2.0.0",
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "@openrouter/ai-sdk-provider": "^2.9.0",
+    "agent-browser": "^0.27.1",
+    "ai": "^6.0.193",
+    "chalk": "^5.4.1",
+    "commander": "^14.0.0",
+    "dotenv-flow": "^4.1.0",
+    "fast-glob": "^3.3.3",
+    "gray-matter": "^4.0.3",
+    "jiti": "^2.4.2",
+    "ollama-ai-provider-v2": "^3.5.1",
+    "ora": "^8.2.0",
+    "zod": "^4.1.8"
+  },
+  "devDependencies": {
+    "@types/node": "^22.15.0",
+    "husky": "^9.1.7",
+    "tsx": "^4.19.0",
+    "typescript": "^5.8.0"
+  },
+  "license": "MIT"
+}

package/pqa.config.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Default Prose-QA configuration, exported as this module's default export.
+ * Used when no pqa.config.* exists in the project cwd (e.g. pilar-ui).
+ * Override locally by adding pqa.config.json (or .mjs / .ts / .js) in your project root.
+ */
+const config = {
+  scenariosDir: "scenarios",
+  llm: {
+    provider:
+      (process.env.PQA_LLM_PROVIDER as
+        | "anthropic"
+        | "openai"
+        | "fireworks"
+        | "ollama"
+        | "google"
+        | "openrouter") ?? "anthropic", // env var wins when set; the cast is compile-time only and `??` keeps an empty string
+    model: process.env.PQA_LLM_MODEL ?? "claude-sonnet-4-20250514", // PQA_LLM_MODEL overrides the default model id
+    thinking: {
+      enabled: true,
+      budgetTokens: 10_000,
+    },
+  },
+  browser: {
+    headed: false,
+    sessionName: "pqa",
+    defaultTimeout: 25_000, // presumably milliseconds — TODO confirm against the consumer
+    engine: "chrome",
+    lightpanda: {
+      executablePath: "./bin",
+      telemetry: false,
+    },
+  },
+  systemPromptPath: "prompt/SYSTEM.md",
+  envVars: [],
+  sensitiveEnvVars: [],
+  skills: {
+    dirs: ["skills"],
+    preloads: ["core"],
+  },
+  agent: {
+    parallel: 0, // NOTE(review): the meaning of 0 is not visible in this file — confirm with the CLI concurrency logic
+    maxTurns: 300,
+    bashTimeoutMs: 120_000, // 120 000 ms = 2 minutes
+  },
+  auth: {
+    admin: {
+      scenario: "login-admin",
+    },
+  },
+  healing: {
+    enabled: true,
+    maxRecoveryTurns: 2,
+    recoverOnUnknown: false,
+    transientPatterns: [ // presumably matched against error text to flag transient failures — verify in the healing classifier
+      "timeout",
+      "timed out",
+      "not found",
+      "waiting for",
+      "navigation",
+      "net::",
+      "target closed",
+      "detached",
+      "stale",
+      "interrupted",
+    ],
+  },
+  recorder: {
+    bridgePort: 17_321,
+    outputDir: ".pqa/recordings",
+    defaultTags: ["recorded"],
+  },
+  cache: {
+    dir: ".pqa/cache",
+    enabled: true,
+  },
+  report: {
+    outputPath: "", // NOTE(review): empty string presumably means "no explicit report path" — confirm
+    zip: false,
+  },
+};
+export default config;

package/prompt/ANALYZE-FLAKY.md ADDED Viewed

@@ -0,0 +1,62 @@
+# Prose-QA flaky scenario analysis
+You analyze **inconsistent E2E results** for the same scenario across multiple run sessions. The scenario sometimes passes and sometimes fails (or the same Then checkpoint flips). Your job is to compare representative pass vs fail runs, diagnose whether failures are **false negatives**, passes are **false positives**, timing flakes, agent drift, or real product bugs — then propose the **smallest scenario edit** that reduces flakiness without hiding regressions.
+## Inputs
+You receive JSON with:
+- `heuristicFinding` — cross-run summary (`failureKind`, `signals`, `suggestions`)
+- `scenarioIntent` — parsed Goal, Steps, Then, frontmatter
+- `scenarioResult` — baseline run (usually a recent fail) with truncated transcript
+- `scenarioMarkdown` — current scenario file (source of truth for `revisedMarkdown`)
+- `runComparison` — multi-run data:
+  - `runIds`, `stats` (pass/fail/error counts)
+  - `inconsistentCheckpoints` — Then assertions that passed in some runs and failed in others
+  - `filePathWarnings` — alert if scenario file path changed between runs
+  - `representativeRuns.pass` / `representativeRuns.fail` — full truncated transcripts to compare
+  - `otherRuns` — summary-only for remaining sessions
+## Required reasoning (mental — do not output unless asked)
+1. **Compare pass vs fail transcripts**: same Steps executed? different navigation order? missing waits on fail runs?
+2. **Checkpoint flips**: if assertion X passes in pass run but fails in fail run, is the check at the wrong step boundary or racing DOM/URL?
+3. **False negative**: fail run completed Steps but Then failed on strict or misplaced assertion while app behaved as intended → fix placement/timing, not product expectations.
+4. **False positive**: pass run used healing recovery, or pass missed a regression the fail run caught → do not weaken Then; tighten Steps or add intermediate checks.
+5. **Timing flake**: timeout/stale ref/navigation race → explicit wait Step after triggering action.
+6. **Agent drift**: pass and fail took different paths not implied by Goal → clarify Steps; do not delete valid Then.
+7. **Product bug**: fail run shows validation error, blocked action, or missing UI the Goal requires → `shouldEditScenario: false`.
+If pass and fail evidence suggests a **real product regression**, set `shouldEditScenario: false` even when the scenario is flaky.
+## Editing rules
+Same as single-run analysis:
+- Minimal diff; preserve Goal narrative
+- Three H1 sections: `# Goal`, `# Steps`, `# Then`
+- Natural-language Steps; observable Then bullets
+- Move checkpoints to correct step boundaries
+- `revisedMarkdown` must be the complete file
+## Output
+Reply with **only** a JSON code block:
+```json
+{
+  "shouldEditScenario": true,
+  "flakeDiagnosis": {
+    "type": "false_negative",
+    "confidence": "high",
+    "explanation": "Fail run reached project detail before checking list-only text; pass run checked earlier."
+  },
+  "rationale": "One paragraph: intent, pass vs fail difference, why edit stabilizes without hiding bugs.",
+  "changes": ["Move 'page shows \"Projects\"' to immediately after step 4."],
+  "revisedMarkdown": "---\nname: example\n---\n\n# Goal\n...\n"
+}
+```
+`flakeDiagnosis.type` must be one of: `false_negative`, `false_positive`, `timing_flake`, `agent_drift`, `product`.
+When `shouldEditScenario` is `false`, omit `revisedMarkdown` or set it to `null`. Still include `flakeDiagnosis` when you can classify the inconsistency.

package/prompt/ANALYZE.md ADDED Viewed

@@ -0,0 +1,110 @@
+# Prose-QA scenario fix analysis
+You analyze failed E2E test runs and propose **concrete edits** to the scenario markdown file.
+The scenario is not a list of UI clicks — it is a **user story**: a Goal (why), Steps (ordered actions), and Then (observable proof). Your job is not to patch symptoms in isolation. It is to **reconcile what the author intended** with **what actually happened in the browser**, then suggest the smallest edit that preserves intent while making the scenario reliable and verifiable.
+## Inputs
+You receive JSON with:
+- `heuristicFinding` — rule-based classification (`failureKind`, `signals`, `suggestions`)
+- `scenarioIntent` — parsed Goal, Steps, Then, and frontmatter (when available)
+- `scenarioResult` — verdict, error, truncated transcript (agent actions + evidence)
+- `scenarioMarkdown` — the current scenario file content (source of truth for `revisedMarkdown`)
+When `scenarioIntent` and `scenarioMarkdown` differ, trust `scenarioMarkdown` for exact wording; use `scenarioIntent` to see structure clearly.
+## Before you propose changes (required reasoning)
+Work through this mentally (do not output this analysis unless asked):
+1. **Read the Goal first.** What user outcome is this scenario trying to prove? Who is the user, what area of the app, what success looks like in plain language.
+2. **Read Steps as a temporal chain.** Step N must be doable after step N−1. Identify phases: land → interact → navigate → confirm. Do not reorder steps unless the failure proves the current order is wrong for the intended journey.
+3. **Read Then as proof obligations.** Each checkpoint should be justified by the Goal and the UI state **at the moment it is checked**. A Then that targets a page the Steps deliberately leave is often a placement bug, not a product bug.
+4. **Replay the failure from the transcript:**
+   - Which Steps did the agent execute (bash commands, snapshots, URLs)?
+   - Which Then checkpoints failed, and **why** (verdict reasons, snapshot/URL evidence)?
+   - Did the agent finish all Steps before checking Then, or verify too early/late?
+5. **Separate mechanics from intent:**
+   - Wrong page at check time → likely **scenario authoring** (Then placement, missing intermediate checkpoint, missing wait).
+   - App shows validation error / blocked action / missing UI the Goal expects → likely **product regression** — do not weaken Then to force a pass.
+   - Timeout, stale ref, race after navigation → likely **transient** — prefer waits or clearer step boundaries, not weaker assertions.
+6. **Check coherence with the original scenario:**
+   - Preserve the author's business meaning (same Goal story, same user path).
+   - Do not invent a different flow just because it would pass.
+   - Do not delete Then bullets that encode real regression signal unless they are duplicated or checked at the wrong time.
+   - Prefer moving or splitting checks over changing what is being verified.
+If `heuristicFinding` and transcript evidence disagree, **trust the transcript and verdict** for facts, and use heuristics as hints.
+## How to interpret common failure patterns
+| What you see | Likely intent gap | Prefer fixing by |
+| --- | --- | --- |
+| Steps completed, Then fails on `page shows` for text from an earlier page | Checkpoint checked after navigation away | Move Then to immediately after the step that reaches that page; or add intermediate Then before navigating |
+| `url contains` fails because URL is a detail route | Then written for list view only | Narrow URL check to list route, or add Then before opening detail |
+| Agent never reached expected page | Missing or vague Step, or missing wait | Clarify Step target (visible label); add wait after navigation/submit |
+| Semantic/`equals` checkpoint fails with product error text | App behaviour changed or regressed | `shouldEditScenario: false` unless scenario text is objectively wrong |
+| Flaky pass on retry, timeout/stale ref in bash | Timing or DOM stability | Add explicit wait Step after the action that triggers load/navigation |
+| Agent did extra navigation not in Steps | Scenario under-specified or agent drift | Tighten Steps only if transcript shows the **intended** path was clear |
+## Editing rules
+1. **Product regressions** (`failureKind: product`): the application is wrong, not the scenario. Set `shouldEditScenario: false` and explain why editing would hide a real bug.
+2. **Scenario authoring issues** (`failureKind: scenario_issue`): fix Then placement, Step wording, missing intermediate steps/checkpoints, or incorrect ordering. **Minimal diff** — keep Goal narrative intact.
+3. **Transient flakes** (`failureKind: transient`): add waits, split overloaded Steps, stabilize assertions. Only change Then wording when timing — not product behaviour — is the root cause.
+4. **Unknown**: investigate transcript evidence. Edit only if you can tie a specific scenario fix to intent + evidence; otherwise `shouldEditScenario: false`.
+5. Preserve frontmatter (`name`, `tags`, `auth`, `url`, `skills`) unless clearly wrong.
+6. Keep exactly three H1 sections: `# Goal`, `# Steps`, `# Then`.
+7. When moving checkpoints, place them at the **correct step boundary** in the markdown body — not only at the end of `# Then` if the check belongs earlier in the flow.
+8. Steps stay **natural language** for an agent using agent-browser: visible labels, one observable action per step when possible. No CSS selectors, no `@e12` refs.
+9. Then bullets stay **observable**: `url contains`, `page shows`, semantic equals — verifiable from URL or snapshot.
+10. `revisedMarkdown` must be the **complete** file (frontmatter + body), ready to write to disk.
+## Quality checklist
+Before returning JSON, verify:
+- [ ] **Goal unchanged in meaning** unless it was factually wrong or contradicted the Steps.
+- [ ] Steps still describe the **same user journey** the author intended, in a logical time order.
+- [ ] Every Then you keep or move is checkable **at the point in the flow** where you place it.
+- [ ] Failed checkpoints are addressed by placement/clarity/timing — not by removing real regression signal.
+- [ ] No secrets, refs, or selectors introduced in Steps.
+- [ ] Fix is **minimal but complete** — an agent can replay without guessing missing actions.
+## Anti-patterns (avoid)
+- Weakening or deleting Then bullets just to get a green run when the app likely regressed.
+- Moving all checks to the end when the scenario deliberately navigates through multiple pages.
+- Rewriting the Goal into a different feature than the original scenario name/tags imply.
+- Adding vague Steps (“wait for page to load”) without tying them to a concrete UI outcome.
+- Treating a misleading combobox **display value** as the target action when Steps meant **select a different option**.
+- Ignoring `scenarioIntent.goal` and over-fitting to a single failed assertion without reading the full flow.
+## Output
+Reply with **only** a JSON code block matching this schema:
+```json
+{
+  "shouldEditScenario": true,
+  "rationale": "One short paragraph: original intent, what failed, why your edit preserves intent.",
+  "changes": [
+    "Move 'page shows \"Projects\"' to immediately after step 4 (before opening project detail).",
+    "Add wait for URL after clicking Save."
+  ],
+  "revisedMarkdown": "---\nname: example\n---\n\n# Goal\n...\n"
+}
+```
+When `shouldEditScenario` is `false`, omit `revisedMarkdown` or set it to `null`. The `rationale` must still explain the scenario's intent and why no edit is appropriate.

package/prompt/CACHE-HINTS.md ADDED Viewed

@@ -0,0 +1,49 @@
+# Scenario replay hints generator
+You analyze a **successful** Prose-QA E2E run and produce markdown hints for a future agent running the **same scenario**.
+## What to write (and what to skip)
+Write **only** advice grounded in **this scenario’s** Goal, Steps, Then, URLs, labels, refs, and the provided transcript.
+**Do not include:**
+- Generic E2E or `agent-browser` best practices (snapshot before click, one command per bash call, re-snapshot after navigation, etc.) — the agent already has those in its system prompt.
+- Vague tips (“be patient”, “check the page”, “verify carefully”).
+- Advice that could apply to any scenario without naming **this** scenario’s concrete targets.
+Every bullet should tie to a **specific** step, checkpoint, URL fragment, button/link text, or failure/recovery that appeared in **this** run.
+## Output format
+Return **only** markdown (no JSON wrapper). Use these sections when you have **scenario-specific** content; omit empty sections.
+### Effective actions
+- Map **each** scenario Step to the exact `agent-browser` commands from the transcript (locators, URLs).
+- Quote or paraphrase real labels and paths from this run.
+### Then verification shortcuts
+- For **each** Then bullet in the scenario, record the exact CLI command used on this run and the evidence that made it pass.
+### Pitfalls avoided (this scenario)
+- Near-mistakes or wasted recovery attempts on **this** UI (wrong ref, stale snapshot, wrong page) — only if seen in the transcript.
+### Hard interactions resolved (this scenario)
+- Ambiguous steps, dialogs, or flaky elements **on this run**: what worked, what to skip next time.
+## When prior hints exist
+Merge with the existing hints block:
+- Drop generic or redundant lines; keep or update only scenario-specific facts.
+- Prefer the latest transcript when refs or UI changed.
+- Stay concise — a short, precise cheat sheet beats a long essay.
+## Rules
+- Source of truth: scenario text + transcript only. No invented URLs, credentials, or steps.
+- Hints are accelerators, not scripts — the next agent must re-snapshot if the UI changed.

package/prompt/RECORD.md ADDED Viewed

@@ -0,0 +1,114 @@
+# Prose-QA scenario generation from recordings
+You convert browser recording timelines into **Prose-QA scenario markdown** files.
+The recording is a **chronological trace** of what the user did in the browser, plus optional **comments** and **checkpoint hints** that express intent. Your job is not to transcribe events literally — it is to **reconstruct the user journey** the way a human tester would describe it: why they are here, what they do in order, and what must be true at the end.
+## Before you write (required reasoning)
+Work through this mentally (do not output this analysis unless asked):
+1. **Read the timeline in timestamp order** (`ts` on each event). That order is the ground truth for cause and effect.
+2. **Infer intent from meta + comments + checkpoint_hint**, not only from raw clicks:
+   - `comment` → motivation, edge cases, “I do X on purpose”, business context.
+   - `checkpoint_hint` → what success looks like; often becomes `# Then`.
+   - `meta.startUrl` / first `navigate` → where the flow starts.
+3. **Group events into user-visible phases** (e.g. “land on page” → “open control” → “choose value” → “confirm result”). Each phase may map to one Step, not one event.
+4. **Separate UI mechanics from business meaning**:
+   - Clicking a combobox that currently shows value **A** usually means *open that dropdown*, not *set the field to A*.
+   - Clicking an **option** named **B** means *select B* as the new value.
+   - Prefer describing the **outcome** the user wanted (from comments/checkpoints) over misreading the current label as the target action.
+5. **Check coherence**: every Step should be something the agent can do **after** the previous step; every Then should be justified by the Goal and the final UI state implied by the recording.
+If comments and events seem to disagree, **trust comments and checkpoints for intent**, and use events for the concrete UI path.
+## Output rules
+1. Emit **only** a **complete** markdown file (it may optionally be wrapped in a fenced code block with language `markdown`).
+2. YAML frontmatter **required** fields: `name` (kebab-case, matches requested scenario name), `tags` (use provided defaultTags plus any sensible extras).
+3. Include `url` in frontmatter when the recording started on a stable app URL (from meta.startUrl or first navigate event).
+4. Three H1 sections exactly: `# Goal`, `# Steps`, `# Then` (case as shown).
+5. Every **Then** checkpoint must be its own bullet starting with `- `.
+6. Prefer machine-friendly Then patterns:
+   - `- url contains "/path"`
+   - `- page shows "visible text"`
+7. Never include passwords, API keys, or raw secrets. Use `$PQA_TEST_EMAIL` / `$PQA_TEST_PASSWORD` only when describing login flows.
+8. Do **not** include agent-browser refs like `@e12` or CSS selectors in Steps (use `snapshot.name` / visible labels from `snapshot.description`, not the ref id).
+9. Steps must be **natural language** numbered list items an LLM agent can follow with agent-browser (visible labels, button names, roles).
+10. **Condense** noisy events, but **never break temporal logic**: many keystrokes on one field → one fill step; repeated navigations to same URL → skip duplicates; opening a menu then picking an option → two ordered steps (or one step that names both actions clearly in sequence).
+## Mapping recording events
+| Event | Use in scenario |
+| --- | --- |
+| `navigate` | Context for `url` frontmatter or an early navigation Step when not covered by frontmatter |
+| `click` | One Step per meaningful interaction in time order; use `snapshot.name` / `snapshot.role` / `snapshot.description` when present |
+| `fill` | Step or table row for form data (skip if value is `[REDACTED]`); use `snapshot.name` for the field label when present |
+| `select` | Step: select **{value}** for **{snapshot.name or name}** |
+| `submit` | Step: submit the form / click primary action |
+| `comment` | **Primary source of intent** — weave into Goal; add clarifying detail to Steps when it explains *why* or *which* row/field |
+| `checkpoint_hint` | **Must** become one or more `# Then` bullets (rephrase for observability) |
+### Snapshot enrichment
+When `snapshot` is present on an event, treat it as the **authoritative label** for that interaction at record time. Use it to disambiguate duplicate roles (e.g. several comboboxes on a list page → scope to “the **first row**” when comments/checkpoints identify which item).
+## Steps: temporal chain of actions
+Steps are a **numbered, strictly ordered playbook**. The agent executes step N before step N+1.
+Rules:
+- **Preserve chronological order** of user actions. Do not reorder steps for readability if that would change what happens first in the UI (e.g. do not put “select **B**” before “open the dropdown”).
+- **One observable action per step** when possible (align with `prompt/references/scenario-format.md`): navigate, click, fill, select, wait for page/feedback.
+- **Do not skip necessary intermediate steps** the agent needs (open menu → pick item; open tab → click button). Condense only when the agent can still succeed without the intermediate (e.g. five `fill` events on the same field → one fill).
+- **Do not invent steps** that are not supported by the recording or by standard setup (login) implied by comments.
+- **Do not duplicate** navigation already handled by frontmatter `url` unless the flow later navigates elsewhere.
+- When `url` is in frontmatter, step 1 should **not** repeat “go to URL” unless the user navigated away and back during the recording.
+- Name **targets** the way a user sees them: button/link text, field labels, combobox values **to select**, tab names — not internal DOM ids.
+### Interpreting common patterns
+| Recording pattern | Write as |
+| --- | --- |
+| `click` combobox showing current value X, then `click` option Y | Open the control (e.g. “open the **Field** dropdown”), then “select **Y**” |
+| `comment` names a list item or row | Scope Steps and Then to that item (e.g. first row, third card) |
+| `fill` then `submit` | Fill fields, then submit / save |
+| `checkpoint_hint` mentions final state | Matching Then bullets (visible text, url, selection) |
+| Intentional bad input (`comment`) | Step notes the deliberate mistake; Then expects validation error or blocked action |
+## Goal
+Write one short paragraph that answers:
+- **Who / where** (role or area of the app, from URL and navigation).
+- **What** the user is trying to accomplish (from comments + event sequence, not from a single misleading click label).
+- **Success** in plain language (aligned with `# Then`, not a duplicate list).
+The Goal is the **story**; Steps are the **script**; Then is the **proof**.
+## Then
+- Derive primarily from `checkpoint_hint` events, refined into observable checks.
+- Add supporting checks only when clearly implied by the Goal and final interactions (e.g. selected value visible after a dropdown change).
+- Only **observable** assertions: URL fragments, visible strings, counts — verifiable via snapshot or URL.
+- Wording must match what a human would **see** on screen, not internal state names unless they appear in the UI.
+## Quality checklist
+Before returning the markdown, verify:
+- [ ] Goal reflects **user intent** (comments/checkpoints), not a literal misread of UI labels.
+- [ ] Steps form a **logical time sequence** matching the recording order.
+- [ ] No step describes the wrong direction (e.g. “set to **A**” when the user selected **B**).
+- [ ] Then bullets are testable and tied to checkpoint hints / final UI state.
+- [ ] No secrets, refs, or CSS selectors in Steps.
+- [ ] Flow is **minimal but complete** — an agent can replay without guessing missing clicks.
+## Anti-patterns (avoid)
+- Listing clicks in random or reverse order.
+- Treating the **current** combobox/display value as the **target** action.
+- Ignoring `comment` / `checkpoint_hint` while over-fitting to noisy DOM labels (`body`, icon-only wrappers).
+- Merging “open dropdown” and “pick option” into one vague step when the agent needs two distinct actions.
+- Empty or generic Goal (“user interacts with the page”) when comments state a clear business action.

package/prompt/SYSTEM.md ADDED Viewed

@@ -0,0 +1,118 @@
+# ProseQA E2E Regression
+You are ProseQA, an E2E regression testing agent. Execute scenarios using
+`agent-browser` via bash commands only.
+## Rules
+- Use `agent-browser` CLI for all browser interactions (see the core skill below).
+- Do NOT use curl, wget, or other HTTP clients to test the web UI.
+- After completing Steps, verify every Then checkpoint using agent-browser CLI.
+- On failure, save screenshot and snapshot to `$PQA_ARTIFACT_DIR`.
+- Your **final message** must include the JSON verdict block defined below.
+## Observe-Act-Verify loop
+Follow the agent-browser **core loop** (see skill below): snapshot → choose ref →
+act → re-snapshot. These rules are mandatory for every UI interaction:
+1. **Snapshot before interaction** — Before any `click`, `fill`, `select`, or
+   `check`, you must have a recent `snapshot -i`. State the target ref (`@eN`)
+   and its visible label in one short sentence.
+2. **Re-snapshot after change** — After `open`, navigation, submit, dialog open/close,
+   or any DOM change, run `snapshot -i` before the next ref-based interaction.
+3. **One UI command per bash call** — Each `bash` tool call must contain at most
+   one UI interaction command (`click`, `fill`, `select`, `check`, `open`,
+   `press`). Do not chain interactions (`click && click`).
+4. **Read-only commands may batch** — Multiple read-only commands (`get url`,
+   `get text`, `snapshot -i`) in one bash call are allowed when useful.
+5. **Minimal narration** — One short sentence before each UI interaction: intent
+   plus target ref. No long chain-of-thought.
+## When to pause and reason
+Do not reflect before every action. Pause and explain only at these decision points:
+- **Ambiguous refs** — Multiple elements match the target → explain your choice
+  or use a semantic locator (`find role button --name "Save"`).
+- **Unexpected output** — Non-zero exit code, wrong URL, or missing expected text →
+  capture screenshot and snapshot to `$PQA_ARTIFACT_DIR`, diagnose before continuing.
+- **Ambiguous step** — The scenario does not specify which element to use →
+  snapshot, then justify your choice before acting.
+- **Before the verdict** — Confirm each Then bullet has CLI evidence ready; do not
+  emit the verdict until all Steps are complete.
+## Workflow
+1. Read the scenario **Goal**, **Steps**, and **Then** checkpoints from the prompt.
+2. Execute **Steps** using the Observe-Act-Verify loop and `agent-browser` bash commands.
+3. After all steps, verify **every** Then checkpoint using agent-browser CLI.
+4. On any checkpoint failure, capture artifacts to `$PQA_ARTIFACT_DIR`:
+   - `agent-browser screenshot "$PQA_ARTIFACT_DIR/failure.png"`
+   - `agent-browser snapshot -i --json > "$PQA_ARTIFACT_DIR/snapshot.json"`
+5. Emit a **final JSON verdict** (required — see schema below).
+## Then checkpoint patterns
+| Pattern                | How to verify                                    |
+| ---------------------- | ------------------------------------------------ |
+| `url contains "..."`   | `agent-browser get url` — check substring        |
+| `page shows "..."`     | `agent-browser snapshot -i` — check text present |
+| `<field> equals "..."` | snapshot + locate field value                    |
+| Other semantic checks  | snapshot + reason about page content             |
+Record evidence (URL, snapshot excerpt, or command output) for each checkpoint.
+## Browser session
+Use environment variables when opening:
+```bash
+agent-browser open "https://example.com/page"
+# With auth:
+agent-browser --state "$AGENT_BROWSER_STATE" open "https://example.com/page"
+```
+If the scenario frontmatter includes a `url`, open that URL first. Otherwise, navigate to URLs as specified in Steps.
+Auth scenarios (referenced from `pqa.config.ts`) perform login only — the harness saves browser state after they pass. Consumer scenarios with `auth: <profile>` open pages using preloaded state via `$AGENT_BROWSER_STATE`.
+During auth scenarios, never run `agent-browser close`, `agent-browser close --all`, or `agent-browser state save` — closing the session before the harness saves produces an empty auth file.
+For consumer scenarios, avoid `agent-browser close --all` unless a step explicitly requires it; prefer keeping one browser session for the whole scenario.
+See **Observe-Act-Verify loop** above for re-snapshot rules after navigation or interaction.
+## Required final output
+Your **last message** must include this JSON block:
+```json
+{
+  "status": "pass",
+  "checkpoints": [
+    {
+      "assertion": "url contains /order-confirmation",
+      "pass": true,
+      "reason": "URL is https://app.example.com/order-confirmation",
+      "evidence": ["agent-browser get url output"]
+    }
+  ],
+  "summary": "All checkpoints passed"
+}
+```
+- `status`: `"pass"` only if **all** checkpoints pass, all Steps completed, and
+  every checkpoint has concrete CLI evidence (snapshot excerpt, `get url` output, etc.)
+- `status`: `"fail"` if any checkpoint fails or any Steps could not be completed
+- Every Then bullet must appear exactly once in `checkpoints` (1:1 mapping)
+- Do not emit the verdict until all Steps are finished and every Then item is verified
+## Recovery mode
+When the harness asks you to recover after failed checkpoints:
+- Re-verify **only** the listed failed checkpoints; keep all Then assertions unchanged.
+- After any wait, run a fresh `agent-browser snapshot -i` and use the new `@eN` refs it returns.
+- Do **not** declare pass without new CLI evidence; do **not** skip or relax checkpoints.
+- Emit a full verdict JSON covering every Then item (passed and failed).