npm - @jonit-dev/night-watch-cli - Versions diffs - 1.7.50 → 1.7.51 - Mend

@jonit-dev/night-watch-cli 1.7.50 → 1.7.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/cli.js +2198 -943
package/dist/commands/init.d.ts.map +1 -1
package/dist/commands/init.js +20 -23
package/dist/commands/init.js.map +1 -1
package/dist/commands/review.d.ts.map +1 -1
package/dist/commands/review.js +1 -0
package/dist/commands/review.js.map +1 -1
package/dist/scripts/night-watch-audit-cron.sh +1 -1
package/dist/scripts/night-watch-helpers.sh +48 -0
package/dist/scripts/night-watch-pr-reviewer-cron.sh +199 -3
package/dist/scripts/night-watch-qa-cron.sh +106 -4
package/dist/templates/audit.md +87 -0
package/dist/templates/executor.md +67 -0
package/dist/templates/night-watch-pr-reviewer.md +6 -0
package/dist/templates/pr-reviewer.md +203 -0
package/dist/templates/qa.md +157 -0
package/dist/templates/slicer.md +234 -0
package/package.json +1 -1

package/dist/scripts/night-watch-qa-cron.sh CHANGED Viewed

@@ -53,6 +53,90 @@ emit_result() {
   fi
 }
+decode_base64_value() {  # Print the base64-decoded form of $1 on stdout; an empty or missing $1 prints nothing and returns 0.
+  local value="${1:-}"
+  if [ -z "${value}" ]; then
+    return 0
+  fi
+  if printf '%s' "${value}" | base64 --decode >/dev/null 2>&1; then  # dry run: only checks that `base64 --decode` succeeds on this input
+    printf '%s' "${value}" | base64 --decode
+  else
+    printf '%s' "${value}" | base64 -d 2>/dev/null || true  # retry with the short flag; a failed decode is swallowed (best effort)
+  fi
+}
+get_pr_comment_bodies_base64() {  # Emit the body of each comment on PR $1 as one base64 string per line; gh failures are ignored.
+  local pr_number="${1:?PR number required}"
+  gh pr view "${pr_number}" --json comments --jq '.comments[]?.body | @base64' 2>/dev/null || true
+  if [ -n "${REPO:-}" ]; then  # the REST lookup only runs when REPO is set (presumably "owner/name" -- confirm against caller)
+    gh api --paginate "repos/${REPO}/issues/${pr_number}/comments" --jq '.[].body | @base64' 2>/dev/null || true  # --paginate: without it only the first page is returned, so the newest comments of a long thread would be missing
+  fi
+}
+get_latest_qa_comment_body() {  # Print the decoded body of the last comment on PR $1 that contains the QA marker; prints nothing when none match.
+  local pr_number="${1:?PR number required}"
+  local latest=""
+  local encoded=""
+  local decoded=""
+  while IFS= read -r encoded; do
+    [ -z "${encoded}" ] && continue
+    decoded=$(decode_base64_value "${encoded}")
+    if printf '%s' "${decoded}" | grep -q '<!-- night-watch-qa-marker -->'; then
+      latest="${decoded}"  # each later match overwrites the previous one, so the last marker comment in the stream wins
+    fi
+  done < <(get_pr_comment_bodies_base64 "${pr_number}")  # process substitution keeps the loop in the current shell, so `latest` is still set below
+  printf "%s" "${latest}"
+}
+pr_has_qa_generated_files() {  # Succeeds when the file list of PR $1 contains a path starting with qa-artifacts/ or matching tests/.../qa/.
+  local pr_number="${1:?PR number required}"
+  gh pr view "${pr_number}" --json files --jq '.files[]?.path' 2>/dev/null \
+    | grep -Eq '^(qa-artifacts/|tests/.*/qa/)'  # the pipeline's exit status is the function's result
+}
+provider_output_looks_invalid() {  # Succeeds when LOG_FILE, after skipping its first $1 lines (default 0), contains a known broken-session message.
+  local from_line="${1:-0}"
+  if [ ! -f "${LOG_FILE}" ]; then
+    return 1  # no log file, so there is nothing to flag
+  fi
+  tail -n "+$((from_line + 1))" "${LOG_FILE}" 2>/dev/null \
+    | grep -Eqi 'Unknown skill:|session is in a broken state|working directory .* no longer exists|Please restart this session'  # case-insensitive search for the listed error phrases
+}
+validate_qa_evidence() {  # Returns 0 when PR $1 carries acceptable QA evidence; otherwise logs a FAIL-QA-EVIDENCE line and returns 1.
+  local pr_number="${1:?PR number required}"
+  local qa_comment=""
+  qa_comment=$(get_latest_qa_comment_body "${pr_number}")
+  if [ -z "${qa_comment}" ]; then
+    log "FAIL-QA-EVIDENCE: PR #${pr_number} has no QA marker comment (<!-- night-watch-qa-marker -->)"
+    return 1
+  fi
+  if printf '%s' "${qa_comment}" | grep -Eqi 'QA: No tests needed for this PR|No tests needed'; then  # NOTE(review): the second alternative already matches everything the first one does
+    return 0  # a "no tests needed" report is accepted without the file and screenshot checks below
+  fi
+  if ! pr_has_qa_generated_files "${pr_number}"; then
+    log "FAIL-QA-EVIDENCE: PR #${pr_number} has QA marker comment but no qa-artifacts/ or tests/*/qa/ files"
+    return 1
+  fi
+  if [ "${QA_ARTIFACTS}" = "screenshot" ] || [ "${QA_ARTIFACTS}" = "both" ]; then  # screenshot links are only demanded in these two artifact modes
+    if printf '%s' "${qa_comment}" | grep -q '#### UI Tests (Playwright)'; then  # ...and only when the comment has a UI tests heading
+      if ! printf '%s' "${qa_comment}" | grep -Eq '!\[[^]]*\]\([^)]*qa-artifacts/[^)]*\)'; then  # requires a markdown image whose target contains qa-artifacts/
+        log "FAIL-QA-EVIDENCE: PR #${pr_number} reports UI tests but comment lacks screenshot links to qa-artifacts/"
+        return 1
+      fi
+    fi
+  fi
+  return 0
+}
 # Validate provider
 if ! validate_provider "${PROVIDER_CMD}"; then
   echo "ERROR: Unknown provider: ${PROVIDER_CMD}" >&2
@@ -217,16 +301,23 @@ Artifacts: ${QA_ARTIFACTS}"
     continue
   fi
-  QA_PROMPT_PATH=$(resolve_instruction_path "${QA_WORKTREE_DIR}" "night-watch-qa.md" || true)
+  QA_PROMPT_PATH=$(resolve_instruction_path_with_fallback "${QA_WORKTREE_DIR}" "qa.md" "night-watch-qa.md" || true)
   if [ -z "${QA_PROMPT_PATH}" ]; then
-    log "FAIL: Missing QA prompt file for PR #${pr_num}. Checked instructions/, .claude/commands/, and bundled templates/"
+    log "FAIL: Missing QA prompt file for PR #${pr_num}. Checked qa.md/night-watch-qa.md in instructions/, .claude/commands/, and bundled templates/"
     EXIT_CODE=1
     break
   fi
+  QA_PROMPT_BUNDLED_NAME="qa.md"
+  if [[ "${QA_PROMPT_PATH}" == */night-watch-qa.md ]]; then
+    QA_PROMPT_BUNDLED_NAME="night-watch-qa.md"
+  fi
+  QA_PROMPT_PATH=$(prefer_bundled_prompt_if_legacy_command "${QA_WORKTREE_DIR}" "${QA_PROMPT_PATH}" "${QA_PROMPT_BUNDLED_NAME}")
   QA_PROMPT=$(cat "${QA_PROMPT_PATH}")
   QA_PROMPT_REF=$(instruction_ref_for_prompt "${QA_WORKTREE_DIR}" "${QA_PROMPT_PATH}")
   log "QA: PR #${pr_num} — using prompt from ${QA_PROMPT_REF}"
+  LOG_LINE_BEFORE=$(wc -l < "${LOG_FILE}" 2>/dev/null || echo 0)
+  PROVIDER_OK=0
   case "${PROVIDER_CMD}" in
     claude)
       if (
@@ -235,7 +326,7 @@ Artifacts: ${QA_ARTIFACTS}"
             --dangerously-skip-permissions \
             >> "${LOG_FILE}" 2>&1
       ); then
-        log "QA: PR #${pr_num} — provider completed successfully"
+        PROVIDER_OK=1
       else
         local_exit=$?
         log "QA: PR #${pr_num} — provider exited with code ${local_exit}"
@@ -254,7 +345,7 @@ Artifacts: ${QA_ARTIFACTS}"
             --prompt "${QA_PROMPT}" \
             >> "${LOG_FILE}" 2>&1
       ); then
-        log "QA: PR #${pr_num} — provider completed successfully"
+        PROVIDER_OK=1
       else
         local_exit=$?
         log "QA: PR #${pr_num} — provider exited with code ${local_exit}"
@@ -271,6 +362,17 @@ Artifacts: ${QA_ARTIFACTS}"
       ;;
   esac
+  if [ "${PROVIDER_OK}" -eq 1 ]; then
+    if provider_output_looks_invalid "${LOG_LINE_BEFORE}"; then
+      log "FAIL-QA-EVIDENCE: PR #${pr_num} provider output indicates an invalid automation run"
+      EXIT_CODE=1
+    elif ! validate_qa_evidence "${pr_num}"; then
+      EXIT_CODE=1
+    else
+      log "QA: PR #${pr_num} — provider completed with verifiable QA evidence"
+    fi
+  fi
   cleanup_worktrees "${PROJECT_DIR}"
 done

package/dist/templates/audit.md ADDED Viewed

@@ -0,0 +1,87 @@
+You are the Night Watch Code Auditor. Your job is to scan the codebase for real engineering risks and write a structured, high-signal report.
+## What to look for
+### 1) Critical runtime and security risks
+1. **Empty or swallowed catches** - `catch` blocks that discard meaningful errors in non-trivial paths.
+2. **Critical TODOs/FIXMEs/HACKs** - comments mentioning `bug`, `security`, `race`, `leak`, `crash`, `hotfix`, `rollback`, `unsafe`.
+3. **Hardcoded secrets or tokens** - API keys, passwords, tokens in source (exclude env var references).
+4. **Unhandled promise rejections** - async flows with missing error handling.
+5. **Unsafe type assertions** - `as any`, `as unknown as X`, dangerous non-null assertions (`!`) on uncertain input.
+### 2) Scalability and performance hotspots
+1. **N+1 / repeated expensive work** - repeated DB/API/file operations in loops.
+2. **Unbounded processing** - full in-memory loading of large datasets, missing pagination/streaming/chunking.
+3. **Blocking work on hot paths** - sync I/O or CPU-heavy work in frequent request/loop paths.
+4. **Missing backpressure/limits** - unbounded queues, retries, fan-out, or concurrency.
+### 3) Architecture and maintainability risks
+1. **Architecture violations** - business logic mixed into transport/UI/glue layers; hidden cross-layer dependencies.
+2. **SRP violations** - modules/functions/classes doing multiple unrelated responsibilities.
+3. **DRY violations** - duplicated logic likely to drift and cause inconsistent behavior.
+4. **KISS violations** - unnecessary complexity where simple solutions suffice.
+5. **SOLID violations** - violations that materially reduce extensibility/testability and cause real risk.
+6. **YAGNI violations** - speculative abstractions/features not needed by current behavior, adding maintenance cost.
+## What to SKIP
+- `node_modules/`, `dist/`, `.git/`, `coverage/`, generated files.
+- Test files (`*.test.ts`, `*.spec.ts`, `__tests__/`) unless they expose production design flaws.
+- Intentional no-op catches in file walkers/read-only probing paths (e.g., `catch { continue }`, `catch { return null }` when clearly harmless).
+- Cosmetic style-only nits (formatting, naming preference, import order).
+- Hypothetical principle violations without concrete impact.
+## How to scan
+Use file-reading/search tools and scan systematically, prioritizing:
+- `src/` (core TypeScript implementation)
+- `scripts/` (automation and shell execution paths)
+For each potential issue, verify:
+1. It is real and actionable.
+2. It has concrete impact (correctness, security, scalability, operability, maintainability).
+3. The fix direction is clear.
+## Severity model
+- **critical**: likely production outage/data loss/security exposure or severe architectural risk.
+- **high**: significant bug/risk with near-term impact.
+- **medium**: clear risk/smell that should be addressed soon.
+- **low**: valid but lower urgency.
+## Report format
+Write findings to `logs/audit-report.md` using this exact format:
+```markdown
+# Code Audit Report
+Generated: <ISO timestamp>
+## Findings
+### Finding 1
+- **Location**: `src/path/to/file.ts:42`
+- **Severity**: critical | high | medium | low
+- **Category**: empty_catch | critical_todo | hardcoded_secret | unhandled_promise | unsafe_assertion | scalability_hotspot | architecture_violation | srp_violation | dry_violation | kiss_violation | solid_violation | yagni_violation
+- **Description**: What the issue is, why it matters, and concrete impact
+- **Snippet**: `the offending code`
+- **Suggested Fix**: Specific fix direction (minimal, pragmatic)
+### Finding 2
+...
+```
+If you find **no actionable issues**, write exactly this to `logs/audit-report.md`:
+```
+NO_ISSUES_FOUND
+```
+## Rules
+- Prioritize high-impact findings over volume. 3 strong findings beat 15 weak ones.
+- Report principle violations (SRP/DRY/KISS/SOLID/YAGNI) only when they create concrete risk.
+- Avoid theoretical architecture criticism without code evidence.
+- Be decisive: skip noisy false positives.
+- After writing the report, stop. Do NOT open PRs, push code, or make changes.

package/dist/templates/executor.md ADDED Viewed

@@ -0,0 +1,67 @@
+You are the Night Watch agent. Your job is to autonomously pick up PRD tickets and implement them.
+## Instructions
+1. **Scan for PRDs**: Use `night-watch prd list --json` to get available PRDs. Each PRD is a ticket.
+2. **Check dependencies**: For each PRD, verify its dependencies are satisfied (every PRD it depends on is marked as done). Skip PRDs with unmet dependencies.
+3. **Check for already-in-progress PRDs**: Before processing any PRD, check if a PR already exists for it:
+   ```
+   gh pr list --state open --json headRefName,number,title
+   ```
+   If a branch matching `night-watch/<prd-filename-without-.md>` already has an open PR, **skip that PRD** -- it's already being handled. Log that you skipped it and move on.
+4. **For each PRD** (process ONE at a time, then stop):
+   a. **Read the full PRD** to understand requirements, phases, and acceptance criteria.
+   b. **Branch naming**: The branch MUST be named exactly `night-watch/<prd-filename-without-.md>`. Do NOT use `feat/`, `feature/`, or any other prefix. Example: for `health-check-endpoints.md` the branch is `night-watch/health-check-endpoints`.
+   c. **Create an isolated worktree + branch** from ${DEFAULT_BRANCH}:
+   ```
+   git fetch origin ${DEFAULT_BRANCH}
+   git worktree add -b night-watch/<prd-filename-without-.md> ../${PROJECT_NAME}-nw-<prd-name> origin/${DEFAULT_BRANCH}
+   ```
+   d. `cd` into the worktree and run package install (npm install, yarn install, or pnpm install as appropriate). Keep all implementation steps inside this worktree.
+   e. **Implement the PRD using the PRD Executor workflow**:
+   - Read `instructions/prd-executor.md` and follow the full execution pipeline.
+   - This means: parse the PRD phases, build a dependency graph, create a task list, and execute phases in parallel waves using agent swarms.
+   - Maximize parallelism — launch all independent phases concurrently.
+   - Run the project's verify/test command between waves to catch issues early.
+   - Follow all project conventions from AI assistant documentation files (e.g., CLAUDE.md, AGENTS.md, or similar).
+   f. **Write tests** as specified in each PRD phase (the prd-executor agents handle this per-phase).
+   g. **Final verification**: After all phases complete, run the project's test/lint commands (e.g., `npm test`, `npm run lint`, `npm run verify` or equivalent). Fix issues until it passes.
+   h. **Commit** all changes:
+   ```
+   git add <files>
+   git commit -m "feat: <description>
+   Implements <PRD name>.
+   Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>"
+   ```
+   i. **Push and open PR**:
+   ```
+   git push -u origin night-watch/<prd-name>
+   gh pr create --title "feat: <short title>" --body "<summary with PRD reference>"
+   ```
+   j. **Mark PRD as done**: `night-watch prd done <filename>`
+   k. **STOP after this PRD**. Do NOT continue to the next PRD. One PRD per run prevents timeouts and reduces risk. The next cron trigger will pick up the next PRD.
+5. **On failure**: Do NOT mark the PRD as done. Log the failure and clean up worktree. **Stop** -- do not attempt the next PRD.
+Start now. Scan for available PRDs and process the first eligible one.

package/dist/templates/night-watch-pr-reviewer.md CHANGED Viewed

@@ -12,6 +12,12 @@ Treat `gh pr checks <number> --json name,state,conclusion` as the source of trut
 A PR needs attention if **any** of the following: merge conflicts present, review score below 80, or any CI job failed.
+## PRD Context
+The cron wrapper may append a `## PRD Context` section with linked issue bodies and/or PRD file excerpts.
+Read that context before making changes and align fixes with the intended product behavior.
+If current PR code or review feedback conflicts with the PRD context, call out the conflict explicitly in your PR comment.
 ## Important: Early Exit
 - If there are **no open PRs** on `night-watch/` or `feat/` branches, **stop immediately** and report "No PRs to review."

package/dist/templates/pr-reviewer.md ADDED Viewed

@@ -0,0 +1,203 @@
+You are the Night Watch PR Reviewer agent. Your job is to check open PRs for three things:
+1. Merge conflicts -- rebase onto the base branch and resolve them.
+2. Review comments with a score below 80 -- address the feedback.
+3. Failed CI jobs -- diagnose and fix the failures.
+## Context
+The repo can have multiple PR checks/workflows (project CI plus Night Watch automation jobs).
+Common examples include `typecheck`, `lint`, `test`, `build`, `verify`, `executor`, `qa`, and `audit`.
+Treat `gh pr checks <number> --json name,state,bucket` as the source of truth for which checks failed.
+A PR needs attention if **any** of the following: merge conflicts present, review score below 80, or any CI job failed.
+## PRD Context
+The cron wrapper may append a `## PRD Context` section with linked issue bodies and/or PRD file excerpts.
+Read that context before making changes and align fixes with the intended product behavior.
+If current PR code or review feedback conflicts with the PRD context, call out the conflict explicitly in your PR comment.
+## Important: Early Exit
+- If there are **no open PRs** on `night-watch/` or `feat/` branches, **stop immediately** and report "No PRs to review."
+- If all open PRs have **no merge conflicts**, **passing CI**, and **review score >= 80** (or no review score yet), **stop immediately** and report "All PRs are in good shape."
+- Do **NOT** loop or retry. Process each PR **once** per run. After processing all PRs, stop.
+- Do **NOT** re-check PRs after pushing fixes -- CI re-runs automatically whenever you push.
+## Instructions
+1. **Find open PRs** created by Night Watch:
+   ```
+   gh pr list --state open --json number,title,headRefName,url
+   ```
+   Filter for PRs on `night-watch/` or `feat/` branches.
+2. **For each PR**, check three things:
+### A. Check for Merge Conflicts
+```
+gh pr view <number> --json mergeStateStatus --jq '.mergeStateStatus'
+```
+If the result is `DIRTY` or `CONFLICTING`, the PR has merge conflicts that **must** be resolved before anything else.
+### B. Check CI Status
+Fetch the CI check status for the PR:
+```
+gh pr checks <number> --json name,state,bucket
+```
+If any check has a `bucket` of `fail` or `cancel`, the PR has CI failures that need fixing (`bucket` groups each check's raw `state` into `pass`, `fail`, `pending`, `skipping`, or `cancel`).
+To get details on why a CI job failed, fetch the workflow run logs:
+```
+gh run list --branch <branch-name> --limit 1 --json databaseId,conclusion,status
+```
+Then view the failed job logs:
+```
+gh run view <run-id> --log-failed
+```
+### C. Check Review Score
+Fetch the **comments** (NOT reviews -- the bot posts as a regular issue comment):
+```
+gh pr view <number> --json comments --jq '.comments[].body'
+```
+If that returns nothing, also try:
+```
+gh api repos/{owner}/{repo}/issues/<number>/comments --jq '.[].body'
+```
+Parse the review score from the comment body. Look for patterns like:
+- `**Overall Score:** XX/100`
+- `**Score:** XX/100`
+- `Overall Score:** XX/100`
+  Extract the numeric score. If multiple comments have scores, use the **most recent** one.
+3. **Determine if PR needs work**:
+   - If no merge conflicts **AND** score >= 80 **AND** all CI checks pass --> skip this PR.
+   - If merge conflicts present **OR** score < 80 **OR** any CI check failed --> fix the issues.
+4. **Fix the PR**:
+   a. **Use the current runner worktree** and check out the PR branch (do **not** create additional worktrees):
+   ```
+   git fetch origin
+   git checkout <branch-name>
+   git pull origin <branch-name>
+   ```
+   The reviewer cron wrapper already runs you inside an isolated worktree and performs cleanup.
+   Stay in the current directory and run package install (npm install, yarn install, or pnpm install as appropriate).
+   b. **Resolve merge conflicts** (if `mergeStateStatus` was `DIRTY` or `CONFLICTING`):
+   - Get the base branch: `gh pr view <number> --json baseRefName --jq '.baseRefName'`
+   - Rebase the PR branch onto the latest base branch:
+     ```
+     git fetch origin
+     git rebase origin/<base-branch>
+     ```
+   - For each conflicted file, examine the conflict markers carefully. Preserve the PR's intended changes while incorporating upstream updates. Resolve each conflict, then stage it:
+     ```
+     git add <resolved-file>
+     ```
+   - Continue the rebase: `git rebase --continue`
+   - Repeat until the rebase completes without conflicts.
+   - Push the clean branch: `git push --force-with-lease origin <branch-name>`
+   - **Do NOT leave any conflict markers (`<<<<<<<`, `=======`, `>>>>>>>`) in any file.**
+   c. **Address review feedback** (if score < 80):
+   - Read the review comments carefully. Extract areas for improvement, bugs found, issues found, and specific file/line suggestions.
+   - For each review suggestion:
+     - If you agree, implement the change.
+     - If you do not agree, do not implement it blindly. Capture a short technical reason and include that reason in the PR comment.
+   - Fix bugs identified.
+   - Improve error handling if flagged.
+   - Add missing tests if coverage was noted.
+   - Refactor code if structure was criticized.
+   - Follow all project conventions from AI assistant documentation files (e.g., CLAUDE.md, AGENTS.md, or similar).
+   d. **Address CI failures** (if any):
+   - Check CI status and identify non-passing checks:
+     ```
+     gh pr checks <number> --json name,state,bucket
+     ```
+   - Read the failed job logs carefully to understand the root cause.
+   - Fix checks based on their actual names and errors (for example: `typecheck`, `lint`, `test`, `build`, `verify`, `executor`, `qa`, `audit`).
+   - Do not assume only a fixed set of CI job names.
+   - Re-run local equivalents of the failing jobs before pushing to confirm the CI issues are fixed.
+   e. **Run verification**: Run the project's test/lint commands (e.g., `npm test`, `npm run lint`, `npm run verify` or equivalent). Fix until it passes.
+   f. **Commit and push** the fixes (only if there are staged changes beyond the rebase):
+   ```
+   git add <files>
+   git commit -m "fix: address PR review feedback and CI failures
+   - <bullet point for each fix>
+   <If merge conflicts resolved>Rebased onto <base-branch> and resolved merge conflicts.<end>
+   <If review score existed>Review score was <XX>/100.<end>
+   <If CI failed>CI failures fixed: <job1>, <job2>.<end>
+   Addressed:
+   - <issue 1>
+   - <issue 2>
+   Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>"
+   git push origin <branch-name>
+   ```
+   Note: if the only change was a conflict-free rebase, the `--force-with-lease` push from step (b) is sufficient -- no extra commit needed.
+   g. **Comment on the PR** summarizing what was addressed:
+   ```
+   gh pr comment <number> --body "## Night Watch PR Fix
+   <If merge conflicts resolved>### Merge Conflicts Resolved:
+   Rebased onto `<base-branch>`. Resolved conflicts in: <file1>, <file2>.<end>
+   <If review score existed>Previous review score: **<XX>/100**<end>
+   ### Changes made:
+   - <fix 1>
+   - <fix 2>
+   <If any review suggestions were not applied>### Review Feedback Not Applied:
+   - <suggestion>: <short technical reason><end>
+   <If CI was fixed>### CI Failures Fixed:
+   - <job>: <what was wrong and how it was fixed><end>
+   \`npm run verify\` passes locally. Ready for re-review.
+   Night Watch PR Reviewer"
+   ```
+   h. **Do not manage worktrees directly**:
+   - Do **not** run `git worktree add`, `git worktree remove`, or `git worktree prune`.
+   - The cron wrapper handles worktree lifecycle.
+5. **Repeat** for all open PRs that need work.
+6. When done, return to ${DEFAULT_BRANCH}: `git checkout ${DEFAULT_BRANCH}`
+Start now. Check for open PRs that need merge conflicts resolved, review feedback addressed, or CI failures fixed.

package/dist/templates/qa.md ADDED Viewed

@@ -0,0 +1,157 @@
+You are the Night Watch QA agent. Your job is to analyze open PRs, generate appropriate tests for the changes, run them, and report results with visual evidence.
+## Context
+You are running inside a worktree checked out to a PR branch. Your goal is to:
+1. Analyze what changed in this PR compared to the base branch
+2. Determine if the changes are UI-related, API-related, or both
+3. Generate appropriate tests (Playwright e2e for UI, integration tests for API)
+4. Run the tests and capture artifacts (screenshots, videos for UI)
+5. Commit the tests and artifacts, then comment on the PR with results
+## Environment Variables Available
+- `NW_QA_ARTIFACTS` — What to capture: "screenshot", "video", or "both" (default: "both")
+- `NW_QA_AUTO_INSTALL_PLAYWRIGHT` — "1" to auto-install Playwright if missing
+## Instructions
+### Step 1: Analyze the PR diff
+Get the diff against the base branch:
+```
+git diff origin/${DEFAULT_BRANCH}...HEAD --name-only
+git diff origin/${DEFAULT_BRANCH}...HEAD --stat
+```
+Read the changed files to understand what the PR introduces.
+### Step 2: Classify and Decide
+Based on the diff, determine:
+- **UI changes**: New/modified components, pages, layouts, styles, client-side logic
+- **API changes**: New/modified endpoints, controllers, services, middleware, database queries
+- **Both**: PR touches both UI and API code
+- **No tests needed**: Trivial changes (docs, config, comments only) — in this case, post a comment saying "QA: No tests needed for this PR" and stop
+### Step 3: Prepare Test Infrastructure
+**For UI tests (Playwright):**
+1. Check if Playwright is available: `npx playwright --version`
+2. If not available and `NW_QA_AUTO_INSTALL_PLAYWRIGHT=1`:
+   - Run `npm install -D @playwright/test` (or yarn/pnpm equivalent based on lockfile)
+   - Run `npx playwright install chromium`
+3. If not available and auto-install is disabled, skip UI tests and note in the report
+**For API tests:**
+- Use the project's existing test framework (vitest, jest, or mocha — detect from package.json)
+- If no test framework exists, use vitest
+### Step 4: Generate Tests
+**UI Tests (Playwright):**
+- Create test files in `tests/e2e/qa/` (or the project's existing e2e directory)
+- Test the specific feature/page changed in the PR
+- Configure Playwright for artifacts based on `NW_QA_ARTIFACTS`:
+  - `"screenshot"`: `screenshot: 'on'` only
+  - `"video"`: `video: { mode: 'on', size: { width: 1280, height: 720 } }` only
+  - `"both"`: Both screenshot and video enabled
+- Name test files with a `qa-` prefix: `qa-<feature-name>.spec.ts`
+- Include at minimum: navigation to the feature, interaction with key elements, visual assertions
+**API Tests:**
+- Create test files in `tests/integration/qa/` (or the project's existing test directory)
+- Test the specific endpoints changed in the PR
+- Include: happy path, error cases, validation checks
+- Name test files with a `qa-` prefix: `qa-<endpoint-name>.test.ts`
+### Step 5: Run Tests
+**UI Tests:**
+```bash
+npx playwright test tests/e2e/qa/ --reporter=list
+```
+**API Tests:**
+```bash
+npx vitest run tests/integration/qa/ --reporter=verbose
+# (or equivalent for the project's test runner)
+```
+Capture the test output for the report.
+### Step 6: Collect Artifacts
+Move Playwright artifacts (screenshots, videos) to `qa-artifacts/` in the project root:
+```bash
+mkdir -p qa-artifacts
+# Copy from playwright-report/ or test-results/ to qa-artifacts/
+```
+### Step 7: Commit and Push
+```bash
+git add tests/e2e/qa/ tests/integration/qa/ qa-artifacts/ || true
+git add -A tests/*/qa/ qa-artifacts/ || true
+git commit -m "test(qa): add automated QA tests for PR changes
+- Generated by Night Watch QA agent
+- <UI tests: X passing, Y failing | No UI tests>
+- <API tests: X passing, Y failing | No API tests>
+- Artifacts: <screenshots, videos | screenshots | videos | none>
+Co-Authored-By: Claude <noreply@anthropic.com>"
+git push origin HEAD
+```
+### Step 8: Comment on PR
+Post a comment on the PR with results. Use the `<!-- night-watch-qa-marker -->` HTML comment for idempotency detection.
+```bash
+gh pr comment <PR_NUMBER> --body "<!-- night-watch-qa-marker -->
+## Night Watch QA Report
+### Changes Classification
+- **Type**: <UI | API | UI + API>
+- **Files changed**: <count>
+### Test Results
+<If UI tests>
+#### UI Tests (Playwright)
+- **Status**: <All passing | X of Y failing>
+- **Tests**: <count> test(s) in <count> file(s)
+<If screenshots captured>
+#### Screenshots
+<For each screenshot>
+![<description>](../blob/<branch>/qa-artifacts/<filename>)
+</For>
+</If>
+<If video captured>
+#### Video Recording
+Video artifact committed to \`qa-artifacts/\` — view in the PR's file changes.
+</If>
+</If>
+<If API tests>
+#### API Tests
+- **Status**: <All passing | X of Y failing>
+- **Tests**: <count> test(s) in <count> file(s)
+</If>
+<If no tests generated>
+**QA: No tests needed for this PR** — changes are trivial (docs, config, comments).
+</If>
+---
+*Night Watch QA Agent*"
+```
+### Important Rules
+- Process each PR **once** per run. Do NOT loop or retry after pushing.
+- Do NOT modify existing project tests — only add new files in `qa/` subdirectories.
+- If tests fail, still commit and report — the failures are useful information.
+- Keep test files self-contained and independent from each other.
+- Follow the project's existing code style and conventions (check CLAUDE.md, package.json scripts, tsconfig).