@misterhuydo/sentinel 1.2.5 → 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2147 +1,2552 @@
1
- """
2
- sentinel_boss.py — Claude-backed Sentinel Boss.
3
-
4
- Claude acts as the boss: reads project state, decides on actions,
5
- executes them via tool use, and responds naturally. One agentic loop
6
- per turn — Claude may call multiple tools before replying.
7
- """
8
-
9
- import json
10
- import logging
11
- import os
12
- import re
13
- import subprocess
14
- import uuid
15
- from datetime import datetime, timezone
16
- from pathlib import Path
17
- from typing import Optional
18
-
19
- from .notify import alert_if_rate_limited, slack_alert, is_rate_limited
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
- # ── System prompt ────────────────────────────────────────────────────────────
24
-
25
- _SYSTEM = """\
26
- You are Sentinel Boss — the AI interface for Sentinel, a 24/7 autonomous DevOps agent.
27
-
28
- Sentinel watches production logs, detects errors, generates code fixes via Claude Code,
29
- and opens GitHub PRs for admin review (or pushes directly if AUTO_PUBLISH=true).
30
-
31
- Your job:
32
- - Understand what the DevOps engineer needs in natural language
33
- - Query Sentinel's live state (errors, fixes, open PRs) on their behalf
34
- - Deliver tasks/issues to the right project — you know all projects in this workspace
35
- - Control Sentinel (pause/resume) when asked
36
- - Give honest, concise answers — you know this system inside out
37
- - If a project name is unclear or ambiguous, ask the engineer to clarify — never guess
38
-
39
- What you can do (tools available):
40
-
41
- 1. get_status — Show recent errors detected, fixes applied/pending, open PRs.
42
- e.g. "what happened today?", "any issues?", "show open PRs"
43
-
44
- 2. create_issue — Deliver a fix/task to any project in this workspace by short name.
45
- You know all project names — use list_projects if you're unsure.
46
- If the project name is ambiguous or not found, ask to clarify.
47
- e.g. "tell 1881 to fix X", "look into Y in elprint", "investigate Z"
48
-
49
- 3. pause_sentinel — Create SENTINEL_PAUSE file to halt all auto-fix activity.
50
- e.g. "pause sentinel", "stop auto-fixing"
51
-
52
- 4. resume_sentinel — Remove SENTINEL_PAUSE file to resume normal operation.
53
- e.g. "resume sentinel", "unpause"
54
-
55
- 5. list_projects — List all configured repos and log sources in this Sentinel instance.
56
- e.g. "what projects are you watching?", "list all repos"
57
-
58
- 6. search_logs — SSH live to servers and grep logs in real time (uses fetch_log.sh with
59
- the query as GREP_FILTER). Falls back to cached files if unavailable.
60
- e.g. "search logs for illegal PIN in 1881", "find X in SSOLWA", "grep logs for Z"
61
-
62
- 7. trigger_poll Trigger an immediate poll cycle without waiting for the schedule.
63
- e.g. "check now", "poll immediately", "don't wait, run now"
64
-
65
- 8. get_repo_status — Show the current git branch, last commit, and recent fix branches
66
- for a specific repository.
67
- e.g. "status of repo X", "what branch is cairn on?"
68
-
69
- 9. list_recent_commits List the most recent commits in a repo (including Sentinel's auto-fixes).
70
- e.g. "show me recent commits in elprint-sales", "what did sentinel commit?"
71
-
72
- 10. get_fix_detail — Get full details of a specific fix: error, patch path, PR URL, status.
73
- e.g. "show fix abc123", "details on that fix"
74
-
75
- 11. list_errors — List recent errors from the state store, optionally filtered by repo or source.
76
- e.g. "show all errors today", "what errors hit elprint this week?"
77
-
78
- 12. pull_repo — Run git pull on one or all managed application repos.
79
- e.g. "pull changes", "git pull all repos", "update the code"
80
-
81
- 13. pull_config — Run git pull on one or all Sentinel project config dirs.
82
- e.g. "pull config for 1881", "update sentinel config", "pull all configs"
83
-
84
- 14. fetch_logs — Run fetch_log.sh on demand to pull fresh logs from remote servers right now.
85
- Supports --debug mode and parameter overrides (tail count, grep filter).
86
- e.g. "fetch logs", "try fetch_log.sh for SSOLWA", "fetch logs with debug",
87
- "grab latest logs from STS", "fetch logs without filter"
88
-
89
- 15. watch_bot — Register a Slack bot for passive monitoring. Every message it posts is
90
- auto-queued as an issue in the bot's registered project.
91
- ALWAYS requires a project — infer from context or ask the user first.
92
- e.g. "listen to @alertbot", "watch @bot1 @bot2 for project 1881", "monitor @errorbot"
93
-
94
- 16. unwatch_bot — Remove a Slack bot from the passive watch list.
95
- e.g. "stop watching @alertbot", "unwatch @errorbot"
96
-
97
- 17. list_watched_bots — Show all Slack bots currently being passively monitored and which projects
98
- they are delivering to.
99
- e.g. "which bots are you watching?", "list monitored bots"
100
-
101
- 18. upgrade_sentinelPull the latest Sentinel agent code, update Python deps, and restart the
102
- process. Safe to run at any time — no restart if already up to date.
103
- e.g. "upgrade sentinel", "update sentinel", "upgrade yourself"
104
-
105
- 19. ask_codebase Ask any natural-language question about a managed repo's codebase.
106
- Claude Code answers using its full knowledge of the code.
107
- e.g. "what does the 1881 backend do?", "find PIN validation in elprint",
108
- "any TODOs in cairn?", "are there security issues in elprint-sales?"
109
-
110
- 20. restart_project — Stop and restart a specific Sentinel monitoring instance (stop.sh + start.sh).
111
- This restarts the Sentinel agent for that project, NOT the application itself.
112
- e.g. "restart sentinel for 1881", "restart the 1881 monitor", "reload elprint sentinel"
113
-
114
- 21. tail_log Fetch the last N lines of a log source live, without a grep filter.
115
- e.g. "show recent SSOLWA logs", "tail STS", "last 200 lines from 1881 logs"
116
-
117
- 22. post_file — Upload a text file to the Slack conversation (diff, log excerpt, report, CSV).
118
- Use when output is too large for chat, or the user asks to download/export something.
119
- e.g. "give me that as a file", "export the log", "send me the diff"
120
-
121
- When someone asks what you can do, what you support, what your capabilities are, or how you can help,
122
- reply with a short summary grouped by category:
123
-
124
- *Monitoring & status*
125
- `get_status` errors detected, fixes applied/pending/failed, open PRs "what happened today?"
126
- `get_repo_status` per-repo breakdown of errors and fixes — "how is elprint doing?"
127
- • `list_recent_commits` — recent Sentinel auto-fix commits — "what did Sentinel commit?"
128
-
129
- *Log management*
130
- • `fetch_logs` — pull fresh logs from servers right now — "fetch logs for SSOLWA"
131
- • `search_logs` — live SSH grep on production servers — "search logs for illegal PIN in 1881"
132
- • `tail_log` — last N lines of a log source, no filter — "show recent SSOLWA logs"
133
-
134
- *Codebase questions*
135
- • `ask_codebase` — any question about a repo's code — "what does 1881 do?", "find PIN validation", "any TODOs?", "security issues?"
136
-
137
- *Fix management*
138
- • `get_fix_details` — full details of a specific fix — "show fix abc123"
139
- `list_pending_prs` — all open Sentinel PRs awaiting review — "list open PRs"
140
- • `check_auth_status` — Claude auth health, rate-limit circuit state, fix engine 24 h stats "is Claude working?", "any rate limits?", "auth issues?"
141
-
142
- *Project & task delivery*
143
- • `list_projects` — all projects and repos Sentinel manages — "what projects do you manage?"
144
- • `create_issue` — deliver a task to any project by name — "tell 1881 to fix X"
145
- • `trigger_poll` — run a log-fetch + fix cycle right now — "check now"
146
- • `pause_sentinel` / `resume_sentinel` — halt or resume all auto-fix activity — "pause Sentinel"
147
-
148
- *Repo & config sync*
149
- • `pull_repo` — git pull on managed application repos — "pull latest code"
150
- • `pull_config` — git pull on Sentinel config dirs — "pull config for elprint"
151
-
152
- *File sharing*
153
- `post_file` upload a file to Slack — "give me that as a file", "export the log", "send me the diff"
154
-
155
- *Personal*
156
- • `my_stats` — your activity: issues submitted, fixes, conversation history — "my stats"
157
- `clear_my_history` — wipe your conversation history and start fresh — "clear my history"
158
-
159
- *Slack bot watching*
160
- • `list_watched_bots` — show all bots currently being monitored — "which bots are you watching?"
161
-
162
- *Admin* (SLACK_ADMIN_USERS if configured, otherwise all allowed users)
163
- • `watch_bot` — register a Slack bot for passive monitoring; its messages become issues — "listen to @alertbot"
164
- `unwatch_bot` — stop monitoring a bot — "stop watching @errorbot"
165
- • `restart_project` — stop + restart a Sentinel monitoring instance (not the app) — "restart sentinel for 1881"
166
- • `upgrade_sentinel` — pull latest Sentinel release and restart — "upgrade sentinel"
167
- `list_all_users` all Slack users who have talked to Sentinel + activity summary
168
- • `clear_user_history` — wipe a specific user's conversation history
169
- • `reset_fingerprint` — clear the 24h fix lock so Sentinel retries an error
170
- • `list_all_errors` — full unfiltered error database
171
- • `export_db` — dump full Sentinel state as a downloadable file
172
-
173
- Tone: direct, professional, like a senior engineer who owns the system.
174
- Don't pad responses. Don't say "Great question!" or "Certainly!".
175
- If you don't know something, use a tool to find out before saying you don't know.
176
-
177
- When to act vs. when to ask:
178
- - Clear command ("check status", "fetch logs", "pause sentinel") → call the tool immediately, reply with results.
179
- - Ambiguous or exploratory ("what does get_repo_status do?", "tell me about search_logs") → explain the tool naturally, then ask: "Want me to run it?"
180
- - Unclear intent (could be either) use judgment: brief explanation + "Want me to run this now?"
181
- - If a tool call will take a moment (search, fetch, pull), prefix your reply with a brief "working" line ending in "..." before the results, e.g. "Searching SSOLWA for TryDig activity..." then the actual output.
182
- Never just say a working line and stop — always follow it with the results in the same message.
183
-
184
- Session context critical rules:
185
- - Loaded conversation history is prior-session background only. It may be hours or days old.
186
- - NEVER say "the previous search", "I already fetched", "as I found earlier", or any phrase implying you already did part of the current task unless a tool result appears in THIS response's tool calls.
187
- - When handling a new request, call the tools fresh. Do not assume any prior tool result is still current or that any prior step "counts" toward the current task.
188
- - The only exception: if the user explicitly asks about something from the history ("what did you find earlier?"), you may reference it — but note it is from a prior session.
189
-
190
- Trust your tool results never contradict them:
191
- - If any search_logs call in this response returned total_matches > 0, you HAVE results. Report them.
192
- - Never say "no results found" or "nothing was found" when a tool result shows total_matches > 0.
193
- - If one source-specific call returns 0 but a broader call returned matches, use the broader results.
194
- - A cached result with files_searched=0 is a source-name lookup failure, NOT an absence of log data.
195
- Treat it as "source not recognised" and fall back to the broad search results you already have.
196
-
197
- Avoid redundant tool calls (within a single response only — always run tools fresh for new requests):
198
- - If a broad search (e.g. search_logs with no source filter) already returned results in THIS response, do NOT repeat the same search with a source filter to "refine" — use what you already fetched.
199
- - If a tool call fails in THIS response, do NOT retry the entire search from scratch. Continue with what succeeded and note the failure.
200
- - One pass per task: gather all needed data in a single round of tool calls, then produce the final answer.
201
-
202
- Issue identification — before calling create_issue:
203
- 1. Determine if the message is a REAL issue/task (bug report, feature request, investigation ask)
204
- vs. a status question, tool query, or casual chat. If not an issue, just answer normally.
205
- 2. If it IS an issue, gather what's needed before creating:
206
- - Project: which project? If unclear, ask. Use list_projects if you need to check names.
207
- - Context: what's the problem? Include everything: description, error text, steps to reproduce.
208
- - Attachments: summarise any files/screenshots the user shared.
209
- - Support URL: note any ticket/doc/link the user mentioned.
210
- - Identity: always captured automatically from the Slack session.
211
- 3. Populate `findings` with curated evidence only when relevant and concise:
212
- - If you ran search_logs, tail_log, ask_codebase, or get_status before creating the issue,
213
- summarise only the findings directly related to this specific issue.
214
- - Do NOT paste raw tool output. Summarise: which services, how often, key pattern, 1-3 example lines.
215
- - If the search returned nothing relevant, or the issue is purely user-described with no log evidence, leave `findings` empty.
216
- - The fix engine reads only the issue file. Give it signal, not noise — 500 words max.
217
- 4. Before calling the tool, confirm with the user in natural language:
218
- e.g. "I'll create an issue for project *1881* here's what I have: [summary]. Look right?"
219
- Wait for their confirmation before proceeding.
220
- EXCEPTION: if the user's message already contains a clear project + unambiguous description,
221
- skip the confirmation and create immediately don't ask when nothing is unclear.
222
- 5. After creating, tell them the issue was queued and Sentinel will pick it up on the next poll.
223
-
224
- When the engineer's request is fully handled, end your LAST message with the token: [DONE]
225
- IMPORTANT: Always write your actual reply text FIRST, then append [DONE] at the end. Example: "Hello! I'm Sentinel. [DONE]". Never output [DONE] as your only content.
226
- For greetings like "hello" or empty messages, introduce yourself briefly and offer help, then end with [DONE].
227
- If you need a follow-up from them, do NOT include [DONE] — wait for their next message.
228
- """
229
-
230
- # ── Tool definitions ─────────────────────────────────────────────────────────
231
-
232
- _TOOLS = [
233
- {
234
- "name": "get_status",
235
- "description": (
236
- "Get recent errors, fixes applied, fixes pending review, and open PRs. "
237
- "Use for: 'what happened today?', 'any issues?', 'how are things?', "
238
- "'what are the open PRs?', 'did sentinel fix anything?'"
239
- ),
240
- "input_schema": {
241
- "type": "object",
242
- "properties": {
243
- "hours": {
244
- "type": "integer",
245
- "description": "Look-back window in hours (default 24)",
246
- "default": 24,
247
- },
248
- },
249
- },
250
- },
251
- {
252
- "name": "create_issue",
253
- "description": (
254
- "Deliver a confirmed issue/task to a Sentinel project instance. "
255
- "Only call this after you have: (1) confirmed the message is a real issue or task, "
256
- "(2) identified the target project, (3) gathered enough context, and "
257
- "(4) confirmed with the user ('I'll create this issue for project X — does that look right?'). "
258
- "Do NOT call this for status questions, tool queries, or casual chat."
259
- ),
260
- "input_schema": {
261
- "type": "object",
262
- "properties": {
263
- "description": {
264
- "type": "string",
265
- "description": "Full problem/task description — include all context the user gave you",
266
- },
267
- "project": {
268
- "type": "string",
269
- "description": "Project short name (e.g. '1881', 'elprint'). Ask if unclear.",
270
- },
271
- "target_repo": {
272
- "type": "string",
273
- "description": "Specific repo within the project (omit to let Sentinel auto-route)",
274
- },
275
- "support_url": {
276
- "type": "string",
277
- "description": "Any URL the user shared (ticket, doc, screenshot link, etc.)",
278
- },
279
- "attachments_summary": {
280
- "type": "string",
281
- "description": "Summary of any files/screenshots the user attached",
282
- },
283
- "findings": {
284
- "type": "string",
285
- "description": (
286
- "A concise, curated summary of evidence directly relevant to this issue "
287
- "NOT raw tool output. Include only what the fix engine needs: "
288
- "key error patterns, affected services, approximate frequency/timestamps, "
289
- "and 1-3 representative log lines. Omit unrelated results. "
290
- "Keep under 500 words. Leave empty if no tool results are relevant."
291
- ),
292
- },
293
- },
294
- "required": ["description"],
295
- },
296
- },
297
- {
298
- "name": "get_fix_details",
299
- "description": "Get full details of a specific fix by fingerprint (8+ hex chars).",
300
- "input_schema": {
301
- "type": "object",
302
- "properties": {
303
- "fingerprint": {"type": "string"},
304
- },
305
- "required": ["fingerprint"],
306
- },
307
- },
308
- {
309
- "name": "list_pending_prs",
310
- "description": "List all open Sentinel PRs awaiting admin review.",
311
- "input_schema": {"type": "object", "properties": {}},
312
- },
313
- {
314
- "name": "check_auth_status",
315
- "description": (
316
- "Check Claude authentication health, current rate-limit / usage-limit circuit state, "
317
- "and fix engine stats for the last 24 h. "
318
- "Use when someone asks: 'is Claude working?', 'any rate limits?', 'why aren't fixes running?', "
319
- "'is the API key OK?', 'auth issues?', 'fix engine status'."
320
- ),
321
- "input_schema": {"type": "object", "properties": {}},
322
- },
323
- {
324
- "name": "pause_sentinel",
325
- "description": (
326
- "Pause ALL Sentinel fix activity immediately. "
327
- "Use when the engineer says 'pause', 'stop', 'freeze', or 'hold off'."
328
- ),
329
- "input_schema": {"type": "object", "properties": {}},
330
- },
331
- {
332
- "name": "resume_sentinel",
333
- "description": "Resume Sentinel fix activity after a pause.",
334
- "input_schema": {"type": "object", "properties": {}},
335
- },
336
- {
337
- "name": "list_projects",
338
- "description": (
339
- "List all projects (Sentinel instances) in this workspace and the repos "
340
- "each one manages. Use for: 'what projects do you manage?', 'list projects', "
341
- "'what repos are configured?', 'show me all projects'."
342
- ),
343
- "input_schema": {"type": "object", "properties": {}},
344
- },
345
- {
346
- "name": "search_logs",
347
- "description": (
348
- "Search production logs for a keyword or pattern. "
349
- "When a project or source is specified (or can be inferred), performs a LIVE fetch "
350
- "via fetch_log.sh with the query as the grep filter — SSHes directly to the server. "
351
- "Falls back to searching locally-cached log files when no source can be determined. "
352
- "Use for: 'search logs for illegal PIN in 1881', 'find X in SSOLWA logs', "
353
- "'what did user Y do?', 'show entries for appid=Z', 'grep logs for X'."
354
- ),
355
- "input_schema": {
356
- "type": "object",
357
- "properties": {
358
- "query": {
359
- "type": "string",
360
- "description": "Keyword or regex to grep for",
361
- },
362
- "source": {
363
- "type": "string",
364
- "description": "Log source name to search (partial match against log-config filenames, e.g. 'SSOLWA', '1881'). Leave empty to search all sources.",
365
- },
366
- "max_matches": {
367
- "type": "integer",
368
- "description": "Max matching lines to return per source (default 30)",
369
- "default": 30,
370
- },
371
- "tail": {
372
- "type": "integer",
373
- "description": (
374
- "Number of log lines to fetch from the server before grepping (default: config value, typically 500). "
375
- "Increase when the user asks for a longer time window — e.g. 'yesterday up to now' → use 5000-10000. "
376
- "Higher values take longer but cover more history."
377
- ),
378
- },
379
- },
380
- "required": ["query"],
381
- },
382
- },
383
- {
384
- "name": "trigger_poll",
385
- "description": (
386
- "Trigger an immediate log-fetch and error-detection cycle without waiting "
387
- "for the next scheduled interval. Use when: 'check now', 'run now', "
388
- "'poll immediately', 'don't wait'."
389
- ),
390
- "input_schema": {"type": "object", "properties": {}},
391
- },
392
- {
393
- "name": "get_repo_status",
394
- "description": (
395
- "Per-repository breakdown of errors detected and fixes applied. "
396
- "Use for: 'how is repo X doing?', 'which repo has the most issues?', "
397
- "'break down by repo'."
398
- ),
399
- "input_schema": {
400
- "type": "object",
401
- "properties": {
402
- "hours": {
403
- "type": "integer",
404
- "description": "Look-back window in hours (default 24)",
405
- "default": 24,
406
- },
407
- },
408
- },
409
- },
410
- {
411
- "name": "list_recent_commits",
412
- "description": (
413
- "List recent commits made by Sentinel across all managed repos. "
414
- "Use for: 'what did Sentinel commit?', 'show recent auto-fixes', 'what was changed?'."
415
- ),
416
- "input_schema": {
417
- "type": "object",
418
- "properties": {
419
- "limit": {
420
- "type": "integer",
421
- "description": "Max commits per repo (default 5)",
422
- "default": 5,
423
- },
424
- },
425
- },
426
- },
427
- {
428
- "name": "pull_repo",
429
- "description": (
430
- "Run git pull on one or all managed repos to fetch latest changes from GitHub. "
431
- "Use for: 'pull changes', 'git pull', 'update repo X', 'fetch latest code'."
432
- ),
433
- "input_schema": {
434
- "type": "object",
435
- "properties": {
436
- "repo": {
437
- "type": "string",
438
- "description": "Repo name to pull (omit to pull all configured repos)",
439
- },
440
- },
441
- },
442
- },
443
- {
444
- "name": "pull_config",
445
- "description": (
446
- "Run git pull on one or all Sentinel project config directories. "
447
- "Projects are matched by short name ('1881', 'elprint') or full dir name ('sentinel-1881'). "
448
- "Use for: 'pull config for 1881', 'update sentinel config', 'pull all configs'."
449
- ),
450
- "input_schema": {
451
- "type": "object",
452
- "properties": {
453
- "project": {
454
- "type": "string",
455
- "description": "Project short name or dir name to pull (omit for all projects)",
456
- },
457
- },
458
- },
459
- },
460
- {
461
- "name": "fetch_logs",
462
- "description": (
463
- "Run fetch_log.sh for one or all configured log sources to pull the latest logs "
464
- "from remote servers right now. Use for: 'fetch logs', 'run fetch_log.sh', "
465
- "'grab latest logs from SSOLWA', 'try fetch_log.sh for STS', "
466
- "'pull logs from server', 'get fresh logs'."
467
- ),
468
- "input_schema": {
469
- "type": "object",
470
- "properties": {
471
- "source": {
472
- "type": "string",
473
- "description": "Log source name to fetch (partial match, e.g. 'SSOLWA'). Omit to fetch all.",
474
- },
475
- "debug": {
476
- "type": "boolean",
477
- "description": "Run fetch_log.sh with --debug flag to show SSH/grep details",
478
- "default": False,
479
- },
480
- "tail": {
481
- "type": "integer",
482
- "description": "Override TAIL lines (how many log lines to fetch)",
483
- },
484
- "grep_filter": {
485
- "type": "string",
486
- "description": "Override GREP_FILTER (regex). Pass 'none' to disable filtering.",
487
- },
488
- },
489
- },
490
- },
491
- {
492
- "name": "watch_bot",
493
- "description": (
494
- "Tell Sentinel to passively monitor a Slack bot — queuing its messages as issues. "
495
- "Extract all <@UXXXXXX> user IDs from the message and pass them here. "
496
- "Sentinel verifies each is actually a bot (not a human) before adding to the watch list. "
497
- "IMPORTANT: a bot watcher is only useful if its issues can be delivered to a project. "
498
- "Try to infer the project from context (bot name, prior messages, available projects). "
499
- "If it cannot be determined, do NOT call this tool — instead ask the user which project "
500
- "the bot's alerts belong to, then call this tool with the project filled in. "
501
- "Use for: 'listen to @alertbot', 'watch @bot1 @bot2', 'monitor @errorbot'."
502
- ),
503
- "input_schema": {
504
- "type": "object",
505
- "properties": {
506
- "user_ids": {
507
- "type": "array",
508
- "items": {"type": "string"},
509
- "description": "Slack user IDs to watch — extract from <@UXXXXXX> patterns in the message",
510
- },
511
- "project": {
512
- "type": "string",
513
- "description": "Project short name this bot's issues should be routed to (e.g. '1881', 'elprint'). Infer from context or ask user before calling.",
514
- },
515
- },
516
- "required": ["user_ids"],
517
- },
518
- },
519
- {
520
- "name": "unwatch_bot",
521
- "description": (
522
- "Stop Sentinel from monitoring a Slack bot. "
523
- "Use for: 'stop watching @alertbot', 'unwatch @bot', 'remove @errorbot from watchers'."
524
- ),
525
- "input_schema": {
526
- "type": "object",
527
- "properties": {
528
- "user_ids": {
529
- "type": "array",
530
- "items": {"type": "string"},
531
- "description": "Slack user IDs to remove from the watch list",
532
- },
533
- },
534
- "required": ["user_ids"],
535
- },
536
- },
537
- {
538
- "name": "list_watched_bots",
539
- "description": (
540
- "List all Slack bots Sentinel is currently monitoring passively. "
541
- "Use for: 'who are you watching?', 'which bots are you monitoring?', 'list watched bots'."
542
- ),
543
- "input_schema": {"type": "object", "properties": {}},
544
- },
545
- {
546
- "name": "upgrade_sentinel",
547
- "description": (
548
- "Upgrade the Sentinel agent itself: git pull the latest code, update Python deps, "
549
- "then restart the process. Safe to call at any time — if already up to date, "
550
- "no restart is triggered. "
551
- "Use for: 'upgrade sentinel', 'update sentinel', 'upgrade yourself', "
552
- "'pull latest sentinel code', 'restart sentinel after upgrade'."
553
- ),
554
- "input_schema": {"type": "object", "properties": {}},
555
- },
556
- {
557
- "name": "ask_codebase",
558
- "description": (
559
- "Ask any natural-language question about a managed codebase. "
560
- "Accepts a repo name (e.g. 'STS', 'elprint-sales') OR a project name (e.g. '1881', 'elprint') "
561
- "— if a project name is given and it has multiple repos, all are queried. "
562
- "Claude Code answers using its full codebase knowledge — no need to specify how. "
563
- "Use for: 'what does 1881 do?', 'TODOs in 1881', 'find PIN validation in STS', "
564
- "'security issues in elprint-sales?', 'summarize the cairn repo'."
565
- ),
566
- "input_schema": {
567
- "type": "object",
568
- "properties": {
569
- "repo": {
570
- "type": "string",
571
- "description": "Repo name (e.g. 'STS', 'elprint-sales') OR project name (e.g. '1881', 'elprint') — project name queries all its repos",
572
- },
573
- "question": {
574
- "type": "string",
575
- "description": "Natural language question about the codebase",
576
- },
577
- },
578
- "required": ["repo", "question"],
579
- },
580
- },
581
- {
582
- "name": "restart_project",
583
- "description": (
584
- "Stop and restart a specific Sentinel monitoring instance (runs stop.sh then start.sh). "
585
- "This restarts the Sentinel agent process for that project — it does NOT restart the application itself. "
586
- "Use when: 'restart sentinel for 1881', 'reload the 1881 monitor', 'restart elprint sentinel'. "
587
- "Safer than restarting all projects at once."
588
- ),
589
- "input_schema": {
590
- "type": "object",
591
- "properties": {
592
- "project": {
593
- "type": "string",
594
- "description": "Project short name or dir name (e.g. '1881', 'elprint')",
595
- },
596
- },
597
- "required": ["project"],
598
- },
599
- },
600
- {
601
- "name": "my_stats",
602
- "description": (
603
- "Show the current user's personal Sentinel dashboard: "
604
- "conversation history length, issues they submitted, and "
605
- "a summary of Sentinel fix activity (errors caught, fixes applied, "
606
- "fixes pending PR review, fixes confirmed live, fixes failed). "
607
- "Use for: 'what have you done for me?', 'show my stats', "
608
- "'how many issues have been fixed?', 'my history', 'summary', "
609
- "'what did sentinel fix this week?', 'pending fixes', 'open PRs'."
610
- ),
611
- "input_schema": {
612
- "type": "object",
613
- "properties": {
614
- "hours": {
615
- "type": "integer",
616
- "description": "Look-back window in hours (default 168 = 7 days)",
617
- "default": 168,
618
- },
619
- },
620
- },
621
- },
622
- {
623
- "name": "clear_my_history",
624
- "description": (
625
- "Clear the current user's conversation history with Sentinel. "
626
- "After clearing, future sessions start with no memory of past conversations. "
627
- "Use for: 'clear my history', 'forget our conversation', "
628
- "'start fresh', 'reset my context', 'wipe my history'."
629
- ),
630
- "input_schema": {"type": "object", "properties": {}},
631
- },
632
- {
633
- "name": "tail_log",
634
- "description": (
635
- "Fetch the last N lines of a log source's live production logs without any grep filter. "
636
- "Use when: 'show me recent SSOLWA logs', 'tail STS', 'what's happening in 1881 logs right now', "
637
- "'show last 100 lines from SSOLWA'. Different from search_logs — no pattern required."
638
- ),
639
- "input_schema": {
640
- "type": "object",
641
- "properties": {
642
- "source": {
643
- "type": "string",
644
- "description": "Log source name (partial match against log-config filenames, e.g. 'SSOLWA', 'STS')",
645
- },
646
- "lines": {
647
- "type": "integer",
648
- "description": "Number of recent lines to fetch (default 100)",
649
- "default": 100,
650
- },
651
- },
652
- "required": ["source"],
653
- },
654
- },
655
- {
656
- "name": "post_file",
657
- "description": (
658
- "Upload a text file directly to the Slack conversation so the user can read or download it. "
659
- "Use when: output is too large for a chat message, the user asks to 'download', 'export', or "
660
- "'send as a file', or when formatted content (diffs, logs, CSVs, reports) is clearer as a file. "
661
- "e.g. 'give me that as a file', 'export the log', 'send me the diff for PR #41', "
662
- "'download the health report', 'export recent errors as CSV'"
663
- ),
664
- "input_schema": {
665
- "type": "object",
666
- "properties": {
667
- "content": {
668
- "type": "string",
669
- "description": "The full text content of the file to upload",
670
- },
671
- "filename": {
672
- "type": "string",
673
- "description": "Filename with extension, e.g. 'fix-ab12.diff', 'sentinel-report.txt', 'errors.csv', 'ssolwa.log'",
674
- },
675
- "title": {
676
- "type": "string",
677
- "description": "Optional display title shown above the file in Slack (defaults to filename)",
678
- },
679
- },
680
- "required": ["content", "filename"],
681
- },
682
- },
683
- {
684
- "name": "list_all_users",
685
- "description": (
686
- "ADMIN ONLY. List all Slack users who have ever talked to Sentinel, "
687
- "with their issue count and conversation message count. "
688
- "e.g. 'list all users', 'who has talked to you?', 'show user activity'"
689
- ),
690
- "input_schema": {"type": "object", "properties": {}},
691
- },
692
- {
693
- "name": "clear_user_history",
694
- "description": (
695
- "ADMIN ONLY. Clear the conversation history for a specific Slack user. "
696
- "e.g. 'clear history for huy', 'reset bob's conversation'"
697
- ),
698
- "input_schema": {
699
- "type": "object",
700
- "properties": {
701
- "user_id": {
702
- "type": "string",
703
- "description": "Slack user ID to clear (e.g. U01AB2CD3EF)",
704
- },
705
- },
706
- "required": ["user_id"],
707
- },
708
- },
709
- {
710
- "name": "reset_fingerprint",
711
- "description": (
712
- "ADMIN ONLY. Remove the 24h fix lock for an error fingerprint so Sentinel will retry it "
713
- "on the next poll cycle. Use when a fix attempt failed and you want to force a retry. "
714
- "e.g. 'retry fix abc123', 'reset fingerprint abc123de', 'let Sentinel try that error again'"
715
- ),
716
- "input_schema": {
717
- "type": "object",
718
- "properties": {
719
- "fingerprint": {
720
- "type": "string",
721
- "description": "Error fingerprint hash (8+ hex chars, from get_fix_details or list_all_errors)",
722
- },
723
- },
724
- "required": ["fingerprint"],
725
- },
726
- },
727
- {
728
- "name": "list_all_errors",
729
- "description": (
730
- "ADMIN ONLY. Return the full unfiltered error database — all fingerprints, counts, "
731
- "sources, and last-seen times. "
732
- "e.g. 'show all errors', 'full error list', 'dump the error DB'"
733
- ),
734
- "input_schema": {
735
- "type": "object",
736
- "properties": {
737
- "hours": {
738
- "type": "integer",
739
- "description": "Limit to errors seen in the last N hours (0 = all time)",
740
- "default": 0,
741
- },
742
- },
743
- },
744
- },
745
- {
746
- "name": "export_db",
747
- "description": (
748
- "ADMIN ONLY. Export the full Sentinel state (errors, fixes, PRs, users) as a "
749
- "downloadable text file posted to Slack. "
750
- "e.g. 'export the DB', 'download state', 'give me a full report file'"
751
- ),
752
- "input_schema": {"type": "object", "properties": {}},
753
- },
754
- ]
755
-
756
-
757
- # ── Workspace helpers ─────────────────────────────────────────────────────────
758
-
759
- def _workspace_dir() -> Path:
760
- return Path(".").resolve().parent
761
-
762
- def _short_name(dir_name: str) -> str:
763
- """'sentinel-1881' → '1881', 'sentinel-elprint' → 'elprint', others unchanged."""
764
- if dir_name.startswith("sentinel-"):
765
- return dir_name[len("sentinel-"):]
766
- return dir_name
767
-
768
- def _read_project_name(project_dir: Path) -> str:
769
- """Return PROJECT_NAME from sentinel.properties if set, else fall back to _short_name(dir)."""
770
- props = project_dir / "config" / "sentinel.properties"
771
- if props.exists():
772
- try:
773
- for line in props.read_text(encoding="utf-8", errors="ignore").splitlines():
774
- line = line.strip()
775
- if line.startswith("PROJECT_NAME"):
776
- _, _, val = line.partition("=")
777
- val = val.partition("#")[0].strip()
778
- if val:
779
- return val
780
- except Exception:
781
- pass
782
- return _short_name(project_dir.name)
783
-
784
def _find_project_dirs(target: str = "") -> list[Path]:
    """Return project dirs matching target (PROJECT_NAME, short name, or full dir name), or all if target empty."""
    needle = target.lower()

    def _matches(candidate: Path) -> bool:
        # Empty filter accepts everything; otherwise the needle must appear
        # in the dir name, its short name, or its configured PROJECT_NAME.
        if not needle:
            return True
        labels = (candidate.name, _short_name(candidate.name), _read_project_name(candidate))
        return any(needle in label.lower() for label in labels)

    found: list[Path] = []
    try:
        for entry in sorted(_workspace_dir().iterdir()):
            if not entry.is_dir() or entry.name in ("code", ".git"):
                continue
            if not (entry / "config").exists():
                continue  # every Sentinel project has a config/ dir
            if _matches(entry):
                found.append(entry)
    except Exception:
        # Best-effort: an unreadable workspace yields whatever was collected.
        pass
    return found
804
-
805
- def _git_pull(path: Path) -> dict:
806
- try:
807
- r = subprocess.run(
808
- ["git", "pull", "--rebase", "origin"],
809
- cwd=str(path), capture_output=True, text=True, timeout=60,
810
- )
811
- last = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
812
- return {"status": "ok" if r.returncode == 0 else "error",
813
- "detail": last if r.returncode == 0 else r.stderr.strip()}
814
- except Exception as e:
815
- return {"status": "error", "detail": str(e)}
816
-
817
-
818
- # ── Log-source name resolver ──────────────────────────────────────────────────
819
-
820
- def _filter_log_sources(props_files: list, source_hint: str) -> list:
821
- """
822
- Return the subset of props_files whose log source matches source_hint.
823
-
824
- Matching is tried in order (first match wins per file):
825
- 1. Substring of the filename stem (e.g. "sts" → STS.properties)
826
- 2. Substring of REMOTE_SERVICE_USER (e.g. "ssolwa" → ...SSOLoginWebApp...)
827
- 3. Substring of HOSTS (e.g. hostname fragment)
828
-
829
- Case-insensitive throughout. An empty source_hint returns all files unchanged.
830
- """
831
- if not source_hint:
832
- return props_files
833
- hint = source_hint.lower()
834
-
835
- def _props_contains(path: Path, key: str, hint: str) -> bool:
836
- try:
837
- for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
838
- stripped = line.strip()
839
- if stripped.startswith("#"):
840
- continue
841
- if stripped.upper().startswith(key + "="):
842
- val = stripped.split("=", 1)[1].partition("#")[0].strip().lower()
843
- if hint in val:
844
- return True
845
- except OSError:
846
- pass
847
- return False
848
-
849
- matched = []
850
- for p in props_files:
851
- if hint in p.stem.lower():
852
- matched.append(p)
853
- elif _props_contains(p, "REMOTE_SERVICE_USER", hint):
854
- matched.append(p)
855
- elif _props_contains(p, "HOSTS", hint):
856
- matched.append(p)
857
- return matched
858
-
859
-
860
- # ── Tool execution ────────────────────────────────────────────────────────────
861
-
862
- async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "", channel: str = "", is_admin: bool = False) -> str:
863
- if name == "get_status":
864
- hours = int(inputs.get("hours", 24))
865
- errors = store.get_recent_errors(hours)
866
- fixes = store.get_recent_fixes(hours)
867
- prs = store.get_open_prs()
868
- top_errors = [
869
- {
870
- "message": e["message"][:120],
871
- "count": e["count"],
872
- "source": e["source"],
873
- "last_seen": e["last_seen"],
874
- }
875
- for e in errors[:8]
876
- ]
877
- return json.dumps({
878
- "window_hours": hours,
879
- "errors_detected": len(errors),
880
- "top_errors": top_errors,
881
- "fixes_applied": sum(1 for f in fixes if f["status"] == "applied"),
882
- "fixes_pending": sum(1 for f in fixes if f["status"] == "pending"),
883
- "fixes_failed": sum(1 for f in fixes if f["status"] == "failed"),
884
- "open_prs": [
885
- {
886
- "repo": p["repo_name"],
887
- "branch": p["branch"],
888
- "pr_url": p["pr_url"],
889
- "age": p.get("timestamp", ""),
890
- }
891
- for p in prs
892
- ],
893
- "sentinel_paused": Path("SENTINEL_PAUSE").exists(),
894
- })
895
-
896
- if name == "check_auth_status":
897
- import subprocess as _sp
898
- from .notify import get_circuit_status
899
- cfg = cfg_loader.sentinel
900
-
901
- # Auth configuration
902
- has_key = bool(cfg.anthropic_api_key)
903
- pro_for_tasks = cfg.claude_pro_for_tasks
904
- if pro_for_tasks and has_key:
905
- primary, fallback = "claude_pro_oauth", "api_key"
906
- elif pro_for_tasks:
907
- primary, fallback = "claude_pro_oauth", None
908
- else:
909
- primary, fallback = "api_key", "claude_pro_oauth" if not has_key else "claude_pro_oauth"
910
-
911
- # Claude CLI liveness check
912
- cli_ok, cli_version = False, ""
913
- try:
914
- r = _sp.run(
915
- [cfg.claude_code_bin, "--version"],
916
- capture_output=True, text=True, timeout=10,
917
- )
918
- if r.returncode == 0:
919
- cli_ok = True
920
- cli_version = r.stdout.strip() or r.stderr.strip()
921
- except Exception:
922
- pass
923
-
924
- # Circuit breaker snapshot — only open (unhealthy) circuits appear here
925
- circuits = get_circuit_status()
926
-
927
- # Fix engine stats (last 24 h)
928
- recent = store.get_recent_fixes(hours=24)
929
- counts = {"applied": 0, "failed": 0, "skipped": 0, "pending": 0}
930
- last_success = None
931
- for f in recent:
932
- s = f.get("status", "")
933
- if s in counts:
934
- counts[s] += 1
935
- if s == "applied" and not last_success:
936
- last_success = f.get("timestamp", "")
937
-
938
- overall = "healthy"
939
- if circuits:
940
- overall = "degraded rate/auth limit active on: " + ", ".join(circuits)
941
- elif not cli_ok:
942
- overall = "warning — claude CLI not reachable"
943
-
944
- return json.dumps({
945
- "overall": overall,
946
- "auth": {
947
- "api_key_configured": has_key,
948
- "claude_pro_for_tasks": pro_for_tasks,
949
- "primary_method": primary,
950
- "fallback_method": fallback,
951
- },
952
- "claude_cli": {"available": cli_ok, "version": cli_version},
953
- "rate_limit_circuits": circuits,
954
- "fix_engine_24h": {**counts, "last_successful_fix": last_success},
955
- })
956
-
957
- if name == "create_issue":
958
- description = inputs["description"]
959
- target_repo = inputs.get("target_repo", "")
960
- project_arg = inputs.get("project", "")
961
-
962
- if project_arg:
963
- project_dirs = _find_project_dirs(project_arg)
964
- if not project_dirs:
965
- all_names = [_read_project_name(d) for d in _find_project_dirs()]
966
- return json.dumps({
967
- "error": f"No project found matching '{project_arg}'",
968
- "available_projects": all_names,
969
- "action_needed": "Ask the user which project they meant.",
970
- })
971
- if len(project_dirs) > 1:
972
- matches = [_read_project_name(d) for d in project_dirs]
973
- return json.dumps({
974
- "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
975
- "action_needed": "Ask the user to clarify which project they mean.",
976
- })
977
- project_dir = project_dirs[0]
978
- else:
979
- project_dir = Path(".")
980
-
981
- support_url = inputs.get("support_url", "").strip()
982
- attachments_summary = inputs.get("attachments_summary", "").strip()
983
- findings = inputs.get("findings", "").strip()
984
-
985
- issues_dir = project_dir / "issues"
986
- issues_dir.mkdir(exist_ok=True)
987
- fname = f"slack-{uuid.uuid4().hex[:8]}.txt"
988
-
989
- submitter_name = store.get_user_name(user_id) if user_id else ""
990
- submitter_line = f"SUBMITTED_BY: {submitter_name} ({user_id})" if user_id else ""
991
- lines = []
992
- if submitter_line:
993
- lines.append(submitter_line)
994
- if target_repo:
995
- lines.append(f"TARGET_REPO: {target_repo}")
996
- if support_url:
997
- lines.append(f"SUPPORT_URL: {support_url}")
998
- lines.append(f"SUBMITTED_AT: {datetime.now(timezone.utc).isoformat()}")
999
- lines.append("")
1000
- lines.append(description)
1001
- if findings:
1002
- lines.append(f"\nEVIDENCE (gathered by Sentinel Boss):\n{findings}")
1003
- if attachments_summary:
1004
- lines.append(f"\nATTACHMENTS:\n{attachments_summary}")
1005
- content = "\n".join(lines)
1006
- (issues_dir / fname).write_text(content, encoding="utf-8")
1007
-
1008
- # Touch SENTINEL_POLL_NOW so the target instance picks it up immediately
1009
- (project_dir / "SENTINEL_POLL_NOW").touch()
1010
-
1011
- project_label = _read_project_name(project_dir.resolve()) if project_arg else "this project"
1012
- logger.info("Boss created issue for %s: %s", project_label, fname)
1013
- if user_id:
1014
- try:
1015
- store.record_submitted_issue(
1016
- user_id=user_id,
1017
- user_name=submitter_name,
1018
- project=project_label,
1019
- fname=fname,
1020
- description=description,
1021
- )
1022
- except Exception as _rec_err:
1023
- logger.debug("Boss: could not record submitted issue: %s", _rec_err)
1024
- return json.dumps({
1025
- "status": "queued",
1026
- "project": project_label,
1027
- "file": fname,
1028
- "note": f"Delivered to '{project_label}'. Sentinel will process it on the next poll cycle.",
1029
- })
1030
-
1031
- if name == "get_fix_details":
1032
- fp = inputs["fingerprint"]
1033
- fix = store.get_confirmed_fix(fp) or store.get_marker_seen_fix(fp)
1034
- if not fix:
1035
- # Fallback: search recent fixes by prefix
1036
- recent = store.get_recent_fixes(hours=72)
1037
- fix = next((f for f in recent if f.get("fingerprint", "").startswith(fp)), None)
1038
- return json.dumps(fix or {"error": "not found"})
1039
-
1040
- if name == "list_pending_prs":
1041
- prs = store.get_open_prs()
1042
- return json.dumps({
1043
- "count": len(prs),
1044
- "open_prs": [
1045
- {
1046
- "repo": p["repo_name"],
1047
- "branch": p["branch"],
1048
- "pr_url": p["pr_url"],
1049
- "timestamp": p.get("timestamp", ""),
1050
- }
1051
- for p in prs
1052
- ],
1053
- })
1054
-
1055
- if name == "pause_sentinel":
1056
- Path("SENTINEL_PAUSE").touch()
1057
- logger.info("Boss: SENTINEL_PAUSE created")
1058
- return json.dumps({"status": "paused"})
1059
-
1060
- if name == "resume_sentinel":
1061
- p = Path("SENTINEL_PAUSE")
1062
- if p.exists():
1063
- p.unlink()
1064
- logger.info("Boss: SENTINEL_PAUSE removed")
1065
- return json.dumps({"status": "resumed"})
1066
-
1067
- if name == "list_projects":
1068
- projects = []
1069
- for d in _find_project_dirs():
1070
- repo_cfg_dir = d / "config" / "repo-configs"
1071
- repos_in_project = []
1072
- if repo_cfg_dir.exists():
1073
- for p in sorted(repo_cfg_dir.glob("*.properties")):
1074
- if p.name.startswith("_"):
1075
- continue
1076
- repo_url = ""
1077
- for line in p.read_text(encoding="utf-8", errors="ignore").splitlines():
1078
- if line.startswith("REPO_URL"):
1079
- repo_url = line.split("=", 1)[-1].strip()
1080
- break
1081
- repos_in_project.append({"repo": p.stem, "url": repo_url})
1082
- projects.append({
1083
- "project": _read_project_name(d),
1084
- "dir": d.name,
1085
- "running": (d / "sentinel.pid").exists(),
1086
- "this": d.resolve() == Path(".").resolve(),
1087
- "repos": repos_in_project,
1088
- })
1089
- return json.dumps({"projects": projects})
1090
-
1091
- if name == "search_logs":
1092
- query = inputs.get("query", "")
1093
- source = inputs.get("source", "").lower()
1094
- max_matches = int(inputs.get("max_matches", 30))
1095
- tail_override = inputs.get("tail")
1096
-
1097
- # ── Live fetch path: SSH to servers and grep in real time ──────────────
1098
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1099
- log_cfg_dir = Path("config") / "log-configs"
1100
- if script.exists() and log_cfg_dir.exists():
1101
- props_files = _filter_log_sources(sorted(log_cfg_dir.glob("*.properties")), source)
1102
- if props_files:
1103
- live_results = []
1104
- for props in props_files:
1105
- env = os.environ.copy()
1106
- env["GREP_FILTER"] = query
1107
- if tail_override:
1108
- env["TAIL"] = str(tail_override)
1109
- try:
1110
- r = subprocess.run(
1111
- ["bash", str(script), str(props)],
1112
- capture_output=True, text=True, timeout=60, env=env,
1113
- )
1114
- try:
1115
- _qpat = re.compile(query, re.IGNORECASE)
1116
- except re.error:
1117
- _qpat = re.compile(re.escape(query), re.IGNORECASE)
1118
- lines = (r.stdout or "").strip().splitlines()
1119
- matches = [ln[:300] for ln in lines if _qpat.search(ln)][:max_matches]
1120
- if matches:
1121
- live_results.append({"source": props.stem, "matches": matches})
1122
- logger.info("Boss search_logs live %s rc=%d found=%d", props.stem, r.returncode, len(matches))
1123
- except subprocess.TimeoutExpired:
1124
- live_results.append({"source": props.stem, "error": "timed out"})
1125
- except Exception as e:
1126
- live_results.append({"source": props.stem, "error": str(e)})
1127
- total = sum(len(r.get("matches", [])) for r in live_results)
1128
- return json.dumps({
1129
- "query": query,
1130
- "mode": "live",
1131
- "total_matches": total,
1132
- "results": live_results,
1133
- "note": (
1134
- "Results already include a per-source breakdown. "
1135
- "Do NOT call search_logs again with a source filter to 'refine' — "
1136
- "use these results directly."
1137
- ) if total > 0 else None,
1138
- })
1139
-
1140
- # ── Fallback: search locally-cached log files ──────────────────────────
1141
- # Reaching here means: live script unavailable OR source filter matched no config files.
1142
- # A result with files_searched=0 means the source name wasn't recognised — NOT that
1143
- # there are no log entries. Do not interpret this as "no results found".
1144
- fetched_dir = Path("workspace/fetched")
1145
- if not fetched_dir.exists():
1146
- return json.dumps({
1147
- "error": "No fetched logs found and fetch_log.sh unavailable",
1148
- "note": "This is a config/setup problem, not a 'no results' answer.",
1149
- })
1150
- try:
1151
- pattern = re.compile(query, re.IGNORECASE)
1152
- except re.error as e:
1153
- return json.dumps({"error": f"Invalid regex: {e}"})
1154
- results = []
1155
- for log_file in sorted(fetched_dir.glob("*.log")):
1156
- if source and source not in log_file.name.lower():
1157
- continue
1158
- try:
1159
- lines = log_file.read_text(encoding="utf-8", errors="ignore").splitlines()
1160
- matches = [
1161
- {"line": i + 1, "text": line[:300]}
1162
- for i, line in enumerate(lines)
1163
- if pattern.search(line)
1164
- ][:max_matches]
1165
- if matches:
1166
- results.append({"file": log_file.name, "matches": matches})
1167
- except Exception:
1168
- pass
1169
- total = sum(len(r["matches"]) for r in results)
1170
- files_searched = len(list(fetched_dir.glob("*.log")))
1171
- result = {
1172
- "query": query,
1173
- "mode": "cached",
1174
- "total_matches": total,
1175
- "files_searched": files_searched,
1176
- "results": results,
1177
- }
1178
- if files_searched == 0:
1179
- result["warning"] = (
1180
- "Source name not recognised in cached files — this is a lookup failure, not 'no results'. "
1181
- "If you already have results from a broader search_logs call, use those. Stop retrying."
1182
- )
1183
- return json.dumps(result)
1184
-
1185
- if name == "trigger_poll":
1186
- Path("SENTINEL_POLL_NOW").touch()
1187
- logger.info("Boss: immediate poll requested")
1188
- return json.dumps({"status": "triggered", "note": "Sentinel will run a poll cycle within seconds"})
1189
-
1190
- if name == "get_repo_status":
1191
- hours = int(inputs.get("hours", 24))
1192
- fixes = store.get_recent_fixes(hours)
1193
- errors = store.get_recent_errors(hours)
1194
- by_repo: dict = {}
1195
- for fix in fixes:
1196
- repo = fix.get("repo_name", "unknown")
1197
- s = by_repo.setdefault(repo, {"applied": 0, "pending": 0, "failed": 0, "skipped": 0})
1198
- key = fix.get("status", "failed")
1199
- s[key] = s.get(key, 0) + 1
1200
- return json.dumps({"window_hours": hours, "total_errors": len(errors), "by_repo": by_repo})
1201
-
1202
- if name == "list_recent_commits":
1203
- limit = int(inputs.get("limit", 5))
1204
- results = []
1205
- for repo_name, repo in cfg_loader.repos.items():
1206
- local = Path(repo.local_path)
1207
- if not local.exists():
1208
- continue
1209
- try:
1210
- r = subprocess.run(
1211
- ["git", "log", "--oneline", "--grep=sentinel", "-n", str(limit)],
1212
- cwd=str(local), capture_output=True, text=True, timeout=10,
1213
- )
1214
- commits = r.stdout.strip().splitlines()
1215
- if commits:
1216
- results.append({"repo": repo_name, "commits": commits})
1217
- except Exception:
1218
- pass
1219
- return json.dumps({"sentinel_commits": results})
1220
-
1221
- if name == "pull_repo":
1222
- target = inputs.get("repo", "").lower()
1223
- results = []
1224
- for repo_name, repo in cfg_loader.repos.items():
1225
- if target and target not in repo_name.lower():
1226
- continue
1227
- local = Path(repo.local_path)
1228
- if not local.exists():
1229
- results.append({"repo": repo_name, "status": "error", "detail": "local path not found"})
1230
- continue
1231
- try:
1232
- r = subprocess.run(
1233
- ["git", "pull", "--rebase", "origin", repo.branch],
1234
- cwd=str(local), capture_output=True, text=True, timeout=60,
1235
- )
1236
- last_line = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
1237
- if r.returncode == 0:
1238
- results.append({"repo": repo_name, "status": "ok", "detail": last_line})
1239
- else:
1240
- results.append({"repo": repo_name, "status": "error", "detail": r.stderr.strip()})
1241
- except Exception as e:
1242
- results.append({"repo": repo_name, "status": "error", "detail": str(e)})
1243
- return json.dumps({"results": results})
1244
-
1245
- if name == "pull_config":
1246
- target = inputs.get("project", "")
1247
- dirs = _find_project_dirs(target)
1248
- if not dirs:
1249
- return json.dumps({"error": f"No project found matching '{target}'"})
1250
- results = []
1251
- for d in dirs:
1252
- res = _git_pull(d)
1253
- results.append({"project": _read_project_name(d), "dir": d.name, **res})
1254
- logger.info("Boss: pull_config %s → %s", d.name, res["status"])
1255
- return json.dumps({"results": results})
1256
-
1257
- if name == "fetch_logs":
1258
- source_filter = inputs.get("source", "").lower()
1259
- debug = bool(inputs.get("debug", False))
1260
- tail_override = inputs.get("tail")
1261
- grep_override = inputs.get("grep_filter", "")
1262
-
1263
- # Find fetch_log.sh relative to this file
1264
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1265
- if not script.exists():
1266
- return json.dumps({"error": f"fetch_log.sh not found at {script}"})
1267
-
1268
- log_cfg_dir = Path("config") / "log-configs"
1269
- if not log_cfg_dir.exists():
1270
- return json.dumps({"error": "config/log-configs/ not found"})
1271
-
1272
- props_files = _filter_log_sources(sorted(log_cfg_dir.glob("*.properties")), source_filter)
1273
- if not props_files:
1274
- return json.dumps({"error": f"No log-config found matching '{source_filter}'"})
1275
-
1276
- results = []
1277
- for props in props_files:
1278
- env = os.environ.copy()
1279
- if tail_override:
1280
- env["TAIL"] = str(tail_override)
1281
- if grep_override:
1282
- env["GREP_FILTER"] = grep_override
1283
-
1284
- cmd = ["bash", str(script)]
1285
- if debug:
1286
- cmd.append("--debug")
1287
- cmd.append(str(props))
1288
-
1289
- try:
1290
- r = subprocess.run(
1291
- cmd, capture_output=True, text=True, timeout=120, env=env,
1292
- )
1293
- output = (r.stdout or "").strip()
1294
- stderr = (r.stderr or "").strip()
1295
- results.append({
1296
- "source": props.stem,
1297
- "returncode": r.returncode,
1298
- "output": output[-2000:] if output else "",
1299
- "stderr": stderr[-1000:] if stderr else "",
1300
- })
1301
- logger.info("Boss fetch_logs %s rc=%d", props.stem, r.returncode)
1302
- except subprocess.TimeoutExpired:
1303
- results.append({"source": props.stem, "error": "timed out after 120s"})
1304
- except Exception as e:
1305
- results.append({"source": props.stem, "error": str(e)})
1306
-
1307
- return json.dumps({"fetched": len(results), "results": results})
1308
-
1309
- if name == "watch_bot":
1310
- if not is_admin:
1311
- return json.dumps({"error": "Admin access required to register bots for monitoring."})
1312
- user_ids = inputs.get("user_ids", [])
1313
- project_arg = inputs.get("project", "").strip()
1314
- if not user_ids:
1315
- return json.dumps({"error": "No user_ids provided"})
1316
-
1317
- # Resolve + validate project — required for bot issue routing
1318
- resolved_project = ""
1319
- if project_arg:
1320
- project_dirs = _find_project_dirs(project_arg)
1321
- if not project_dirs:
1322
- all_names = [_read_project_name(d) for d in _find_project_dirs()]
1323
- return json.dumps({
1324
- "error": f"No project found matching '{project_arg}'",
1325
- "available_projects": all_names,
1326
- "action_needed": "Ask the user which project these bot alerts belong to.",
1327
- })
1328
- if len(project_dirs) > 1:
1329
- matches = [_read_project_name(d) for d in project_dirs]
1330
- return json.dumps({
1331
- "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
1332
- "action_needed": "Ask the user to clarify which project.",
1333
- })
1334
- resolved_project = _read_project_name(project_dirs[0])
1335
- else:
1336
- all_projects = _find_project_dirs()
1337
- if len(all_projects) == 1:
1338
- # Single project in workspace — auto-assign
1339
- resolved_project = _read_project_name(all_projects[0])
1340
- elif all_projects:
1341
- all_names = [_read_project_name(d) for d in all_projects]
1342
- return json.dumps({
1343
- "error": "Cannot determine which project these bot alerts belong to.",
1344
- "available_projects": all_names,
1345
- "action_needed": "Ask the user to specify the project, then retry with project filled in.",
1346
- })
1347
-
1348
- results = []
1349
- for uid in user_ids:
1350
- if not slack_client:
1351
- results.append({"user_id": uid, "status": "error", "reason": "no Slack client available"})
1352
- continue
1353
- try:
1354
- info = await slack_client.users_info(user=uid)
1355
- user = info.get("user", {})
1356
- if not user.get("is_bot", False):
1357
- results.append({"user_id": uid, "status": "skipped", "reason": "not a bot — only bots can be watched passively"})
1358
- continue
1359
- bot_name = user.get("real_name") or user.get("name") or uid
1360
- store.add_watched_bot(uid, bot_name, added_by="boss", project_name=resolved_project)
1361
- logger.info("Boss: now watching bot %s (%s) → project '%s'", bot_name, uid, resolved_project or "unset")
1362
- results.append({"user_id": uid, "bot_name": bot_name, "project": resolved_project, "status": "watching"})
1363
- except Exception as e:
1364
- results.append({"user_id": uid, "status": "error", "reason": str(e)})
1365
- return json.dumps({"results": results})
1366
-
1367
- if name == "unwatch_bot":
1368
- if not is_admin:
1369
- return json.dumps({"error": "Admin access required to remove bots from monitoring."})
1370
- user_ids = inputs.get("user_ids", [])
1371
- if not user_ids:
1372
- return json.dumps({"error": "No user_ids provided"})
1373
- results = []
1374
- for uid in user_ids:
1375
- removed = store.remove_watched_bot(uid)
1376
- logger.info("Boss: unwatch bot %s → %s", uid, "removed" if removed else "not found")
1377
- results.append({"user_id": uid, "status": "removed" if removed else "not found"})
1378
- return json.dumps({"results": results})
1379
-
1380
- if name == "list_watched_bots":
1381
- bots = store.get_watched_bots()
1382
- return json.dumps({
1383
- "count": len(bots),
1384
- "bots": [
1385
- {
1386
- "bot_id": b["bot_id"],
1387
- "bot_name": b["bot_name"],
1388
- "project": b.get("project_name") or "",
1389
- "added_by": b["added_by"],
1390
- "added_at": b["added_at"],
1391
- }
1392
- for b in bots
1393
- ],
1394
- })
1395
-
1396
- if name == "upgrade_sentinel":
1397
- if not is_admin:
1398
- return json.dumps({"error": "Admin access required to upgrade Sentinel."})
1399
- import threading
1400
-
1401
- # Sentinel is installed via npm use `sentinel upgrade` which handles
1402
- # npm install + Python bundle copy + restart via stopAll/startAll.
1403
- # Run it in the background after a short delay so the Slack reply is
1404
- # sent before the process is replaced.
1405
- try:
1406
- r = subprocess.run(
1407
- ["sentinel", "--version"],
1408
- capture_output=True, text=True, timeout=10,
1409
- )
1410
- sentinel_bin_ok = r.returncode == 0
1411
- except Exception:
1412
- sentinel_bin_ok = False
1413
-
1414
- if not sentinel_bin_ok:
1415
- return json.dumps({
1416
- "status": "error",
1417
- "note": "`sentinel` CLI not found. Run: npm install -g @misterhuydo/sentinel",
1418
- })
1419
-
1420
- def _do_upgrade():
1421
- import time
1422
- time.sleep(10) # give Slack time to post the reply
1423
- subprocess.Popen(["sentinel", "upgrade"], close_fds=True)
1424
-
1425
- threading.Thread(target=_do_upgrade, daemon=True).start()
1426
- logger.info("Boss: upgrade_sentinel scheduled via `sentinel upgrade`")
1427
- return json.dumps({
1428
- "status": "ok",
1429
- "note": "Upgrade started — pulling latest version via npm and restarting. Give me ~30 seconds then I'll be back.",
1430
- })
1431
-
1432
- if name == "ask_codebase":
1433
- target = inputs.get("repo", "").lower()
1434
- question = inputs.get("question", "")
1435
-
1436
- # 1. Find repos whose name contains the target (e.g. "STS", "elprint-sales")
1437
- matched = [(rn, r) for rn, r in cfg_loader.repos.items() if target in rn.lower()]
1438
-
1439
- # 2. No repo match — check if target is a project name → use ALL repos in cfg_loader
1440
- # (each Sentinel instance is scoped to one project, so all repos belong to it)
1441
- if not matched:
1442
- current_project = _read_project_name(Path("."))
1443
- if target in current_project.lower() or current_project.lower() in target:
1444
- matched = list(cfg_loader.repos.items())
1445
-
1446
- if not matched:
1447
- return json.dumps({
1448
- "error": f"No repo or project found matching '{target}'",
1449
- "available_repos": list(cfg_loader.repos.keys()),
1450
- })
1451
-
1452
- cfg = cfg_loader.sentinel
1453
- env = os.environ.copy()
1454
- # Only inject API key when Claude Pro is NOT preferred for heavy tasks
1455
- if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
1456
- env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1457
-
1458
- def _ask_one(repo_name, repo_cfg) -> dict:
1459
- local_path = Path(repo_cfg.local_path)
1460
- if not local_path.exists():
1461
- return {"repo": repo_name, "error": f"not cloned yet at {local_path}"}
1462
- prompt = (
1463
- f"You are a code analyst. Answer the following question about the codebase at: {local_path}\n\n"
1464
- f"Question: {question}\n\n"
1465
- f"Use whatever tools you need to answer accurately. Be concise and direct. Plain text only."
1466
- )
1467
- try:
1468
- r = subprocess.run(
1469
- ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
1470
- if os.getuid() != 0 else
1471
- [cfg.claude_code_bin, "--print", prompt]),
1472
- capture_output=True, text=True, timeout=180, env=env,
1473
- cwd=str(local_path),
1474
- )
1475
- output = (r.stdout or "").strip()
1476
- logger.info("Boss ask_codebase %s rc=%d len=%d", repo_name, r.returncode, len(output))
1477
- if r.returncode != 0 and not output:
1478
- raw_err = (r.stderr or "")
1479
- alert_if_rate_limited(
1480
- cfg.slack_bot_token, cfg.slack_channel,
1481
- f"ask_codebase/{repo_name}", raw_err,
1482
- )
1483
- return {"repo": repo_name, "error": f"claude --print failed (rc={r.returncode}): {raw_err[:200]}"}
1484
- return {"repo": repo_name, "answer": output[:3000]}
1485
- except subprocess.TimeoutExpired:
1486
- return {"repo": repo_name, "error": "timed out after 180s"}
1487
- except Exception as e:
1488
- return {"repo": repo_name, "error": str(e)}
1489
-
1490
- if len(matched) == 1:
1491
- result = _ask_one(*matched[0])
1492
- # Unwrap single-repo result for cleaner response
1493
- return json.dumps(result)
1494
-
1495
- # Multiple repos — query each and combine
1496
- results = [_ask_one(rn, r) for rn, r in matched]
1497
- return json.dumps({"project": target, "repos_queried": len(results), "results": results})
1498
-
1499
- if name == "restart_project":
1500
- if not is_admin:
1501
- return json.dumps({"error": "Admin access required to restart a project."})
1502
- project_arg = inputs.get("project", "").lower()
1503
- dirs = _find_project_dirs(project_arg)
1504
- if not dirs:
1505
- return json.dumps({"error": f"No project found matching '{project_arg}'"})
1506
- results = []
1507
- for d in dirs:
1508
- stop_sh = d / "stop.sh"
1509
- start_sh = d / "start.sh"
1510
- if not stop_sh.exists() or not start_sh.exists():
1511
- results.append({"project": d.name, "status": "error", "detail": "stop.sh or start.sh not found"})
1512
- continue
1513
- try:
1514
- subprocess.run(["bash", str(stop_sh)], cwd=str(d), timeout=30)
1515
- subprocess.run(["bash", str(start_sh)], cwd=str(d), timeout=30)
1516
- results.append({"project": d.name, "status": "restarted"})
1517
- logger.info("Boss: restarted project %s", d.name)
1518
- except Exception as e:
1519
- results.append({"project": d.name, "status": "error", "detail": str(e)})
1520
- return json.dumps({"results": results})
1521
-
1522
- if name == "tail_log":
1523
- source = inputs.get("source", "").lower()
1524
- lines = int(inputs.get("lines", 100))
1525
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1526
- log_cfg_dir = Path("config") / "log-configs"
1527
-
1528
- if not script.exists():
1529
- return json.dumps({"error": "fetch_log.sh not found"})
1530
- if not log_cfg_dir.exists():
1531
- return json.dumps({"error": "config/log-configs/ not found"})
1532
-
1533
- props_files = sorted(log_cfg_dir.glob("*.properties"))
1534
- if source:
1535
- props_files = [p for p in props_files if source in p.stem.lower()]
1536
- if not props_files:
1537
- return json.dumps({"error": f"No log-config found matching '{source}'"})
1538
-
1539
- results = []
1540
- for props in props_files:
1541
- env = os.environ.copy()
1542
- env["TAIL"] = str(lines)
1543
- env["GREP_FILTER"] = "" # no filter — show everything
1544
- try:
1545
- r = subprocess.run(
1546
- ["bash", str(script), str(props)],
1547
- capture_output=True, text=True, timeout=60, env=env,
1548
- )
1549
- tail_lines = (r.stdout or "").strip().splitlines()[-lines:]
1550
- results.append({
1551
- "source": props.stem,
1552
- "lines": len(tail_lines),
1553
- "content": "\n".join(tail_lines),
1554
- })
1555
- logger.info("Boss tail_log %s rc=%d lines=%d", props.stem, r.returncode, len(tail_lines))
1556
- except subprocess.TimeoutExpired:
1557
- results.append({"source": props.stem, "error": "timed out"})
1558
- except Exception as e:
1559
- results.append({"source": props.stem, "error": str(e)})
1560
- return json.dumps({"results": results})
1561
-
1562
- if name == "post_file":
1563
- if not slack_client or not channel:
1564
- return json.dumps({"error": "No Slack channel context — cannot upload file"})
1565
- content = inputs.get("content", "")
1566
- filename = inputs.get("filename", "sentinel-output.txt")
1567
- title = inputs.get("title", filename)
1568
- if not content:
1569
- return json.dumps({"error": "No content provided"})
1570
- try:
1571
- await slack_client.files_upload_v2(
1572
- channel=channel,
1573
- content=content,
1574
- filename=filename,
1575
- title=title,
1576
- )
1577
- logger.info("Boss post_file: uploaded %s (%d bytes) to %s", filename, len(content), channel)
1578
- return json.dumps({"ok": True, "filename": filename, "bytes": len(content)})
1579
- except Exception as e:
1580
- logger.warning("Boss post_file failed: %s", e)
1581
- return json.dumps({"error": str(e)})
1582
-
1583
- if name == "my_stats":
1584
- hours = int(inputs.get("hours", 168))
1585
- errors = store.get_recent_errors(hours)
1586
- fixes = store.get_recent_fixes(hours)
1587
- prs = store.get_open_prs()
1588
- pending_conf = store.get_fixes_pending_confirmation()
1589
- # Conversation stats
1590
- history = store.load_conversation(user_id) if user_id else []
1591
- hist_len = len(history)
1592
- # Load conversation updated_at from DB
1593
- conv_updated = ""
1594
- try:
1595
- import sqlite3 as _sqlite3
1596
- with _sqlite3.connect(store.db_path) as _db:
1597
- row = _db.execute(
1598
- "SELECT updated_at FROM conversations WHERE user_id=?", (user_id,)
1599
- ).fetchone()
1600
- if row:
1601
- conv_updated = row[0]
1602
- except Exception:
1603
- pass
1604
- # Tally fix statuses
1605
- by_status: dict = {}
1606
- for fix in fixes:
1607
- s = fix.get("status", "unknown")
1608
- by_status[s] = by_status.get(s, 0) + 1
1609
- # Fixes confirmed via sentinel marker in prod
1610
- confirmed = [f for f in fixes if f.get("fix_outcome") == "confirmed"]
1611
- regressed = [f for f in fixes if f.get("fix_outcome") == "regressed"]
1612
- submitted = store.get_submitted_issues(user_id, hours=hours) if user_id else []
1613
- submitted_recent = store.get_submitted_issues(user_id, hours=hours) if user_id else []
1614
- return json.dumps({
1615
- "conversation": {
1616
- "messages_in_history": hist_len,
1617
- "turns": hist_len // 2,
1618
- "last_active": conv_updated or "no history",
1619
- },
1620
- "issues_you_submitted": {
1621
- "total_in_window": len(submitted_recent),
1622
- "all_time": len(store.get_submitted_issues(user_id) if user_id else []),
1623
- "recent": [
1624
- {"project": i["project"], "description": i["description"][:80],
1625
- "submitted_at": i["submitted_at"]}
1626
- for i in submitted_recent[:5]
1627
- ],
1628
- },
1629
- "window_hours": hours,
1630
- "errors_detected": len(errors),
1631
- "fixes": {
1632
- "applied": by_status.get("applied", 0),
1633
- "pending_pr": len(prs),
1634
- "failed": by_status.get("failed", 0),
1635
- "skipped": by_status.get("skipped", 0),
1636
- "error": by_status.get("error", 0),
1637
- },
1638
- "confirmed_in_prod": len(confirmed),
1639
- "regressed_after_fix": len(regressed),
1640
- "awaiting_confirmation": len(pending_conf),
1641
- "open_prs": [
1642
- {"repo": p["repo_name"], "pr_url": p["pr_url"], "timestamp": p["timestamp"]}
1643
- for p in prs
1644
- ],
1645
- "top_errors": [
1646
- {"message": e["message"][:100], "count": e["count"], "source": e["source"]}
1647
- for e in errors[:5]
1648
- ],
1649
- })
1650
- if name == "clear_my_history":
1651
- if user_id:
1652
- store.save_conversation(user_id, [])
1653
- logger.info("Boss: cleared conversation history for user %s", user_id)
1654
- return json.dumps({
1655
- "status": "cleared",
1656
- "note": "Your conversation history has been wiped. Next session starts fresh. [DONE]",
1657
- })
1658
- return json.dumps({"error": "cannot determine user — not clearing"})
1659
-
1660
- # ── Admin-only tools ──────────────────────────────────────────────────────
1661
- _ADMIN_TOOLS = {"list_all_users", "clear_user_history", "reset_fingerprint", "list_all_errors", "export_db"}
1662
- if name in _ADMIN_TOOLS:
1663
- if not is_admin:
1664
- return json.dumps({"error": "Admin access required. You are not in SLACK_ADMIN_USERS."})
1665
-
1666
- if name == "list_all_users":
1667
- stats = store.get_all_user_stats()
1668
- return json.dumps({"users": stats, "total": len(stats)})
1669
-
1670
- if name == "clear_user_history":
1671
- target = inputs.get("target_user_id", "").strip()
1672
- if not target:
1673
- return json.dumps({"error": "target_user_id is required"})
1674
- store.save_conversation(target, [])
1675
- display = store.get_user_name(target)
1676
- logger.info("Boss admin: cleared history for user %s (%s) by admin %s", target, display, user_id)
1677
- return json.dumps({"status": "cleared", "target_user_id": target, "display_name": display})
1678
-
1679
- if name == "reset_fingerprint":
1680
- fp = inputs.get("fingerprint", "").strip()
1681
- if not fp:
1682
- return json.dumps({"error": "fingerprint is required"})
1683
- found = store.reset_fingerprint(fp)
1684
- logger.info("Boss admin: reset fingerprint %s by admin %s (found=%s)", fp, user_id, found)
1685
- return json.dumps({"status": "reset" if found else "not_found", "fingerprint": fp,
1686
- "note": "Sentinel will retry this error on the next poll." if found else "No fix record found for this fingerprint."})
1687
-
1688
- if name == "list_all_errors":
1689
- hours = int(inputs.get("hours", 0))
1690
- errors = store.get_all_errors(hours)
1691
- return json.dumps({"errors": errors[:100], "total": len(errors),
1692
- "window_hours": hours or "all time"})
1693
-
1694
- if name == "export_db":
1695
- if not slack_client or not channel:
1696
- return json.dumps({"error": "No Slack channel context — cannot upload file"})
1697
- try:
1698
- import sqlite3 as _sq
1699
- import io as _io
1700
- lines = []
1701
- with _sq.connect(store.db_path) as _db:
1702
- for tbl in ["errors", "fixes", "reports", "slack_users", "conversations", "submitted_issues"]:
1703
- try:
1704
- rows = _db.execute(f"SELECT * FROM {tbl}").fetchall() # noqa: S608
1705
- cols = [d[0] for d in _db.execute(f"SELECT * FROM {tbl} LIMIT 0").description] # noqa: S608
1706
- lines.append(f"=== {tbl} ({len(rows)} rows) ===")
1707
- lines.append("\t".join(cols))
1708
- for row in rows:
1709
- lines.append("\t".join(str(v) if v is not None else "" for v in row))
1710
- lines.append("")
1711
- except Exception:
1712
- lines.append(f"=== {tbl} (unavailable) ===\n")
1713
- content = "\n".join(lines)
1714
- await slack_client.files_upload_v2(
1715
- channel=channel,
1716
- content=content,
1717
- filename="sentinel-db-export.tsv",
1718
- title="Sentinel DB Export",
1719
- )
1720
- logger.info("Boss admin: exported DB (%d bytes) by admin %s", len(content), user_id)
1721
- return json.dumps({"ok": True, "bytes": len(content)})
1722
- except Exception as e:
1723
- return json.dumps({"error": str(e)})
1724
-
1725
- return json.dumps({"error": f"unknown tool: {name}"})
1726
-
1727
-
1728
- # ── CLI fallback (OAuth / no API key) ────────────────────────────────────────
1729
-
1730
- def _attachments_to_text(attachments: list[dict]) -> str:
1731
- """Produce a plain-text summary of attachments to append to CLI prompts."""
1732
- if not attachments:
1733
- return ""
1734
- parts = []
1735
- for att in attachments:
1736
- if att["type"] == "text":
1737
- parts.append(
1738
- f"[Attached file: {att['name']}]\n{att['content']}"
1739
- )
1740
- elif att["type"] == "image":
1741
- parts.append(
1742
- f"[Attached image: {att['name']}] (saved at {att['path']})"
1743
- )
1744
- else:
1745
- parts.append(
1746
- f"[Attached file: {att['name']}] (saved at {att['path']} — read it if relevant)"
1747
- )
1748
- return "\n\nATTACHMENTS:\n" + "\n---\n".join(parts)
1749
-
1750
-
1751
- def _attachments_to_api_blocks(attachments: list[dict]) -> list[dict]:
1752
- """Convert attachments into Anthropic API message content blocks."""
1753
- blocks: list[dict] = []
1754
- for att in attachments:
1755
- if att["type"] == "image":
1756
- blocks.append({
1757
- "type": "image",
1758
- "source": {
1759
- "type": "base64",
1760
- "media_type": att.get("mime", "image/png"),
1761
- "data": att["content"],
1762
- },
1763
- })
1764
- elif att["type"] == "text":
1765
- blocks.append({
1766
- "type": "text",
1767
- "text": f"[Attached file: {att['name']}]\n{att['content']}",
1768
- })
1769
- else:
1770
- blocks.append({
1771
- "type": "text",
1772
- "text": f"[Attached file: {att['name']}] saved at {att['path']}",
1773
- })
1774
- return blocks
1775
-
1776
-
1777
- _ACTION_RE = re.compile(r"^ACTION:\s*(\{.*\})", re.MULTILINE)
1778
-
1779
-
1780
- async def _handle_with_cli(
1781
- message: str,
1782
- history: list,
1783
- cfg_loader,
1784
- store,
1785
- slack_client=None,
1786
- user_name: str = "",
1787
- user_id: str = "",
1788
- attachments: list | None = None,
1789
- is_admin: bool = False,
1790
- ) -> tuple[str, bool]:
1791
- """Fallback: use `claude --print` for users without an Anthropic API key."""
1792
- status_json = await _run_tool("get_status", {"hours": 24}, cfg_loader, store)
1793
- prs_json = await _run_tool("list_pending_prs", {}, cfg_loader, store)
1794
-
1795
- # Pre-fetch log search if the message is a search request.
1796
- # Use quoted strings as the query, or fall back to the full message.
1797
- # Never hardcode field names — the query is whatever the user said.
1798
- search_json = ""
1799
- _search_kws = ("search", "find", "look for", "show me log", "grep", "entries for")
1800
- if any(kw in message.lower() for kw in _search_kws):
1801
- quoted = re.findall(r'"([^"]+)"', message)
1802
- query = quoted[0] if quoted else message
1803
- search_json = await _run_tool("search_logs", {"query": query}, cfg_loader, store)
1804
-
1805
- paused = Path("SENTINEL_PAUSE").exists()
1806
- repos = list(cfg_loader.repos.keys())
1807
- log_sources = list(cfg_loader.log_sources.keys())
1808
- ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
1809
-
1810
- history_text = ""
1811
- for msg in history[-8:]:
1812
- role = msg["role"].upper()
1813
- content = msg["content"]
1814
- if isinstance(content, list):
1815
- content = " ".join(
1816
- (b.get("text", "") if isinstance(b, dict) else getattr(b, "text", ""))
1817
- for b in content
1818
- if (isinstance(b, dict) and b.get("type") == "text")
1819
- or (hasattr(b, "type") and b.type == "text")
1820
- )
1821
- history_text += f"\n{role}: {content}"
1822
-
1823
- slack_mention = f"<@{user_id}>" if user_id else (user_name or "")
1824
- known_users = store.get_all_users()
1825
- users_hint = ", ".join(f"<@{uid}> = {name}" for uid, name in known_users.items())
1826
- prompt = (
1827
- _SYSTEM
1828
- + (f"\nYou are speaking with: {user_name} (Slack mention: {slack_mention})" if user_name else "")
1829
- + "\nAlways start your reply by addressing the user directly using their Slack mention, e.g. \"<@U123> here is what I found...\"."
1830
- + " Never use their plain name — always use the <@USER_ID> format so Slack highlights it."
1831
- + (f"\nKnown Slack users: {users_hint}" if users_hint else "")
1832
- + f"\n\nCurrent time: {ts}"
1833
- + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
1834
- + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
1835
- + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
1836
- + f"\nAdmin access for this user: {'YES — admin tools are available' if is_admin else 'NO — admin tools will be refused'}"
1837
- + "\nNOTE: Running in CLI fallback mode — admin tools and some features are unavailable. Ask user to configure ANTHROPIC_API_KEY for full features."
1838
- + f"\n\nCurrent status (last 24 h):\n{status_json}"
1839
- + f"\n\nOpen PRs:\n{prs_json}"
1840
- + (f"\n\nLog search results:\n{search_json}" if search_json else "")
1841
- + (f"\n\nConversation so far:{history_text}" if history_text else "")
1842
- + _attachments_to_text(attachments or [])
1843
- + f"\n\nUSER: {message}"
1844
- + "\n\nIf you need to take an action, include a line like:\n"
1845
- + " ACTION: {\"action\": \"pause_sentinel\"}\n"
1846
- + " ACTION: {\"action\": \"resume_sentinel\"}\n"
1847
- + " ACTION: {\"action\": \"trigger_poll\"}\n"
1848
- + " ACTION: {\"action\": \"create_issue\", \"description\": \"...\", \"target_repo\": \"\"}\n"
1849
- + " ACTION: {\"action\": \"search_logs\", \"query\": \"<whatever the user asked to find>\"}\n"
1850
- + "End with [DONE] if the request is fully handled."
1851
- )
1852
-
1853
- cfg = cfg_loader.sentinel
1854
- env = os.environ.copy()
1855
- if cfg.anthropic_api_key:
1856
- env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1857
-
1858
- try:
1859
- result = subprocess.run(
1860
- ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
1861
- if os.getuid() != 0 else
1862
- [cfg.claude_code_bin, "--print", prompt]),
1863
- capture_output=True, text=True, timeout=180, env=env,
1864
- )
1865
- output = (result.stdout or "").strip()
1866
- if result.returncode != 0 or not output:
1867
- stderr = (result.stderr or "").strip()
1868
- logger.error(
1869
- "Boss CLI call failed (rc=%d): stdout=%r stderr=%r",
1870
- result.returncode, output[:200], stderr[:200],
1871
- )
1872
- raw_err = (result.stderr or "").strip()
1873
- if result.returncode != 0 and not output:
1874
- full_err = f"exit {result.returncode}: {raw_err[:300]}"
1875
- cfg = cfg_loader.sentinel
1876
- alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
1877
- "sentinel_boss/cli", raw_err or full_err)
1878
- return f":warning: `claude --print` failed ({full_err})", True
1879
- except Exception as e:
1880
- logger.error("Boss CLI call failed: %s", e)
1881
- return f":warning: Boss unavailable: {e}", True
1882
-
1883
- for m in _ACTION_RE.finditer(output):
1884
- try:
1885
- action = json.loads(m.group(1))
1886
- name = action.pop("action", "")
1887
- if name:
1888
- result_str = await _run_tool(name, action, cfg_loader, store, user_id=user_id)
1889
- logger.info("Boss CLI action: %s %s", name, result_str[:80])
1890
- except Exception as e:
1891
- logger.warning("Boss action parse error: %s", e)
1892
-
1893
- reply = _ACTION_RE.sub("", output).strip()
1894
- is_done = "[DONE]" in reply
1895
- reply = reply.replace("[DONE]", "").strip()
1896
- if not reply:
1897
- greeting = f"Hi {user_name}! " if user_name else "Hi! "
1898
- reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"
1899
-
1900
- history.append({"role": "user", "content": message})
1901
- history.append({"role": "assistant", "content": reply})
1902
- return reply, is_done
1903
-
1904
-
1905
- # ── History serialization helpers ────────────────────────────────────────────
1906
-
1907
- def _serialize_content(content) -> list:
1908
- """Convert Anthropic SDK response content (Pydantic objects) to plain dicts.
1909
-
1910
- The SDK returns TextBlock / ToolUseBlock instances. json.dumps(..., default=str)
1911
- turns them into useless strings like "TextBlock(type='text', text='...')".
1912
- This converts them to proper dicts so history round-trips through SQLite safely.
1913
- """
1914
- if not isinstance(content, list):
1915
- return content
1916
- result = []
1917
- for block in content:
1918
- if isinstance(block, dict):
1919
- result.append(block)
1920
- elif hasattr(block, "model_dump"):
1921
- result.append(block.model_dump())
1922
- elif hasattr(block, "dict"):
1923
- result.append(block.dict())
1924
- elif hasattr(block, "type"):
1925
- if block.type == "text":
1926
- result.append({"type": "text", "text": getattr(block, "text", "")})
1927
- elif block.type == "tool_use":
1928
- result.append({
1929
- "type": "tool_use",
1930
- "id": getattr(block, "id", ""),
1931
- "name": getattr(block, "name", ""),
1932
- "input": getattr(block, "input", {}),
1933
- })
1934
- else:
1935
- result.append({"type": "text", "text": str(block)})
1936
- return result
1937
-
1938
-
1939
- def _clean_history(history: list) -> list:
1940
- """Remove turns that would cause a 400 from the Anthropic API.
1941
-
1942
- Strips orphaned tool_use blocks (assistant turn with tool_use but no
1943
- following tool_result turn) and consecutive same-role turns that result
1944
- from a previous session that crashed mid-tool-loop.
1945
- """
1946
- cleaned = []
1947
- i = 0
1948
- while i < len(history):
1949
- turn = history[i]
1950
- role = turn.get("role", "")
1951
- content = turn.get("content", [])
1952
-
1953
- # Drop assistant turns that contain tool_use if the next turn isn't tool_result
1954
- if role == "assistant" and isinstance(content, list):
1955
- has_tool_use = any(
1956
- (isinstance(b, dict) and b.get("type") == "tool_use")
1957
- for b in content
1958
- )
1959
- if has_tool_use:
1960
- next_turn = history[i + 1] if i + 1 < len(history) else None
1961
- next_content = (next_turn or {}).get("content", [])
1962
- has_result = isinstance(next_content, list) and any(
1963
- (isinstance(b, dict) and b.get("type") == "tool_result")
1964
- for b in next_content
1965
- )
1966
- if not has_result:
1967
- i += 1 # skip orphaned tool_use turn
1968
- continue
1969
-
1970
- # Drop consecutive same-role turns (keep the last one)
1971
- if cleaned and cleaned[-1].get("role") == role:
1972
- cleaned[-1] = turn
1973
- else:
1974
- cleaned.append(turn)
1975
- i += 1
1976
- return cleaned
1977
-
1978
-
1979
- # ── API-key path (structured tools, full agentic loop) ────────────────────────
1980
-
1981
- async def _handle_with_api(
1982
- message: str,
1983
- history: list,
1984
- cfg_loader,
1985
- store,
1986
- slack_client=None,
1987
- user_name: str = "",
1988
- user_id: str = "",
1989
- attachments: list | None = None,
1990
- channel: str = "",
1991
- is_admin: bool = False,
1992
- ) -> tuple[str, bool]:
1993
- import anthropic
1994
-
1995
- api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
1996
- client = anthropic.Anthropic(api_key=api_key)
1997
-
1998
- paused = Path("SENTINEL_PAUSE").exists()
1999
- repos = list(cfg_loader.repos.keys())
2000
- ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
2001
- known_projects = [_read_project_name(d) for d in _find_project_dirs()]
2002
- log_sources = list(cfg_loader.log_sources.keys())
2003
- slack_mention = f"<@{user_id}>" if user_id else (user_name or "")
2004
- known_users = store.get_all_users() # {user_id: display_name}
2005
- users_hint = ", ".join(f"<@{uid}> = {name}" for uid, name in known_users.items())
2006
- system = (
2007
- _SYSTEM
2008
- + (f"\nYou are speaking with: {user_name} (Slack mention: {slack_mention})" if user_name else "")
2009
- + "\nAlways start your reply by addressing the user directly using their Slack mention, e.g. \"<@U123> here is what I found...\"."
2010
- + " Never use their plain name — always use the <@USER_ID> format so Slack highlights it."
2011
- + (f"\nKnown Slack users: {users_hint}" if users_hint else "")
2012
- + f"\n\nCurrent time: {ts}"
2013
- + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
2014
- + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
2015
- + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
2016
- + (f"\nKnown projects in workspace: {', '.join(known_projects)}" if known_projects else "")
2017
- + f"\nAdmin access for this user: {'YES — admin tools are available' if is_admin else 'NO — admin tools will be refused'}"
2018
- )
2019
-
2020
- # Build user content — include attachment blocks if any
2021
- attach_blocks = _attachments_to_api_blocks(attachments or [])
2022
- if attach_blocks:
2023
- user_content = attach_blocks + [{"type": "text", "text": message}]
2024
- else:
2025
- user_content = message
2026
-
2027
- # Work on a local copy — only commit to history on success to prevent
2028
- # cascading 400s if the API rejects a malformed/corrupted history.
2029
- messages = list(history) + [{"role": "user", "content": user_content}]
2030
-
2031
- while True:
2032
- response = client.messages.create(
2033
- model="claude-opus-4-6",
2034
- max_tokens=2048,
2035
- system=system,
2036
- tools=_TOOLS,
2037
- messages=messages,
2038
- )
2039
-
2040
- text_parts = []
2041
- tool_blocks = []
2042
- for block in response.content:
2043
- if block.type == "text":
2044
- text_parts.append(block.text)
2045
- elif block.type == "tool_use":
2046
- tool_blocks.append(block)
2047
-
2048
- if not tool_blocks:
2049
- reply = " ".join(text_parts).strip()
2050
- is_done = "[DONE]" in reply
2051
- reply = reply.replace("[DONE]", "").strip()
2052
- if not reply:
2053
- greeting = f"Hi {user_name}! " if user_name else "Hi! "
2054
- reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"
2055
- # Heuristic override: if reply ends with a question, Claude is waiting for input
2056
- if is_done and re.search(r'\?\s*$', reply):
2057
- is_done = False
2058
- # Commit to history only on success — serialize SDK objects to plain dicts
2059
- history.append({"role": "user", "content": user_content})
2060
- history.append({"role": "assistant", "content": _serialize_content(response.content)})
2061
- return reply, is_done
2062
-
2063
- messages.append({"role": "assistant", "content": _serialize_content(response.content)})
2064
- tool_results = []
2065
- for tc in tool_blocks:
2066
- result = await _run_tool(tc.name, tc.input, cfg_loader, store, slack_client=slack_client, user_id=user_id, channel=channel, is_admin=is_admin)
2067
- logger.info("Boss tool: %s(%s) → %s", tc.name, tc.input, result[:120])
2068
- tool_results.append({
2069
- "type": "tool_result",
2070
- "tool_use_id": tc.id,
2071
- "content": result,
2072
- })
2073
- messages.append({"role": "user", "content": tool_results})
2074
-
2075
-
2076
- # ── Main entry point ──────────────────────────────────────────────────────────
2077
-
2078
- async def handle_message(
2079
- message: str,
2080
- history: list,
2081
- cfg_loader,
2082
- store,
2083
- slack_client=None,
2084
- user_name: str = "",
2085
- user_id: str = "",
2086
- attachments: list | None = None,
2087
- channel: str = "",
2088
- is_admin: bool = False,
2089
- ) -> tuple[str, bool]:
2090
- """
2091
- Process one user message through the Sentinel Boss (Claude with tool use).
2092
-
2093
- Priority:
2094
- 1. Claude Pro / OAuth via `claude --print` (CLI path — no API key needed)
2095
- 2. ANTHROPIC_API_KEY fallback (structured tools, full agentic loop)
2096
-
2097
- Returns:
2098
- (reply_text, is_done)
2099
- is_done=True → session complete, release the Slack queue slot.
2100
- is_done=False waiting for user follow-up, keep the slot.
2101
- """
2102
- api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
2103
-
2104
- # 1st priority: ANTHROPIC_API_KEY — full structured tools, cheap per-token for Boss queries
2105
- if api_key:
2106
- try:
2107
- import anthropic # noqa: F401
2108
- return await _handle_with_api(
2109
- message, history, cfg_loader, store, slack_client=slack_client,
2110
- user_name=user_name, user_id=user_id, attachments=attachments, channel=channel,
2111
- is_admin=is_admin,
2112
- )
2113
- except Exception as api_err:
2114
- err_str = str(api_err)
2115
- # Detect rate-limit / auth failure and alert Slack before falling through
2116
- cfg = cfg_loader.sentinel
2117
- if is_rate_limited(err_str):
2118
- from .notify import rate_limit_message
2119
- alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
2120
- "sentinel_boss/api", err_str)
2121
- logger.warning("Boss: API key path failed (%s), trying CLI fallback", err_str)
2122
-
2123
- # 2nd priority: Claude Pro / OAuth via CLI (limited tools but no API key needed)
2124
- cli_reply, cli_done = await _handle_with_cli(
2125
- message, history, cfg_loader, store, slack_client=slack_client, user_name=user_name,
2126
- user_id=user_id, attachments=attachments, is_admin=is_admin,
2127
- )
2128
- if not cli_reply.startswith(":warning:"):
2129
- return cli_reply, cli_done
2130
-
2131
- # Both paths failed — alert Slack and return error
2132
- cfg = cfg_loader.sentinel
2133
- err_output = cli_reply
2134
- alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
2135
- "sentinel_boss/cli", err_output)
2136
- if not api_key:
2137
- # No auth at all configured
2138
- no_auth_msg = (
2139
- ":warning: *Sentinel Boss — no Claude auth configured*\n"
2140
- "Configure at least one of:\n"
2141
- " `ANTHROPIC_API_KEY` in `sentinel.properties` — full features\n"
2142
- "• Claude Pro OAuth: run `claude login` on the server — required for fix_engine\n"
2143
- "See: https://github.com/misterhuydo/Sentinel#authentication"
2144
- )
2145
- slack_alert(cfg.slack_bot_token, cfg.slack_channel, no_auth_msg)
2146
- return ":warning: No Claude authentication configured. See Slack for details.", True
2147
- return cli_reply, cli_done
1
+ """
2
+ sentinel_boss.py — Claude-backed Sentinel Boss.
3
+
4
+ Claude acts as the boss: reads project state, decides on actions,
5
+ executes them via tool use, and responds naturally. One agentic loop
6
+ per turn — Claude may call multiple tools before replying.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ import re
13
+ import subprocess
14
+ import uuid
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from .notify import alert_if_rate_limited, slack_alert, is_rate_limited
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # ── System prompt ────────────────────────────────────────────────────────────
24
+
25
+ _SYSTEM = """\
26
+ You are Sentinel Boss — the AI interface for Sentinel, a 24/7 autonomous DevOps agent.
27
+
28
+ Sentinel watches production logs, detects errors, generates code fixes via Claude Code,
29
+ and opens GitHub PRs for admin review (or pushes directly if AUTO_PUBLISH=true).
30
+
31
+ Your job:
32
+ - Understand what the DevOps engineer needs in natural language
33
+ - Query Sentinel's live state (errors, fixes, open PRs) on their behalf
34
+ - Deliver tasks/issues to the right project — you know all projects in this workspace
35
+ - Control Sentinel (pause/resume) when asked
36
+ - Give honest, concise answers — you know this system inside out
37
+ - If a project name is unclear or ambiguous, ask the engineer to clarify — never guess
38
+
39
+ What you can do (tools available):
40
+
41
+ 1. get_status — Show recent errors detected, fixes applied/pending, open PRs.
42
+ e.g. "what happened today?", "any issues?", "show open PRs"
43
+
44
+ 2. create_issue — Deliver a fix/task to any project in this workspace by short name.
45
+ You know all project names — use list_projects if you're unsure.
46
+ If the project name is ambiguous or not found, ask to clarify.
47
+ e.g. "tell 1881 to fix X", "look into Y in elprint", "investigate Z"
48
+
49
+ 3. pause_sentinel — Create SENTINEL_PAUSE file to halt all auto-fix activity.
50
+ e.g. "pause sentinel", "stop auto-fixing"
51
+
52
+ 4. resume_sentinel — Remove SENTINEL_PAUSE file to resume normal operation.
53
+ e.g. "resume sentinel", "unpause"
54
+
55
+ 5. list_projects — List all configured repos and log sources in this Sentinel instance.
56
+ e.g. "what projects are you watching?", "list all repos"
57
+
58
+ 6. search_logs — SSH live to servers and grep logs in real time (uses fetch_log.sh with
59
+ the query as GREP_FILTER). Falls back to cached files if unavailable.
60
+ e.g. "search logs for illegal PIN in 1881", "find X in SSOLWA", "grep logs for Z"
61
+
62
+ 6b. filter_logs Instant keyword/regex search on locally-synced logs. No SSH, sub-second.
63
+ Supports time range (since_hours) and case options.
64
+ e.g. "filter logs for TryDig", "find appid=X in STS logs", "errors last 6h"
65
+
66
+ 7. trigger_poll — Trigger an immediate poll cycle without waiting for the schedule.
67
+ e.g. "check now", "poll immediately", "don't wait, run now"
68
+
69
+ 8. get_repo_status Show the current git branch, last commit, and recent fix branches
70
+ for a specific repository.
71
+ e.g. "status of repo X", "what branch is cairn on?"
72
+
73
+ 9. list_recent_commits List the most recent commits in a repo (including Sentinel's auto-fixes).
74
+ e.g. "show me recent commits in elprint-sales", "what did sentinel commit?"
75
+
76
+ 10. get_fix_detail — Get full details of a specific fix: error, patch path, PR URL, status.
77
+ e.g. "show fix abc123", "details on that fix"
78
+
79
+ 11. list_errors — List recent errors from the state store, optionally filtered by repo or source.
80
+ e.g. "show all errors today", "what errors hit elprint this week?"
81
+
82
+ 12. pull_repo — Run git pull on one or all managed application repos.
83
+ e.g. "pull changes", "git pull all repos", "update the code"
84
+
85
+ 13. pull_config — Run git pull on one or all Sentinel project config dirs.
86
+ e.g. "pull config for 1881", "update sentinel config", "pull all configs"
87
+
88
+ 14. fetch_logs — Run fetch_log.sh on demand to pull fresh logs from remote servers right now.
89
+ Supports --debug mode and parameter overrides (tail count, grep filter).
90
+ e.g. "fetch logs", "try fetch_log.sh for SSOLWA", "fetch logs with debug",
91
+ "grab latest logs from STS", "fetch logs without filter"
92
+
93
+ 15. watch_bot — Register a Slack bot for passive monitoring. Every message it posts is
94
+ auto-queued as an issue in the bot's registered project.
95
+ ALWAYS requires a project infer from context or ask the user first.
96
+ e.g. "listen to @alertbot", "watch @bot1 @bot2 for project 1881", "monitor @errorbot"
97
+
98
+ 16. unwatch_bot — Remove a Slack bot from the passive watch list.
99
+ e.g. "stop watching @alertbot", "unwatch @errorbot"
100
+
101
+ 17. list_watched_botsShow all Slack bots currently being passively monitored and which projects
102
+ they are delivering to.
103
+ e.g. "which bots are you watching?", "list monitored bots"
104
+
105
+ 18. upgrade_sentinel Pull the latest Sentinel agent code, update Python deps, and restart the
106
+ process. Safe to run at any time no restart if already up to date.
107
+ e.g. "upgrade sentinel", "update sentinel", "upgrade yourself"
108
+
109
+ 19. ask_codebase — Ask any natural-language question about a managed repo's codebase.
110
+ Claude Code answers using its full knowledge of the code.
111
+ e.g. "what does the 1881 backend do?", "find PIN validation in elprint",
112
+ "any TODOs in cairn?", "are there security issues in elprint-sales?"
113
+
114
+ 20. restart_project Stop and restart a specific Sentinel monitoring instance (stop.sh + start.sh).
115
+ This restarts the Sentinel agent for that project, NOT the application itself.
116
+ e.g. "restart sentinel for 1881", "restart the 1881 monitor", "reload elprint sentinel"
117
+
118
+ 21. tail_log — Fetch the last N lines of a log source live, without a grep filter.
119
+ e.g. "show recent SSOLWA logs", "tail STS", "last 200 lines from 1881 logs"
120
+
121
+ 22. post_file — Upload a text file to the Slack conversation (diff, log excerpt, report, CSV).
122
+ Use when output is too large for chat, or the user asks to download/export something.
123
+ e.g. "give me that as a file", "export the log", "send me the diff"
124
+
125
+ When someone asks what you can do, what you support, what your capabilities are, or how you can help,
126
+ reply with a short summary grouped by category:
127
+
128
+ *Monitoring & status*
129
+ • `get_status` — errors detected, fixes applied/pending/failed, open PRs — "what happened today?"
130
+ • `get_repo_status` — per-repo breakdown of errors and fixes — "how is elprint doing?"
131
+ • `list_recent_commits` — recent Sentinel auto-fix commits — "what did Sentinel commit?"
132
+
133
+ *Log management*
134
+ • `fetch_logs` — pull fresh logs from servers right now — "fetch logs for SSOLWA"
135
+ • `search_logs` — live SSH grep on production servers — "search logs for illegal PIN in 1881"
136
+ • `filter_logs` — instant grep on locally-synced logs (no SSH) — "filter logs for TryDig", "show errors from last 24h"
137
+ • `tail_log` — last N lines of a log source, no filter — "show recent SSOLWA logs"
138
+
139
+ *Codebase questions*
140
+ • `ask_codebase` — any question about a repo's code — "what does 1881 do?", "find PIN validation", "any TODOs?", "security issues?"
141
+
142
+ *Fix management*
143
+ • `get_fix_details` — full details of a specific fix — "show fix abc123"
144
+ • `list_pending_prs` — all open Sentinel PRs awaiting review — "list open PRs"
145
+ • `check_auth_status` — Claude auth health, rate-limit circuit state, fix engine 24 h stats — "is Claude working?", "any rate limits?", "auth issues?"
146
+
147
+ *Project & task delivery*
148
+ • `list_projects` — all projects and repos Sentinel manages — "what projects do you manage?"
149
+ • `create_issue` — deliver a task to any project by name — "tell 1881 to fix X"
150
+ • `trigger_poll` — run a log-fetch + fix cycle right now — "check now"
151
+ • `pause_sentinel` / `resume_sentinel` — halt or resume all auto-fix activity — "pause Sentinel"
152
+
153
+ *Repo & config sync*
154
+ • `pull_repo` — git pull on managed application repos — "pull latest code"
155
+ • `pull_config` — git pull on Sentinel config dirs — "pull config for elprint"
156
+
157
+ *File sharing*
158
+ • `post_file` — upload a file to Slack — "give me that as a file", "export the log", "send me the diff"
159
+
160
+ *Personal*
161
+ • `my_stats` — your activity: issues submitted, fixes, conversation history — "my stats"
162
+ • `clear_my_history` — wipe your conversation history and start fresh — "clear my history"
163
+
164
+ *Slack bot watching*
165
+ • `list_watched_bots` — show all bots currently being monitored — "which bots are you watching?"
166
+
167
+ *Admin* (SLACK_ADMIN_USERS if configured, otherwise all allowed users)
168
+ • `watch_bot` — register a Slack bot for passive monitoring; its messages become issues — "listen to @alertbot"
169
+ • `unwatch_bot` — stop monitoring a bot — "stop watching @errorbot"
170
+ • `restart_project` — stop + restart a Sentinel monitoring instance (not the app) — "restart sentinel for 1881"
171
+ • `upgrade_sentinel` — pull latest Sentinel release and restart — "upgrade sentinel"
172
+ • `list_all_users` — all Slack users who have talked to Sentinel + activity summary
173
+ • `clear_user_history` — wipe a specific user's conversation history
174
+ • `reset_fingerprint` — clear the 24h fix lock so Sentinel retries an error
175
+ • `list_all_errors` — full unfiltered error database
176
+ • `export_db` — dump full Sentinel state as a downloadable file
177
+
178
+ About Sentinel — answer any question someone asks:
179
+
180
+ Sentinel is a 24/7 autonomous DevOps agent deployed per-project. Here is everything you know:
181
+
182
+ Architecture:
183
+ - Poll loop every POLL_INTERVAL_SECONDS (default 120s)
184
+ - Log sources: SSH servers (rsync + live grep) or Cloudflare worker endpoints
185
+ - Local sync: rsync --append-verify copies remote logs to workspace/synced/ every SYNC_INTERVAL_SECONDS (default 300s); full history accumulated locally
186
+ - Error detection: regex-based parsing, multi-line stack trace grouping, fingerprinting (hash of normalised message + top 3 stack frames)
187
+ - Dedup: SQLite state_store.db 24h cooldown per fingerprint, plus git log check before each fix
188
+ - Routing: TARGET_REPO=auto uses PACKAGE_PREFIXES to map stack trace frames to the correct repo; explicit TARGET_REPO overrides
189
+ - Fix engine: Claude Code headless (claude --print) with structured prompt (error + stack trace + Cairn MCP context); unified diff output; max 5 files / 200 lines
190
+ - Commit: git pull --rebase, apply patch, run tests, commit with sentinel/fix-<fp> marker
191
+ - Publish: AUTO_PUBLISH=true push to main + CI/CD trigger; AUTO_PUBLISH=false branch + GitHub PR
192
+ - Fix confirmation: SENTINEL marker injected into every modified method; when marker appears in production logs quiet period starts; after MARKER_CONFIRM_HOURS with no recurrence → fix confirmed
193
+
194
+ Health monitoring (HEALTH_URL per repo):
195
+ - Polls the URL on each cycle; expects JSON with "Status": "true"
196
+ - 502/503/504 or connection refused → status=stopped
197
+ - 200 + Status != true → status=failing
198
+ - stopped + startup failure in synced logs → auto-fix attempt (Spring Boot BeanCreationException, NoSuchMethodError, APPLICATION FAILED TO START, etc.)
199
+ - stopped + no startup errors → asks human ONCE "is this deliberate?", then stays silent (state=pending)
200
+ - Human says "maintenance <repo>" → state=confirmed, fully silent until recovery
201
+ - Recovery (health=healthy again) → clears state, posts "App X is back online"
202
+
203
+ Duplicate / cross-source dedup:
204
+ - Fingerprint-based: same error from monitor bot + log scan → same fingerprint → state_store dedup
205
+ - git log check: before each fix attempt, checks recent commits for the fingerprint — skips if already fixed
206
+ - 24h cooldown per fingerprint prevents retry spam
207
+
208
+ Slack Boss:
209
+ - Socket Mode (xapp-... app-level token + xoxb-... bot token)
210
+ - Per-user sessions with SQLite-persisted history (last 40 messages)
211
+ - Tool-use loop with Anthropic API (cheap per-token, structured tools)
212
+ - Falls back to claude CLI if no API key configured
213
+ - Admin users (SLACK_ADMIN_USERS) can access destructive/sensitive tools
214
+
215
+ Common config questions:
216
+ - ANTHROPIC_API_KEY: used by Boss conversation (structured tool-use, cheap); optional for Fix Engine when CLAUDE_PRO_FOR_TASKS=true
217
+ - CLAUDE_PRO_FOR_TASKS=true (default): Fix Engine calls claude CLI using Claude Pro OAuth billing; falls back to API key on auth error
218
+ - AUTO_PUBLISH=false (default): Sentinel opens a PR for admin review; =true: pushes directly to main
219
+ - SYNC_RETENTION_DAYS (default 30): delete synced log files older than N days
220
+ - SYNC_MAX_FILE_MB (default 200): truncate synced log files exceeding this size (drops oldest half of lines)
221
+ - HEALTH_URL: HTTP endpoint per repo; JSON with "Status": "true" = healthy
222
+ - TARGET_REPO=auto: route errors to repo by longest-matching PACKAGE_PREFIXES; =<name>: always route to that repo
223
+ - SLACK_ALLOWED_USERS: if set, only these Slack user IDs can interact with Boss
224
+ - SLACK_ADMIN_USERS: subset of allowed users with access to admin-only tools (reset_fingerprint, export_db, watch_bot, etc.)
225
+
226
+ Required Slack Bot Token scopes: app_mentions:read, channels:history, groups:history, im:history, chat:write, files:read, reactions:write, users:read, conversations.connect:read
227
+ Required App-Level Token scope (Socket Mode): connections:write
228
+ Events to subscribe: app_mention, message.im, message.channels
229
+
230
+ Tone: direct, professional, like a senior engineer who owns the system.
231
+ Don't pad responses. Don't say "Great question!" or "Certainly!".
232
+ If you don't know something, use a tool to find out before saying you don't know.
233
+
234
+ When to act vs. when to ask:
235
+ - Clear command ("check status", "fetch logs", "pause sentinel") → call the tool immediately, reply with results.
236
+ - Ambiguous or exploratory ("what does get_repo_status do?", "tell me about search_logs") → explain the tool naturally, then ask: "Want me to run it?"
237
+ - Unclear intent (could be either) → use judgment: brief explanation + "Want me to run this now?"
238
+ - Prefer filter_logs over search_logs when synced logs are available — it's instant and never causes session timeout.
239
+ Use search_logs only when the user explicitly wants live/real-time data or synced logs are not yet available.
240
+ - If a tool call will take a moment (search, fetch, pull), prefix your reply with a brief "working" line ending in "..." before the results, e.g. "Searching SSOLWA for TryDig activity..." then the actual output.
241
+ Never just say a working line and stop — always follow it with the results in the same message.
242
+
243
+ Search reasoning — always do this before calling filter_logs or search_logs:
244
+ 1. Interpret intent: what is the user actually looking for? Don't pass the raw message as the query.
245
+ Examples:
246
+ - "TryDig errors" → query="TryDig" (component name; look for it in any context)
247
+ - "payment failures last hour" → query="pay|payment|transaction", since_hours=1
248
+ - "why is the app crashing" → query="Exception|Error|FAILED|crash", look for stack traces
249
+ - "login issues today" → query="login|auth|401|403|session", since_hours=24
250
+ - "slow requests" → query="timeout|slow|latency|took [0-9]+ms|duration"
251
+ - "startup problems" → query="APPLICATION FAILED|BeanCreation|NoSuchMethod|ClassNotFound"
252
+ Use | in the regex to cover synonyms and related terms. Keep it focused — not too broad.
253
+ 2. Choose since_hours if a time window is implied ("last hour", "today", "this morning").
254
+ 3. Pick source if the user mentioned a specific service (SSOLWA, STS, etc.) or server.
255
+
256
+ After getting filter_logs results, always synthesize — never dump raw output:
257
+ - Lead with 1-2 sentences: total count, affected sources, dominant pattern.
258
+ e.g. "Found 47 matches across SSOLWA and STS — mostly NullPointerException in DigService (31 hits)."
259
+ - List the top 3-5 patterns with counts in plain language.
260
+ - Call out any notable time clustering (e.g. "spike between 10:23–10:47 UTC").
261
+ - Show 2-3 example lines at most — only the most informative ones.
262
+ - End with a recommendation if the pattern suggests something actionable:
263
+ e.g. "Looks like a dependency resolution issue — create an issue?" or "Pattern consistent with a null config value at startup."
264
+ - If total_matches=0, say so plainly and suggest what else to try.
265
+
266
+ Session context — critical rules:
267
+ - Loaded conversation history is prior-session background only. It may be hours or days old.
268
+ - NEVER say "the previous search", "I already fetched", "as I found earlier", or any phrase implying you already did part of the current task — unless a tool result appears in THIS response's tool calls.
269
+ - When handling a new request, call the tools fresh. Do not assume any prior tool result is still current or that any prior step "counts" toward the current task.
270
+ - The only exception: if the user explicitly asks about something from the history ("what did you find earlier?"), you may reference it — but note it is from a prior session.
271
+
272
+ Trust your tool results — never contradict them:
273
+ - If any search_logs call in this response returned total_matches > 0, you HAVE results. Report them.
274
+ - Never say "no results found" or "nothing was found" when a tool result shows total_matches > 0.
275
+ - If one source-specific call returns 0 but a broader call returned matches, use the broader results.
276
+ - A cached result with files_searched=0 is a source-name lookup failure, NOT an absence of log data.
277
+ Treat it as "source not recognised" and fall back to the broad search results you already have.
278
+
279
+ Avoid redundant tool calls (within a single response only — always run tools fresh for new requests):
280
+ - If a broad search (e.g. search_logs with no source filter) already returned results in THIS response, do NOT repeat the same search with a source filter to "refine" — use what you already fetched.
281
+ - If a tool call fails in THIS response, do NOT retry the entire search from scratch. Continue with what succeeded and note the failure.
282
+ - One pass per task: gather all needed data in a single round of tool calls, then produce the final answer.
283
+
284
+ Issue identification — before calling create_issue:
285
+ 1. Determine if the message is a REAL issue/task (bug report, feature request, investigation ask)
286
+ vs. a status question, tool query, or casual chat. If not an issue, just answer normally.
287
+ 2. If it IS an issue, gather what's needed before creating:
288
+ - Project: which project? If unclear, ask. Use list_projects if you need to check names.
289
+ - Context: what's the problem? Include everything: description, error text, steps to reproduce.
290
+ - Attachments: summarise any files/screenshots the user shared.
291
+ - Support URL: note any ticket/doc/link the user mentioned.
292
+ - Identity: always captured automatically from the Slack session.
293
+ 3. Populate `findings` with curated evidence — only when relevant and concise:
294
+ - If you ran search_logs, tail_log, ask_codebase, or get_status before creating the issue,
295
+ summarise only the findings directly related to this specific issue.
296
+ - Do NOT paste raw tool output. Summarise: which services, how often, key pattern, 1-3 example lines.
297
+ - If the search returned nothing relevant, or the issue is purely user-described with no log evidence, leave `findings` empty.
298
+ - The fix engine reads only the issue file. Give it signal, not noise — 500 words max.
299
+ 4. Before calling the tool, confirm with the user in natural language:
300
+ e.g. "I'll create an issue for project *1881* — here's what I have: [summary]. Look right?"
301
+ Wait for their confirmation before proceeding.
302
+ EXCEPTION: if the user's message already contains a clear project + unambiguous description,
303
+ skip the confirmation and create immediately — don't ask when nothing is unclear.
304
+ 5. After creating, tell them the issue was queued and Sentinel will pick it up on the next poll.
305
+
306
+ When the engineer's request is fully handled, end your LAST message with the token: [DONE]
307
+ IMPORTANT: Always write your actual reply text FIRST, then append [DONE] at the end. Example: "Hello! I'm Sentinel. [DONE]". Never output [DONE] as your only content.
308
+ For greetings like "hello" or empty messages, introduce yourself briefly and offer help, then end with [DONE].
309
+ If you need a follow-up from them, do NOT include [DONE] — wait for their next message.
310
+ """
311
+
312
+ # ── Tool definitions ─────────────────────────────────────────────────────────
313
+
314
+ _TOOLS = [
315
+ {
316
+ "name": "get_status",
317
+ "description": (
318
+ "Get recent errors, fixes applied, fixes pending review, and open PRs. "
319
+ "Use for: 'what happened today?', 'any issues?', 'how are things?', "
320
+ "'what are the open PRs?', 'did sentinel fix anything?'"
321
+ ),
322
+ "input_schema": {
323
+ "type": "object",
324
+ "properties": {
325
+ "hours": {
326
+ "type": "integer",
327
+ "description": "Look-back window in hours (default 24)",
328
+ "default": 24,
329
+ },
330
+ },
331
+ },
332
+ },
333
+ {
334
+ "name": "create_issue",
335
+ "description": (
336
+ "Deliver a confirmed issue/task to a Sentinel project instance. "
337
+ "Only call this after you have: (1) confirmed the message is a real issue or task, "
338
+ "(2) identified the target project, (3) gathered enough context, and "
339
+ "(4) confirmed with the user ('I'll create this issue for project X — does that look right?'). "
340
+ "Do NOT call this for status questions, tool queries, or casual chat."
341
+ ),
342
+ "input_schema": {
343
+ "type": "object",
344
+ "properties": {
345
+ "description": {
346
+ "type": "string",
347
+ "description": "Full problem/task description — include all context the user gave you",
348
+ },
349
+ "project": {
350
+ "type": "string",
351
+ "description": "Project short name (e.g. '1881', 'elprint'). Ask if unclear.",
352
+ },
353
+ "target_repo": {
354
+ "type": "string",
355
+ "description": "Specific repo within the project (omit to let Sentinel auto-route)",
356
+ },
357
+ "support_url": {
358
+ "type": "string",
359
+ "description": "Any URL the user shared (ticket, doc, screenshot link, etc.)",
360
+ },
361
+ "attachments_summary": {
362
+ "type": "string",
363
+ "description": "Summary of any files/screenshots the user attached",
364
+ },
365
+ "findings": {
366
+ "type": "string",
367
+ "description": (
368
+ "A concise, curated summary of evidence directly relevant to this issue "
369
+ "NOT raw tool output. Include only what the fix engine needs: "
370
+ "key error patterns, affected services, approximate frequency/timestamps, "
371
+ "and 1-3 representative log lines. Omit unrelated results. "
372
+ "Keep under 500 words. Leave empty if no tool results are relevant."
373
+ ),
374
+ },
375
+ },
376
+ "required": ["description"],
377
+ },
378
+ },
379
+ {
380
+ "name": "get_fix_details",
381
+ "description": "Get full details of a specific fix by fingerprint (8+ hex chars).",
382
+ "input_schema": {
383
+ "type": "object",
384
+ "properties": {
385
+ "fingerprint": {"type": "string"},
386
+ },
387
+ "required": ["fingerprint"],
388
+ },
389
+ },
390
+ {
391
+ "name": "list_pending_prs",
392
+ "description": "List all open Sentinel PRs awaiting admin review.",
393
+ "input_schema": {"type": "object", "properties": {}},
394
+ },
395
+ {
396
+ "name": "check_auth_status",
397
+ "description": (
398
+ "Check Claude authentication health, current rate-limit / usage-limit circuit state, "
399
+ "and fix engine stats for the last 24 h. "
400
+ "Use when someone asks: 'is Claude working?', 'any rate limits?', 'why aren't fixes running?', "
401
+ "'is the API key OK?', 'auth issues?', 'fix engine status'."
402
+ ),
403
+ "input_schema": {"type": "object", "properties": {}},
404
+ },
405
+ {
406
+ "name": "pause_sentinel",
407
+ "description": (
408
+ "Pause ALL Sentinel fix activity immediately. "
409
+ "Use when the engineer says 'pause', 'stop', 'freeze', or 'hold off'."
410
+ ),
411
+ "input_schema": {"type": "object", "properties": {}},
412
+ },
413
+ {
414
+ "name": "resume_sentinel",
415
+ "description": "Resume Sentinel fix activity after a pause.",
416
+ "input_schema": {"type": "object", "properties": {}},
417
+ },
418
+ {
419
+ "name": "list_projects",
420
+ "description": (
421
+ "List all projects (Sentinel instances) in this workspace and the repos "
422
+ "each one manages. Use for: 'what projects do you manage?', 'list projects', "
423
+ "'what repos are configured?', 'show me all projects'."
424
+ ),
425
+ "input_schema": {"type": "object", "properties": {}},
426
+ },
427
+ {
428
+ "name": "search_logs",
429
+ "description": (
430
+ "Search production logs for a keyword or pattern. "
431
+ "When a project or source is specified (or can be inferred), performs a LIVE fetch "
432
+ "via fetch_log.sh with the query as the grep filter — SSHes directly to the server. "
433
+ "Falls back to searching locally-cached log files when no source can be determined. "
434
+ "Use for: 'search logs for illegal PIN in 1881', 'find X in SSOLWA logs', "
435
+ "'what did user Y do?', 'show entries for appid=Z', 'grep logs for X'."
436
+ ),
437
+ "input_schema": {
438
+ "type": "object",
439
+ "properties": {
440
+ "query": {
441
+ "type": "string",
442
+ "description": "Keyword or regex to grep for",
443
+ },
444
+ "source": {
445
+ "type": "string",
446
+ "description": "Log source name to search (partial match against log-config filenames, e.g. 'SSOLWA', '1881'). Leave empty to search all sources.",
447
+ },
448
+ "max_matches": {
449
+ "type": "integer",
450
+ "description": "Max matching lines to return per source (default 30)",
451
+ "default": 30,
452
+ },
453
+ "tail": {
454
+ "type": "integer",
455
+ "description": (
456
+ "Number of log lines to fetch from the server before grepping (default: config value, typically 500). "
457
+ "Increase when the user asks for a longer time window — e.g. 'yesterday up to now' → use 5000-10000. "
458
+ "Higher values take longer but cover more history."
459
+ ),
460
+ },
461
+ },
462
+ "required": ["query"],
463
+ },
464
+ },
465
+ {
466
+ "name": "filter_logs",
467
+ "description": (
468
+ "Search locally-synced log files by keyword or regex — instant, no SSH required. "
469
+ "Use this for fast queries once logs are synced (check with list_projects if unsure). "
470
+ "Supports time-range filtering and case options. "
471
+ "Use for: 'find TryDig in synced logs', 'show errors from last 24h', "
472
+ "'filter logs for appid=X', 'search local logs for Y'."
473
+ ),
474
+ "input_schema": {
475
+ "type": "object",
476
+ "properties": {
477
+ "query": {
478
+ "type": "string",
479
+ "description": "Keyword or regex to search for",
480
+ },
481
+ "source": {
482
+ "type": "string",
483
+ "description": "Log source name (partial match, e.g. 'STS', 'SSOLWA'). Leave empty to search all synced sources.",
484
+ },
485
+ "since_hours": {
486
+ "type": "integer",
487
+ "description": "Only return lines from the last N hours (uses log line timestamps). Omit for all available history.",
488
+ },
489
+ "max_matches": {
490
+ "type": "integer",
491
+ "description": "Max matching lines to return per source file (default 50)",
492
+ "default": 50,
493
+ },
494
+ "case_sensitive": {
495
+ "type": "boolean",
496
+ "description": "Case-sensitive match (default false)",
497
+ "default": False,
498
+ },
499
+ },
500
+ "required": ["query"],
501
+ },
502
+ },
503
+ {
504
+ "name": "trigger_poll",
505
+ "description": (
506
+ "Trigger an immediate log-fetch and error-detection cycle without waiting "
507
+ "for the next scheduled interval. Use when: 'check now', 'run now', "
508
+ "'poll immediately', 'don't wait'."
509
+ ),
510
+ "input_schema": {"type": "object", "properties": {}},
511
+ },
512
+ {
513
+ "name": "get_repo_status",
514
+ "description": (
515
+ "Per-repository breakdown of errors detected and fixes applied. "
516
+ "Use for: 'how is repo X doing?', 'which repo has the most issues?', "
517
+ "'break down by repo'."
518
+ ),
519
+ "input_schema": {
520
+ "type": "object",
521
+ "properties": {
522
+ "hours": {
523
+ "type": "integer",
524
+ "description": "Look-back window in hours (default 24)",
525
+ "default": 24,
526
+ },
527
+ },
528
+ },
529
+ },
530
+ {
531
+ "name": "list_recent_commits",
532
+ "description": (
533
+ "List recent commits made by Sentinel across all managed repos. "
534
+ "Use for: 'what did Sentinel commit?', 'show recent auto-fixes', 'what was changed?'."
535
+ ),
536
+ "input_schema": {
537
+ "type": "object",
538
+ "properties": {
539
+ "limit": {
540
+ "type": "integer",
541
+ "description": "Max commits per repo (default 5)",
542
+ "default": 5,
543
+ },
544
+ },
545
+ },
546
+ },
547
+ {
548
+ "name": "pull_repo",
549
+ "description": (
550
+ "Run git pull on one or all managed repos to fetch latest changes from GitHub. "
551
+ "Use for: 'pull changes', 'git pull', 'update repo X', 'fetch latest code'."
552
+ ),
553
+ "input_schema": {
554
+ "type": "object",
555
+ "properties": {
556
+ "repo": {
557
+ "type": "string",
558
+ "description": "Repo name to pull (omit to pull all configured repos)",
559
+ },
560
+ },
561
+ },
562
+ },
563
+ {
564
+ "name": "pull_config",
565
+ "description": (
566
+ "Run git pull on one or all Sentinel project config directories. "
567
+ "Projects are matched by short name ('1881', 'elprint') or full dir name ('sentinel-1881'). "
568
+ "Use for: 'pull config for 1881', 'update sentinel config', 'pull all configs'."
569
+ ),
570
+ "input_schema": {
571
+ "type": "object",
572
+ "properties": {
573
+ "project": {
574
+ "type": "string",
575
+ "description": "Project short name or dir name to pull (omit for all projects)",
576
+ },
577
+ },
578
+ },
579
+ },
580
+ {
581
+ "name": "fetch_logs",
582
+ "description": (
583
+ "Run fetch_log.sh for one or all configured log sources to pull the latest logs "
584
+ "from remote servers right now. Use for: 'fetch logs', 'run fetch_log.sh', "
585
+ "'grab latest logs from SSOLWA', 'try fetch_log.sh for STS', "
586
+ "'pull logs from server', 'get fresh logs'."
587
+ ),
588
+ "input_schema": {
589
+ "type": "object",
590
+ "properties": {
591
+ "source": {
592
+ "type": "string",
593
+ "description": "Log source name to fetch (partial match, e.g. 'SSOLWA'). Omit to fetch all.",
594
+ },
595
+ "debug": {
596
+ "type": "boolean",
597
+ "description": "Run fetch_log.sh with --debug flag to show SSH/grep details",
598
+ "default": False,
599
+ },
600
+ "tail": {
601
+ "type": "integer",
602
+ "description": "Override TAIL lines (how many log lines to fetch)",
603
+ },
604
+ "grep_filter": {
605
+ "type": "string",
606
+ "description": "Override GREP_FILTER (regex). Pass 'none' to disable filtering.",
607
+ },
608
+ },
609
+ },
610
+ },
611
+ {
612
+ "name": "watch_bot",
613
+ "description": (
614
+ "Tell Sentinel to passively monitor a Slack bot — queuing its messages as issues. "
615
+ "Extract all <@UXXXXXX> user IDs from the message and pass them here. "
616
+ "Sentinel verifies each is actually a bot (not a human) before adding to the watch list. "
617
+ "IMPORTANT: a bot watcher is only useful if its issues can be delivered to a project. "
618
+ "Try to infer the project from context (bot name, prior messages, available projects). "
619
+ "If it cannot be determined, do NOT call this tool — instead ask the user which project "
620
+ "the bot's alerts belong to, then call this tool with the project filled in. "
621
+ "Use for: 'listen to @alertbot', 'watch @bot1 @bot2', 'monitor @errorbot'."
622
+ ),
623
+ "input_schema": {
624
+ "type": "object",
625
+ "properties": {
626
+ "user_ids": {
627
+ "type": "array",
628
+ "items": {"type": "string"},
629
+ "description": "Slack user IDs to watch — extract from <@UXXXXXX> patterns in the message",
630
+ },
631
+ "project": {
632
+ "type": "string",
633
+ "description": "Project short name this bot's issues should be routed to (e.g. '1881', 'elprint'). Infer from context or ask user before calling.",
634
+ },
635
+ },
636
+ "required": ["user_ids"],
637
+ },
638
+ },
639
+ {
640
+ "name": "unwatch_bot",
641
+ "description": (
642
+ "Stop Sentinel from monitoring a Slack bot. "
643
+ "Use for: 'stop watching @alertbot', 'unwatch @bot', 'remove @errorbot from watchers'."
644
+ ),
645
+ "input_schema": {
646
+ "type": "object",
647
+ "properties": {
648
+ "user_ids": {
649
+ "type": "array",
650
+ "items": {"type": "string"},
651
+ "description": "Slack user IDs to remove from the watch list",
652
+ },
653
+ },
654
+ "required": ["user_ids"],
655
+ },
656
+ },
657
+ {
658
+ "name": "list_watched_bots",
659
+ "description": (
660
+ "List all Slack bots Sentinel is currently monitoring passively. "
661
+ "Use for: 'who are you watching?', 'which bots are you monitoring?', 'list watched bots'."
662
+ ),
663
+ "input_schema": {"type": "object", "properties": {}},
664
+ },
665
+ {
666
+ "name": "upgrade_sentinel",
667
+ "description": (
668
+ "Upgrade the Sentinel agent itself: git pull the latest code, update Python deps, "
669
+ "then restart the process. Safe to call at any time — if already up to date, "
670
+ "no restart is triggered. "
671
+ "Use for: 'upgrade sentinel', 'update sentinel', 'upgrade yourself', "
672
+ "'pull latest sentinel code', 'restart sentinel after upgrade'."
673
+ ),
674
+ "input_schema": {"type": "object", "properties": {}},
675
+ },
676
+ {
677
+ "name": "ask_codebase",
678
+ "description": (
679
+ "Ask any natural-language question about a managed codebase. "
680
+ "Accepts a repo name (e.g. 'STS', 'elprint-sales') OR a project name (e.g. '1881', 'elprint') "
681
+ "— if a project name is given and it has multiple repos, all are queried. "
682
+ "Claude Code answers using its full codebase knowledge — no need to specify how. "
683
+ "Use for: 'what does 1881 do?', 'TODOs in 1881', 'find PIN validation in STS', "
684
+ "'security issues in elprint-sales?', 'summarize the cairn repo'."
685
+ ),
686
+ "input_schema": {
687
+ "type": "object",
688
+ "properties": {
689
+ "repo": {
690
+ "type": "string",
691
+ "description": "Repo name (e.g. 'STS', 'elprint-sales') OR project name (e.g. '1881', 'elprint') — project name queries all its repos",
692
+ },
693
+ "question": {
694
+ "type": "string",
695
+ "description": "Natural language question about the codebase",
696
+ },
697
+ },
698
+ "required": ["repo", "question"],
699
+ },
700
+ },
701
+ {
702
+ "name": "restart_project",
703
+ "description": (
704
+ "Stop and restart a specific Sentinel monitoring instance (runs stop.sh then start.sh). "
705
+ "This restarts the Sentinel agent process for that project — it does NOT restart the application itself. "
706
+ "Use when: 'restart sentinel for 1881', 'reload the 1881 monitor', 'restart elprint sentinel'. "
707
+ "Safer than restarting all projects at once."
708
+ ),
709
+ "input_schema": {
710
+ "type": "object",
711
+ "properties": {
712
+ "project": {
713
+ "type": "string",
714
+ "description": "Project short name or dir name (e.g. '1881', 'elprint')",
715
+ },
716
+ },
717
+ "required": ["project"],
718
+ },
719
+ },
720
+ {
721
+ "name": "my_stats",
722
+ "description": (
723
+ "Show the current user's personal Sentinel dashboard: "
724
+ "conversation history length, issues they submitted, and "
725
+ "a summary of Sentinel fix activity (errors caught, fixes applied, "
726
+ "fixes pending PR review, fixes confirmed live, fixes failed). "
727
+ "Use for: 'what have you done for me?', 'show my stats', "
728
+ "'how many issues have been fixed?', 'my history', 'summary', "
729
+ "'what did sentinel fix this week?', 'pending fixes', 'open PRs'."
730
+ ),
731
+ "input_schema": {
732
+ "type": "object",
733
+ "properties": {
734
+ "hours": {
735
+ "type": "integer",
736
+ "description": "Look-back window in hours (default 168 = 7 days)",
737
+ "default": 168,
738
+ },
739
+ },
740
+ },
741
+ },
742
+ {
743
+ "name": "clear_my_history",
744
+ "description": (
745
+ "Clear the current user's conversation history with Sentinel. "
746
+ "After clearing, future sessions start with no memory of past conversations. "
747
+ "Use for: 'clear my history', 'forget our conversation', "
748
+ "'start fresh', 'reset my context', 'wipe my history'."
749
+ ),
750
+ "input_schema": {"type": "object", "properties": {}},
751
+ },
752
+ {
753
+ "name": "tail_log",
754
+ "description": (
755
+ "Fetch the last N lines of a log source's live production logs without any grep filter. "
756
+ "Use when: 'show me recent SSOLWA logs', 'tail STS', 'what's happening in 1881 logs right now', "
757
+ "'show last 100 lines from SSOLWA'. Different from search_logs — no pattern required."
758
+ ),
759
+ "input_schema": {
760
+ "type": "object",
761
+ "properties": {
762
+ "source": {
763
+ "type": "string",
764
+ "description": "Log source name (partial match against log-config filenames, e.g. 'SSOLWA', 'STS')",
765
+ },
766
+ "lines": {
767
+ "type": "integer",
768
+ "description": "Number of recent lines to fetch (default 100)",
769
+ "default": 100,
770
+ },
771
+ },
772
+ "required": ["source"],
773
+ },
774
+ },
775
+ {
776
+ "name": "post_file",
777
+ "description": (
778
+ "Upload a text file directly to the Slack conversation so the user can read or download it. "
779
+ "Use when: output is too large for a chat message, the user asks to 'download', 'export', or "
780
+ "'send as a file', or when formatted content (diffs, logs, CSVs, reports) is clearer as a file. "
781
+ "e.g. 'give me that as a file', 'export the log', 'send me the diff for PR #41', "
782
+ "'download the health report', 'export recent errors as CSV'"
783
+ ),
784
+ "input_schema": {
785
+ "type": "object",
786
+ "properties": {
787
+ "content": {
788
+ "type": "string",
789
+ "description": "The full text content of the file to upload",
790
+ },
791
+ "filename": {
792
+ "type": "string",
793
+ "description": "Filename with extension, e.g. 'fix-ab12.diff', 'sentinel-report.txt', 'errors.csv', 'ssolwa.log'",
794
+ },
795
+ "title": {
796
+ "type": "string",
797
+ "description": "Optional display title shown above the file in Slack (defaults to filename)",
798
+ },
799
+ },
800
+ "required": ["content", "filename"],
801
+ },
802
+ },
803
+ {
804
+ "name": "list_all_users",
805
+ "description": (
806
+ "ADMIN ONLY. List all Slack users who have ever talked to Sentinel, "
807
+ "with their issue count and conversation message count. "
808
+ "e.g. 'list all users', 'who has talked to you?', 'show user activity'"
809
+ ),
810
+ "input_schema": {"type": "object", "properties": {}},
811
+ },
812
+ {
813
+ "name": "clear_user_history",
814
+ "description": (
815
+ "ADMIN ONLY. Clear the conversation history for a specific Slack user. "
816
+ "e.g. 'clear history for huy', 'reset bob's conversation'"
817
+ ),
818
+ "input_schema": {
819
+ "type": "object",
820
+ "properties": {
821
+ "user_id": {
822
+ "type": "string",
823
+ "description": "Slack user ID to clear (e.g. U01AB2CD3EF)",
824
+ },
825
+ },
826
+ "required": ["user_id"],
827
+ },
828
+ },
829
+ {
830
+ "name": "reset_fingerprint",
831
+ "description": (
832
+ "ADMIN ONLY. Remove the 24h fix lock for an error fingerprint so Sentinel will retry it "
833
+ "on the next poll cycle. Use when a fix attempt failed and you want to force a retry. "
834
+ "e.g. 'retry fix abc123', 'reset fingerprint abc123de', 'let Sentinel try that error again'"
835
+ ),
836
+ "input_schema": {
837
+ "type": "object",
838
+ "properties": {
839
+ "fingerprint": {
840
+ "type": "string",
841
+ "description": "Error fingerprint hash (8+ hex chars, from get_fix_details or list_all_errors)",
842
+ },
843
+ },
844
+ "required": ["fingerprint"],
845
+ },
846
+ },
847
+ {
848
+ "name": "list_all_errors",
849
+ "description": (
850
+ "ADMIN ONLY. Return the full unfiltered error database — all fingerprints, counts, "
851
+ "sources, and last-seen times. "
852
+ "e.g. 'show all errors', 'full error list', 'dump the error DB'"
853
+ ),
854
+ "input_schema": {
855
+ "type": "object",
856
+ "properties": {
857
+ "hours": {
858
+ "type": "integer",
859
+ "description": "Limit to errors seen in the last N hours (0 = all time)",
860
+ "default": 0,
861
+ },
862
+ },
863
+ },
864
+ },
865
+ {
866
+ "name": "export_db",
867
+ "description": (
868
+ "ADMIN ONLY. Export the full Sentinel state (errors, fixes, PRs, users) as a "
869
+ "downloadable text file posted to Slack. "
870
+ "e.g. 'export the DB', 'download state', 'give me a full report file'"
871
+ ),
872
+ "input_schema": {"type": "object", "properties": {}},
873
+ },
874
+ {
875
+ "name": "set_maintenance",
876
+ "description": (
877
+ "Confirm that a repo/app is deliberately stopped for maintenance. "
878
+ "Sentinel will silently monitor the health URL and notify when it comes back online. "
879
+ "Use when Sentinel asked if a 502/503 is deliberate. "
880
+ "e.g. 'yes it's maintenance', 'maintenance ssolwa', 'confirm ssolwa is down for maintenance'"
881
+ ),
882
+ "input_schema": {
883
+ "type": "object",
884
+ "properties": {
885
+ "repo_name": {
886
+ "type": "string",
887
+ "description": "Repo name as configured (from repo-configs/*.properties)",
888
+ },
889
+ "note": {
890
+ "type": "string",
891
+ "description": "Optional reason e.g. 'scheduled maintenance', 'dependency update'",
892
+ },
893
+ },
894
+ "required": ["repo_name"],
895
+ },
896
+ },
897
+ ]
898
+
899
+
900
+ # ── Workspace helpers ─────────────────────────────────────────────────────────
901
+
902
+ def _workspace_dir() -> Path:
903
+ return Path(".").resolve().parent
904
+
905
+ def _short_name(dir_name: str) -> str:
906
+ """'sentinel-1881' → '1881', 'sentinel-elprint' → 'elprint', others unchanged."""
907
+ if dir_name.startswith("sentinel-"):
908
+ return dir_name[len("sentinel-"):]
909
+ return dir_name
910
+
911
+ def _read_project_name(project_dir: Path) -> str:
912
+ """Return PROJECT_NAME from sentinel.properties if set, else fall back to _short_name(dir)."""
913
+ props = project_dir / "config" / "sentinel.properties"
914
+ if props.exists():
915
+ try:
916
+ for line in props.read_text(encoding="utf-8", errors="ignore").splitlines():
917
+ line = line.strip()
918
+ if line.startswith("PROJECT_NAME"):
919
+ _, _, val = line.partition("=")
920
+ val = val.partition("#")[0].strip()
921
+ if val:
922
+ return val
923
+ except Exception:
924
+ pass
925
+ return _short_name(project_dir.name)
926
+
927
def _find_project_dirs(target: str = "") -> list[Path]:
    """Locate sentinel project directories in the workspace.

    A project directory is any workspace entry (excluding 'code' and
    '.git') that contains a 'config' subdirectory. When *target* is
    non-empty, only directories whose full name, short name, or
    PROJECT_NAME contains it (case-insensitive) are kept; an empty
    *target* returns every project. Filesystem errors yield an empty /
    partial list rather than raising.
    """
    hits: list[Path] = []
    needle = target.lower()
    try:
        for entry in sorted(_workspace_dir().iterdir()):
            if entry.name in ("code", ".git") or not entry.is_dir():
                continue
            if not (entry / "config").exists():
                continue
            if needle:
                candidates = (
                    entry.name,
                    _short_name(entry.name),
                    _read_project_name(entry),
                )
                if not any(needle in c.lower() for c in candidates):
                    continue
            hits.append(entry)
    except Exception:
        pass
    return hits
947
+
948
+ def _git_pull(path: Path) -> dict:
949
+ try:
950
+ r = subprocess.run(
951
+ ["git", "pull", "--rebase", "origin"],
952
+ cwd=str(path), capture_output=True, text=True, timeout=60,
953
+ )
954
+ last = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
955
+ return {"status": "ok" if r.returncode == 0 else "error",
956
+ "detail": last if r.returncode == 0 else r.stderr.strip()}
957
+ except Exception as e:
958
+ return {"status": "error", "detail": str(e)}
959
+
960
+
961
+ # ── Log-source name resolver ──────────────────────────────────────────────────
962
+
963
+ def _filter_log_sources(props_files: list, source_hint: str) -> list:
964
+ """
965
+ Return the subset of props_files whose log source matches source_hint.
966
+
967
+ Matching is tried in order (first match wins per file):
968
+ 1. Substring of the filename stem (e.g. "sts" STS.properties)
969
+ 2. Substring of REMOTE_SERVICE_USER (e.g. "ssolwa" ...SSOLoginWebApp...)
970
+ 3. Substring of HOSTS (e.g. hostname fragment)
971
+
972
+ Case-insensitive throughout. An empty source_hint returns all files unchanged.
973
+ """
974
+ if not source_hint:
975
+ return props_files
976
+ hint = source_hint.lower()
977
+
978
+ def _props_contains(path: Path, key: str, hint: str) -> bool:
979
+ try:
980
+ for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
981
+ stripped = line.strip()
982
+ if stripped.startswith("#"):
983
+ continue
984
+ if stripped.upper().startswith(key + "="):
985
+ val = stripped.split("=", 1)[1].partition("#")[0].strip().lower()
986
+ if hint in val:
987
+ return True
988
+ except OSError:
989
+ pass
990
+ return False
991
+
992
+ matched = []
993
+ for p in props_files:
994
+ if hint in p.stem.lower():
995
+ matched.append(p)
996
+ elif _props_contains(p, "REMOTE_SERVICE_USER", hint):
997
+ matched.append(p)
998
+ elif _props_contains(p, "HOSTS", hint):
999
+ matched.append(p)
1000
+ return matched
1001
+
1002
+
1003
+ # ── Tool execution ────────────────────────────────────────────────────────────
1004
+
1005
+ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "", channel: str = "", is_admin: bool = False) -> str:
1006
+ if name == "get_status":
1007
+ hours = int(inputs.get("hours", 24))
1008
+ errors = store.get_recent_errors(hours)
1009
+ fixes = store.get_recent_fixes(hours)
1010
+ prs = store.get_open_prs()
1011
+ top_errors = [
1012
+ {
1013
+ "message": e["message"][:120],
1014
+ "count": e["count"],
1015
+ "source": e["source"],
1016
+ "last_seen": e["last_seen"],
1017
+ }
1018
+ for e in errors[:8]
1019
+ ]
1020
+ return json.dumps({
1021
+ "window_hours": hours,
1022
+ "errors_detected": len(errors),
1023
+ "top_errors": top_errors,
1024
+ "fixes_applied": sum(1 for f in fixes if f["status"] == "applied"),
1025
+ "fixes_pending": sum(1 for f in fixes if f["status"] == "pending"),
1026
+ "fixes_failed": sum(1 for f in fixes if f["status"] == "failed"),
1027
+ "open_prs": [
1028
+ {
1029
+ "repo": p["repo_name"],
1030
+ "branch": p["branch"],
1031
+ "pr_url": p["pr_url"],
1032
+ "age": p.get("timestamp", ""),
1033
+ }
1034
+ for p in prs
1035
+ ],
1036
+ "sentinel_paused": Path("SENTINEL_PAUSE").exists(),
1037
+ })
1038
+
1039
+ if name == "check_auth_status":
1040
+ import subprocess as _sp
1041
+ from .notify import get_circuit_status
1042
+ cfg = cfg_loader.sentinel
1043
+
1044
+ # Auth configuration
1045
+ has_key = bool(cfg.anthropic_api_key)
1046
+ pro_for_tasks = cfg.claude_pro_for_tasks
1047
+ if pro_for_tasks and has_key:
1048
+ primary, fallback = "claude_pro_oauth", "api_key"
1049
+ elif pro_for_tasks:
1050
+ primary, fallback = "claude_pro_oauth", None
1051
+ else:
1052
+ primary, fallback = "api_key", "claude_pro_oauth" if not has_key else "claude_pro_oauth"
1053
+
1054
+ # Claude CLI liveness check
1055
+ cli_ok, cli_version = False, ""
1056
+ try:
1057
+ r = _sp.run(
1058
+ [cfg.claude_code_bin, "--version"],
1059
+ capture_output=True, text=True, timeout=10,
1060
+ )
1061
+ if r.returncode == 0:
1062
+ cli_ok = True
1063
+ cli_version = r.stdout.strip() or r.stderr.strip()
1064
+ except Exception:
1065
+ pass
1066
+
1067
+ # Circuit breaker snapshot — only open (unhealthy) circuits appear here
1068
+ circuits = get_circuit_status()
1069
+
1070
+ # Fix engine stats (last 24 h)
1071
+ recent = store.get_recent_fixes(hours=24)
1072
+ counts = {"applied": 0, "failed": 0, "skipped": 0, "pending": 0}
1073
+ last_success = None
1074
+ for f in recent:
1075
+ s = f.get("status", "")
1076
+ if s in counts:
1077
+ counts[s] += 1
1078
+ if s == "applied" and not last_success:
1079
+ last_success = f.get("timestamp", "")
1080
+
1081
+ overall = "healthy"
1082
+ if circuits:
1083
+ overall = "degraded — rate/auth limit active on: " + ", ".join(circuits)
1084
+ elif not cli_ok:
1085
+ overall = "warning claude CLI not reachable"
1086
+
1087
+ return json.dumps({
1088
+ "overall": overall,
1089
+ "auth": {
1090
+ "api_key_configured": has_key,
1091
+ "claude_pro_for_tasks": pro_for_tasks,
1092
+ "primary_method": primary,
1093
+ "fallback_method": fallback,
1094
+ },
1095
+ "claude_cli": {"available": cli_ok, "version": cli_version},
1096
+ "rate_limit_circuits": circuits,
1097
+ "fix_engine_24h": {**counts, "last_successful_fix": last_success},
1098
+ })
1099
+
1100
+ if name == "create_issue":
1101
+ description = inputs["description"]
1102
+ target_repo = inputs.get("target_repo", "")
1103
+ project_arg = inputs.get("project", "")
1104
+
1105
+ if project_arg:
1106
+ project_dirs = _find_project_dirs(project_arg)
1107
+ if not project_dirs:
1108
+ all_names = [_read_project_name(d) for d in _find_project_dirs()]
1109
+ return json.dumps({
1110
+ "error": f"No project found matching '{project_arg}'",
1111
+ "available_projects": all_names,
1112
+ "action_needed": "Ask the user which project they meant.",
1113
+ })
1114
+ if len(project_dirs) > 1:
1115
+ matches = [_read_project_name(d) for d in project_dirs]
1116
+ return json.dumps({
1117
+ "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
1118
+ "action_needed": "Ask the user to clarify which project they mean.",
1119
+ })
1120
+ project_dir = project_dirs[0]
1121
+ else:
1122
+ project_dir = Path(".")
1123
+
1124
+ support_url = inputs.get("support_url", "").strip()
1125
+ attachments_summary = inputs.get("attachments_summary", "").strip()
1126
+ findings = inputs.get("findings", "").strip()
1127
+
1128
+ issues_dir = project_dir / "issues"
1129
+ issues_dir.mkdir(exist_ok=True)
1130
+ fname = f"slack-{uuid.uuid4().hex[:8]}.txt"
1131
+
1132
+ submitter_name = store.get_user_name(user_id) if user_id else ""
1133
+ submitter_line = f"SUBMITTED_BY: {submitter_name} ({user_id})" if user_id else ""
1134
+ lines = []
1135
+ if submitter_line:
1136
+ lines.append(submitter_line)
1137
+ if target_repo:
1138
+ lines.append(f"TARGET_REPO: {target_repo}")
1139
+ if support_url:
1140
+ lines.append(f"SUPPORT_URL: {support_url}")
1141
+ lines.append(f"SUBMITTED_AT: {datetime.now(timezone.utc).isoformat()}")
1142
+ lines.append("")
1143
+ lines.append(description)
1144
+ if findings:
1145
+ lines.append(f"\nEVIDENCE (gathered by Sentinel Boss):\n{findings}")
1146
+ if attachments_summary:
1147
+ lines.append(f"\nATTACHMENTS:\n{attachments_summary}")
1148
+ content = "\n".join(lines)
1149
+ (issues_dir / fname).write_text(content, encoding="utf-8")
1150
+
1151
+ # Touch SENTINEL_POLL_NOW so the target instance picks it up immediately
1152
+ (project_dir / "SENTINEL_POLL_NOW").touch()
1153
+
1154
+ project_label = _read_project_name(project_dir.resolve()) if project_arg else "this project"
1155
+ logger.info("Boss created issue for %s: %s", project_label, fname)
1156
+ if user_id:
1157
+ try:
1158
+ store.record_submitted_issue(
1159
+ user_id=user_id,
1160
+ user_name=submitter_name,
1161
+ project=project_label,
1162
+ fname=fname,
1163
+ description=description,
1164
+ )
1165
+ except Exception as _rec_err:
1166
+ logger.debug("Boss: could not record submitted issue: %s", _rec_err)
1167
+ return json.dumps({
1168
+ "status": "queued",
1169
+ "project": project_label,
1170
+ "file": fname,
1171
+ "note": f"Delivered to '{project_label}'. Sentinel will process it on the next poll cycle.",
1172
+ })
1173
+
1174
+ if name == "get_fix_details":
1175
+ fp = inputs["fingerprint"]
1176
+ fix = store.get_confirmed_fix(fp) or store.get_marker_seen_fix(fp)
1177
+ if not fix:
1178
+ # Fallback: search recent fixes by prefix
1179
+ recent = store.get_recent_fixes(hours=72)
1180
+ fix = next((f for f in recent if f.get("fingerprint", "").startswith(fp)), None)
1181
+ return json.dumps(fix or {"error": "not found"})
1182
+
1183
+ if name == "list_pending_prs":
1184
+ prs = store.get_open_prs()
1185
+ return json.dumps({
1186
+ "count": len(prs),
1187
+ "open_prs": [
1188
+ {
1189
+ "repo": p["repo_name"],
1190
+ "branch": p["branch"],
1191
+ "pr_url": p["pr_url"],
1192
+ "timestamp": p.get("timestamp", ""),
1193
+ }
1194
+ for p in prs
1195
+ ],
1196
+ })
1197
+
1198
+ if name == "pause_sentinel":
1199
+ Path("SENTINEL_PAUSE").touch()
1200
+ logger.info("Boss: SENTINEL_PAUSE created")
1201
+ return json.dumps({"status": "paused"})
1202
+
1203
+ if name == "resume_sentinel":
1204
+ p = Path("SENTINEL_PAUSE")
1205
+ if p.exists():
1206
+ p.unlink()
1207
+ logger.info("Boss: SENTINEL_PAUSE removed")
1208
+ return json.dumps({"status": "resumed"})
1209
+
1210
+ if name == "list_projects":
1211
+ projects = []
1212
+ for d in _find_project_dirs():
1213
+ repo_cfg_dir = d / "config" / "repo-configs"
1214
+ repos_in_project = []
1215
+ if repo_cfg_dir.exists():
1216
+ for p in sorted(repo_cfg_dir.glob("*.properties")):
1217
+ if p.name.startswith("_"):
1218
+ continue
1219
+ repo_url = ""
1220
+ for line in p.read_text(encoding="utf-8", errors="ignore").splitlines():
1221
+ if line.startswith("REPO_URL"):
1222
+ repo_url = line.split("=", 1)[-1].strip()
1223
+ break
1224
+ repos_in_project.append({"repo": p.stem, "url": repo_url})
1225
+ projects.append({
1226
+ "project": _read_project_name(d),
1227
+ "dir": d.name,
1228
+ "running": (d / "sentinel.pid").exists(),
1229
+ "this": d.resolve() == Path(".").resolve(),
1230
+ "repos": repos_in_project,
1231
+ })
1232
+ return json.dumps({"projects": projects})
1233
+
1234
+ if name == "search_logs":
1235
+ query = inputs.get("query", "")
1236
+ source = inputs.get("source", "").lower()
1237
+ max_matches = int(inputs.get("max_matches", 30))
1238
+ tail_override = inputs.get("tail")
1239
+
1240
+ # ── Preferred path: search locally-synced files (instant, no SSH) ──────
1241
+ synced_base = Path("workspace/synced")
1242
+ if synced_base.exists():
1243
+ log_cfg_dir_s = Path("config") / "log-configs"
1244
+ candidate_sources = (
1245
+ [p.stem for p in _filter_log_sources(sorted(log_cfg_dir_s.glob("*.properties")), source)]
1246
+ if log_cfg_dir_s.exists() else
1247
+ [d.name for d in sorted(synced_base.iterdir()) if d.is_dir()]
1248
+ )
1249
+ synced_results = []
1250
+ try:
1251
+ qpat_s = re.compile(query, re.IGNORECASE)
1252
+ except re.error:
1253
+ qpat_s = re.compile(re.escape(query), re.IGNORECASE)
1254
+ for src_name in candidate_sources:
1255
+ src_dir = synced_base / src_name
1256
+ if not src_dir.is_dir():
1257
+ continue
1258
+ for log_file in sorted(src_dir.glob("*")):
1259
+ try:
1260
+ lines = log_file.read_text(encoding="utf-8", errors="replace").splitlines()
1261
+ matches = [ln[:300] for ln in lines if qpat_s.search(ln)][:max_matches]
1262
+ if matches:
1263
+ synced_results.append({"source": src_name, "file": log_file.name, "matches": matches})
1264
+ except Exception:
1265
+ pass
1266
+ if synced_results:
1267
+ total = sum(len(r["matches"]) for r in synced_results)
1268
+ return json.dumps({
1269
+ "query": query,
1270
+ "mode": "synced",
1271
+ "total_matches": total,
1272
+ "results": synced_results,
1273
+ "note": "Results from locally-synced files. No SSH needed.",
1274
+ })
1275
+
1276
+ # ── Live fetch path: SSH to servers and grep in real time ──────────────
1277
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1278
+ log_cfg_dir = Path("config") / "log-configs"
1279
+ if script.exists() and log_cfg_dir.exists():
1280
+ props_files = _filter_log_sources(sorted(log_cfg_dir.glob("*.properties")), source)
1281
+ if props_files:
1282
+ live_results = []
1283
+ for props in props_files:
1284
+ env = os.environ.copy()
1285
+ env["GREP_FILTER"] = query
1286
+ if tail_override:
1287
+ env["TAIL"] = str(tail_override)
1288
+ try:
1289
+ r = subprocess.run(
1290
+ ["bash", str(script), str(props)],
1291
+ capture_output=True, text=True, timeout=60, env=env,
1292
+ )
1293
+ try:
1294
+ _qpat = re.compile(query, re.IGNORECASE)
1295
+ except re.error:
1296
+ _qpat = re.compile(re.escape(query), re.IGNORECASE)
1297
+ lines = (r.stdout or "").strip().splitlines()
1298
+ matches = [ln[:300] for ln in lines if _qpat.search(ln)][:max_matches]
1299
+ if matches:
1300
+ live_results.append({"source": props.stem, "matches": matches})
1301
+ logger.info("Boss search_logs live %s rc=%d found=%d", props.stem, r.returncode, len(matches))
1302
+ except subprocess.TimeoutExpired:
1303
+ live_results.append({"source": props.stem, "error": "timed out"})
1304
+ except Exception as e:
1305
+ live_results.append({"source": props.stem, "error": str(e)})
1306
+ total = sum(len(r.get("matches", [])) for r in live_results)
1307
+ return json.dumps({
1308
+ "query": query,
1309
+ "mode": "live",
1310
+ "total_matches": total,
1311
+ "results": live_results,
1312
+ "note": (
1313
+ "Results already include a per-source breakdown. "
1314
+ "Do NOT call search_logs again with a source filter to 'refine' — "
1315
+ "use these results directly."
1316
+ ) if total > 0 else None,
1317
+ })
1318
+
1319
+ # ── Fallback: search locally-cached log files ──────────────────────────
1320
+ # Reaching here means: live script unavailable OR source filter matched no config files.
1321
+ # A result with files_searched=0 means the source name wasn't recognised — NOT that
1322
+ # there are no log entries. Do not interpret this as "no results found".
1323
+ fetched_dir = Path("workspace/fetched")
1324
+ if not fetched_dir.exists():
1325
+ return json.dumps({
1326
+ "error": "No fetched logs found and fetch_log.sh unavailable",
1327
+ "note": "This is a config/setup problem, not a 'no results' answer.",
1328
+ })
1329
+ try:
1330
+ pattern = re.compile(query, re.IGNORECASE)
1331
+ except re.error as e:
1332
+ return json.dumps({"error": f"Invalid regex: {e}"})
1333
+ results = []
1334
+ for log_file in sorted(fetched_dir.glob("*.log")):
1335
+ if source and source not in log_file.name.lower():
1336
+ continue
1337
+ try:
1338
+ lines = log_file.read_text(encoding="utf-8", errors="ignore").splitlines()
1339
+ matches = [
1340
+ {"line": i + 1, "text": line[:300]}
1341
+ for i, line in enumerate(lines)
1342
+ if pattern.search(line)
1343
+ ][:max_matches]
1344
+ if matches:
1345
+ results.append({"file": log_file.name, "matches": matches})
1346
+ except Exception:
1347
+ pass
1348
+ total = sum(len(r["matches"]) for r in results)
1349
+ files_searched = len(list(fetched_dir.glob("*.log")))
1350
+ result = {
1351
+ "query": query,
1352
+ "mode": "cached",
1353
+ "total_matches": total,
1354
+ "files_searched": files_searched,
1355
+ "results": results,
1356
+ }
1357
+ if files_searched == 0:
1358
+ result["warning"] = (
1359
+ "Source name not recognised in cached files — this is a lookup failure, not 'no results'. "
1360
+ "If you already have results from a broader search_logs call, use those. Stop retrying."
1361
+ )
1362
+ return json.dumps(result)
1363
+
1364
+
1365
+ if name == "filter_logs":
1366
+ import re as _re
1367
+ from collections import Counter as _Counter
1368
+ from datetime import datetime, timedelta, timezone as _tz
1369
+
1370
+ # Extract a short grouping key from a log line for pattern analysis
1371
+ _EXC_PAT = _re.compile(r'([A-Z][a-zA-Z]+(?:Exception|Error|Failure|Fault|Warning))')
1372
+ _LVL_PAT = _re.compile(r'\b(ERROR|WARN(?:ING)?|CRITICAL|FATAL|SEVERE)\b', _re.IGNORECASE)
1373
+
1374
+ def _signature(line):
1375
+ exc = _EXC_PAT.search(line)
1376
+ if exc:
1377
+ return exc.group(1)
1378
+ m = _LVL_PAT.search(line)
1379
+ if m:
1380
+ after = line[m.end():].strip()
1381
+ token = after.split()[0].rstrip(':.,') if after.split() else ''
1382
+ if token and len(token) > 2:
1383
+ return m.group(1).upper() + ' ' + token[:40]
1384
+ return line.strip()[:40]
1385
+
1386
+ query_f = inputs.get("query", "")
1387
+ source_f = inputs.get("source", "").lower()
1388
+ since_hours = inputs.get("since_hours")
1389
+ max_matches = int(inputs.get("max_matches", 300))
1390
+ case_flag = 0 if inputs.get("case_sensitive") else _re.IGNORECASE
1391
+ try:
1392
+ pat = _re.compile(query_f, case_flag)
1393
+ except _re.error as e:
1394
+ return json.dumps({"error": f"Invalid regex: {e}"})
1395
+
1396
+ synced_base = Path("workspace/synced")
1397
+ if not synced_base.exists():
1398
+ return json.dumps({
1399
+ "error": "No synced logs found.",
1400
+ "hint": "Log sync runs every SYNC_INTERVAL_SECONDS (default 300s). "
1401
+ "If just started, wait a minute then try again.",
1402
+ })
1403
+
1404
+ # Build cutoff timestamp for since_hours filter
1405
+ cutoff = None
1406
+ if since_hours:
1407
+ cutoff = datetime.now(_tz.utc) - timedelta(hours=int(since_hours))
1408
+
1409
+ # Determine which source directories to search
1410
+ if source_f:
1411
+ src_dirs = [d for d in sorted(synced_base.iterdir())
1412
+ if d.is_dir() and source_f in d.name.lower()]
1413
+ else:
1414
+ src_dirs = [d for d in sorted(synced_base.iterdir()) if d.is_dir()]
1415
+
1416
+ if not src_dirs:
1417
+ available = [d.name for d in synced_base.iterdir() if d.is_dir()]
1418
+ return json.dumps({
1419
+ "error": f"No synced source matching '{source_f}'",
1420
+ "available_sources": available,
1421
+ })
1422
+
1423
+ results = []
1424
+ total_matches = 0
1425
+ for src_dir in src_dirs:
1426
+ for log_file in sorted(src_dir.glob("*")):
1427
+ try:
1428
+ lines = log_file.read_text(encoding="utf-8", errors="replace").splitlines()
1429
+ matches = []
1430
+ for line in lines:
1431
+ if not pat.search(line):
1432
+ continue
1433
+ if cutoff:
1434
+ # Try to parse timestamp from line
1435
+ from .log_fetcher import _parse_line_ts
1436
+ ts = _parse_line_ts(line)
1437
+ if ts and ts < cutoff:
1438
+ continue
1439
+ matches.append(line[:300])
1440
+ if len(matches) >= max_matches:
1441
+ break
1442
+ if matches:
1443
+ results.append({
1444
+ "source": src_dir.name,
1445
+ "file": log_file.name,
1446
+ "matches": matches,
1447
+ })
1448
+ total_matches += len(matches)
1449
+ except Exception:
1450
+ pass
1451
+
1452
+ if not results:
1453
+ return json.dumps({
1454
+ "query": query_f,
1455
+ "total_matches": 0,
1456
+ "sources_searched": [d.name for d in src_dirs],
1457
+ "note": "No matches found in synced logs.",
1458
+ })
1459
+
1460
+
1461
+ try:
1462
+ pat = _re.compile(query_f, case_flag)
1463
+ except _re.error as e:
1464
+ return json.dumps({"error": f"Invalid regex: {e}"})
1465
+
1466
+ synced_base = Path("workspace/synced")
1467
+ if not synced_base.exists():
1468
+ return json.dumps({
1469
+ "error": "No synced logs found.",
1470
+ "hint": "Log sync runs every SYNC_INTERVAL_SECONDS (default 300s). "
1471
+ "If just started, wait a minute then try again.",
1472
+ })
1473
+
1474
+ cutoff = None
1475
+ if since_hours:
1476
+ cutoff = datetime.now(_tz.utc) - timedelta(hours=int(since_hours))
1477
+
1478
+ if source_f:
1479
+ src_dirs = [d for d in sorted(synced_base.iterdir())
1480
+ if d.is_dir() and source_f in d.name.lower()]
1481
+ else:
1482
+ src_dirs = [d for d in sorted(synced_base.iterdir()) if d.is_dir()]
1483
+
1484
+ if not src_dirs:
1485
+ available = [d.name for d in synced_base.iterdir() if d.is_dir()]
1486
+ return json.dumps({
1487
+ "error": f"No synced source matching '{source_f}'",
1488
+ "available_sources": available,
1489
+ })
1490
+
1491
+ all_matches = [] # list of (source_name, line)
1492
+ sources_hit = set()
1493
+ for src_dir in src_dirs:
1494
+ for log_file in sorted(src_dir.glob("*")):
1495
+ try:
1496
+ lines = log_file.read_text(encoding="utf-8", errors="replace").splitlines()
1497
+ for line in lines:
1498
+ if not pat.search(line):
1499
+ continue
1500
+ if cutoff:
1501
+ from .log_fetcher import _parse_line_ts
1502
+ ts = _parse_line_ts(line)
1503
+ if ts and ts < cutoff:
1504
+ continue
1505
+ all_matches.append((src_dir.name, line[:300]))
1506
+ sources_hit.add(src_dir.name)
1507
+ if len(all_matches) >= max_matches:
1508
+ break
1509
+ except Exception:
1510
+ pass
1511
+ if len(all_matches) >= max_matches:
1512
+ break
1513
+
1514
+ total = len(all_matches)
1515
+ if total == 0:
1516
+ return json.dumps({
1517
+ "query": query_f,
1518
+ "total_matches": 0,
1519
+ "sources_searched": [d.name for d in src_dirs],
1520
+ "note": "No matches found in synced logs.",
1521
+ })
1522
+
1523
+ # Pattern grouping: count occurrences of each error signature
1524
+ sig_counter = _Counter()
1525
+ sig_examples = {}
1526
+ for src, line in all_matches:
1527
+ sig = _signature(line)
1528
+ sig_counter[sig] += 1
1529
+ if sig not in sig_examples:
1530
+ sig_examples[sig] = f"[{src}] {line}"
1531
+
1532
+ top_patterns = [
1533
+ {"pattern": sig, "count": cnt, "example": sig_examples[sig][:250]}
1534
+ for sig, cnt in sig_counter.most_common(10)
1535
+ ]
1536
+
1537
+ # Sample: first unique-signature line from each source
1538
+ sample_lines = []
1539
+ seen_sigs = set()
1540
+ for src, line in all_matches:
1541
+ sig = _signature(line)
1542
+ if sig not in seen_sigs:
1543
+ sample_lines.append(f"[{src}] {line}")
1544
+ seen_sigs.add(sig)
1545
+ if len(sample_lines) >= 10:
1546
+ break
1547
+
1548
+ # Time span
1549
+ time_span = {}
1550
+ try:
1551
+ from .log_fetcher import _parse_line_ts
1552
+ timestamps = [_parse_line_ts(ln) for _, ln in all_matches]
1553
+ timestamps = [t for t in timestamps if t]
1554
+ if timestamps:
1555
+ time_span = {
1556
+ "earliest": min(timestamps).strftime("%Y-%m-%d %H:%M:%S UTC"),
1557
+ "latest": max(timestamps).strftime("%Y-%m-%d %H:%M:%S UTC"),
1558
+ }
1559
+ except Exception:
1560
+ pass
1561
+
1562
+ return json.dumps({
1563
+ "query": query_f,
1564
+ "total_matches": total,
1565
+ "sources_hit": sorted(sources_hit),
1566
+ "sources_searched": [d.name for d in src_dirs],
1567
+ "top_patterns": top_patterns,
1568
+ "sample_lines": sample_lines,
1569
+ "time_span": time_span,
1570
+ "capped": total >= max_matches,
1571
+ })
1572
+
1573
+ if name == "trigger_poll":
1574
+ Path("SENTINEL_POLL_NOW").touch()
1575
+ logger.info("Boss: immediate poll requested")
1576
+ return json.dumps({"status": "triggered", "note": "Sentinel will run a poll cycle within seconds"})
1577
+
1578
+ if name == "get_repo_status":
1579
+ hours = int(inputs.get("hours", 24))
1580
+ fixes = store.get_recent_fixes(hours)
1581
+ errors = store.get_recent_errors(hours)
1582
+ by_repo: dict = {}
1583
+ for fix in fixes:
1584
+ repo = fix.get("repo_name", "unknown")
1585
+ s = by_repo.setdefault(repo, {"applied": 0, "pending": 0, "failed": 0, "skipped": 0})
1586
+ key = fix.get("status", "failed")
1587
+ s[key] = s.get(key, 0) + 1
1588
+ return json.dumps({"window_hours": hours, "total_errors": len(errors), "by_repo": by_repo})
1589
+
1590
+ if name == "list_recent_commits":
1591
+ limit = int(inputs.get("limit", 5))
1592
+ results = []
1593
+ for repo_name, repo in cfg_loader.repos.items():
1594
+ local = Path(repo.local_path)
1595
+ if not local.exists():
1596
+ continue
1597
+ try:
1598
+ r = subprocess.run(
1599
+ ["git", "log", "--oneline", "--grep=sentinel", "-n", str(limit)],
1600
+ cwd=str(local), capture_output=True, text=True, timeout=10,
1601
+ )
1602
+ commits = r.stdout.strip().splitlines()
1603
+ if commits:
1604
+ results.append({"repo": repo_name, "commits": commits})
1605
+ except Exception:
1606
+ pass
1607
+ return json.dumps({"sentinel_commits": results})
1608
+
1609
+ if name == "pull_repo":
1610
+ target = inputs.get("repo", "").lower()
1611
+ results = []
1612
+ for repo_name, repo in cfg_loader.repos.items():
1613
+ if target and target not in repo_name.lower():
1614
+ continue
1615
+ local = Path(repo.local_path)
1616
+ if not local.exists():
1617
+ results.append({"repo": repo_name, "status": "error", "detail": "local path not found"})
1618
+ continue
1619
+ try:
1620
+ r = subprocess.run(
1621
+ ["git", "pull", "--rebase", "origin", repo.branch],
1622
+ cwd=str(local), capture_output=True, text=True, timeout=60,
1623
+ )
1624
+ last_line = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
1625
+ if r.returncode == 0:
1626
+ results.append({"repo": repo_name, "status": "ok", "detail": last_line})
1627
+ else:
1628
+ results.append({"repo": repo_name, "status": "error", "detail": r.stderr.strip()})
1629
+ except Exception as e:
1630
+ results.append({"repo": repo_name, "status": "error", "detail": str(e)})
1631
+ return json.dumps({"results": results})
1632
+
1633
+ if name == "pull_config":
1634
+ target = inputs.get("project", "")
1635
+ dirs = _find_project_dirs(target)
1636
+ if not dirs:
1637
+ return json.dumps({"error": f"No project found matching '{target}'"})
1638
+ results = []
1639
+ for d in dirs:
1640
+ res = _git_pull(d)
1641
+ results.append({"project": _read_project_name(d), "dir": d.name, **res})
1642
+ logger.info("Boss: pull_config %s → %s", d.name, res["status"])
1643
+ return json.dumps({"results": results})
1644
+
1645
+ if name == "fetch_logs":
1646
+ source_filter = inputs.get("source", "").lower()
1647
+ debug = bool(inputs.get("debug", False))
1648
+ tail_override = inputs.get("tail")
1649
+ grep_override = inputs.get("grep_filter", "")
1650
+
1651
+ # Find fetch_log.sh relative to this file
1652
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1653
+ if not script.exists():
1654
+ return json.dumps({"error": f"fetch_log.sh not found at {script}"})
1655
+
1656
+ log_cfg_dir = Path("config") / "log-configs"
1657
+ if not log_cfg_dir.exists():
1658
+ return json.dumps({"error": "config/log-configs/ not found"})
1659
+
1660
+ props_files = _filter_log_sources(sorted(log_cfg_dir.glob("*.properties")), source_filter)
1661
+ if not props_files:
1662
+ return json.dumps({"error": f"No log-config found matching '{source_filter}'"})
1663
+
1664
+ results = []
1665
+ for props in props_files:
1666
+ env = os.environ.copy()
1667
+ if tail_override:
1668
+ env["TAIL"] = str(tail_override)
1669
+ if grep_override:
1670
+ env["GREP_FILTER"] = grep_override
1671
+
1672
+ cmd = ["bash", str(script)]
1673
+ if debug:
1674
+ cmd.append("--debug")
1675
+ cmd.append(str(props))
1676
+
1677
+ try:
1678
+ r = subprocess.run(
1679
+ cmd, capture_output=True, text=True, timeout=120, env=env,
1680
+ )
1681
+ output = (r.stdout or "").strip()
1682
+ stderr = (r.stderr or "").strip()
1683
+ results.append({
1684
+ "source": props.stem,
1685
+ "returncode": r.returncode,
1686
+ "output": output[-2000:] if output else "",
1687
+ "stderr": stderr[-1000:] if stderr else "",
1688
+ })
1689
+ logger.info("Boss fetch_logs %s rc=%d", props.stem, r.returncode)
1690
+ except subprocess.TimeoutExpired:
1691
+ results.append({"source": props.stem, "error": "timed out after 120s"})
1692
+ except Exception as e:
1693
+ results.append({"source": props.stem, "error": str(e)})
1694
+
1695
+ return json.dumps({"fetched": len(results), "results": results})
1696
+
1697
+ if name == "watch_bot":
1698
+ if not is_admin:
1699
+ return json.dumps({"error": "Admin access required to register bots for monitoring."})
1700
+ user_ids = inputs.get("user_ids", [])
1701
+ project_arg = inputs.get("project", "").strip()
1702
+ if not user_ids:
1703
+ return json.dumps({"error": "No user_ids provided"})
1704
+
1705
+ # Resolve + validate project required for bot issue routing
1706
+ resolved_project = ""
1707
+ if project_arg:
1708
+ project_dirs = _find_project_dirs(project_arg)
1709
+ if not project_dirs:
1710
+ all_names = [_read_project_name(d) for d in _find_project_dirs()]
1711
+ return json.dumps({
1712
+ "error": f"No project found matching '{project_arg}'",
1713
+ "available_projects": all_names,
1714
+ "action_needed": "Ask the user which project these bot alerts belong to.",
1715
+ })
1716
+ if len(project_dirs) > 1:
1717
+ matches = [_read_project_name(d) for d in project_dirs]
1718
+ return json.dumps({
1719
+ "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
1720
+ "action_needed": "Ask the user to clarify which project.",
1721
+ })
1722
+ resolved_project = _read_project_name(project_dirs[0])
1723
+ else:
1724
+ all_projects = _find_project_dirs()
1725
+ if len(all_projects) == 1:
1726
+ # Single project in workspace — auto-assign
1727
+ resolved_project = _read_project_name(all_projects[0])
1728
+ elif all_projects:
1729
+ all_names = [_read_project_name(d) for d in all_projects]
1730
+ return json.dumps({
1731
+ "error": "Cannot determine which project these bot alerts belong to.",
1732
+ "available_projects": all_names,
1733
+ "action_needed": "Ask the user to specify the project, then retry with project filled in.",
1734
+ })
1735
+
1736
+ results = []
1737
+ for uid in user_ids:
1738
+ if not slack_client:
1739
+ results.append({"user_id": uid, "status": "error", "reason": "no Slack client available"})
1740
+ continue
1741
+ try:
1742
+ info = await slack_client.users_info(user=uid)
1743
+ user = info.get("user", {})
1744
+ if not user.get("is_bot", False):
1745
+ results.append({"user_id": uid, "status": "skipped", "reason": "not a bot — only bots can be watched passively"})
1746
+ continue
1747
+ bot_name = user.get("real_name") or user.get("name") or uid
1748
+ store.add_watched_bot(uid, bot_name, added_by="boss", project_name=resolved_project)
1749
+ logger.info("Boss: now watching bot %s (%s) → project '%s'", bot_name, uid, resolved_project or "unset")
1750
+ results.append({"user_id": uid, "bot_name": bot_name, "project": resolved_project, "status": "watching"})
1751
+ except Exception as e:
1752
+ results.append({"user_id": uid, "status": "error", "reason": str(e)})
1753
+ return json.dumps({"results": results})
1754
+
1755
+ if name == "unwatch_bot":
1756
+ if not is_admin:
1757
+ return json.dumps({"error": "Admin access required to remove bots from monitoring."})
1758
+ user_ids = inputs.get("user_ids", [])
1759
+ if not user_ids:
1760
+ return json.dumps({"error": "No user_ids provided"})
1761
+ results = []
1762
+ for uid in user_ids:
1763
+ removed = store.remove_watched_bot(uid)
1764
+ logger.info("Boss: unwatch bot %s → %s", uid, "removed" if removed else "not found")
1765
+ results.append({"user_id": uid, "status": "removed" if removed else "not found"})
1766
+ return json.dumps({"results": results})
1767
+
1768
+ if name == "list_watched_bots":
1769
+ bots = store.get_watched_bots()
1770
+ return json.dumps({
1771
+ "count": len(bots),
1772
+ "bots": [
1773
+ {
1774
+ "bot_id": b["bot_id"],
1775
+ "bot_name": b["bot_name"],
1776
+ "project": b.get("project_name") or "",
1777
+ "added_by": b["added_by"],
1778
+ "added_at": b["added_at"],
1779
+ }
1780
+ for b in bots
1781
+ ],
1782
+ })
1783
+
1784
+ if name == "upgrade_sentinel":
1785
+ if not is_admin:
1786
+ return json.dumps({"error": "Admin access required to upgrade Sentinel."})
1787
+ import threading
1788
+
1789
+ # Sentinel is installed via npm — use `sentinel upgrade` which handles
1790
+ # npm install + Python bundle copy + restart via stopAll/startAll.
1791
+ # Run it in the background after a short delay so the Slack reply is
1792
+ # sent before the process is replaced.
1793
+ try:
1794
+ r = subprocess.run(
1795
+ ["sentinel", "--version"],
1796
+ capture_output=True, text=True, timeout=10,
1797
+ )
1798
+ sentinel_bin_ok = r.returncode == 0
1799
+ except Exception:
1800
+ sentinel_bin_ok = False
1801
+
1802
+ if not sentinel_bin_ok:
1803
+ return json.dumps({
1804
+ "status": "error",
1805
+ "note": "`sentinel` CLI not found. Run: npm install -g @misterhuydo/sentinel",
1806
+ })
1807
+
1808
+ def _do_upgrade():
1809
+ import time
1810
+ time.sleep(10) # give Slack time to post the reply
1811
+ subprocess.Popen(["sentinel", "upgrade"], close_fds=True)
1812
+
1813
+ threading.Thread(target=_do_upgrade, daemon=True).start()
1814
+ logger.info("Boss: upgrade_sentinel scheduled via `sentinel upgrade`")
1815
+ return json.dumps({
1816
+ "status": "ok",
1817
+ "note": "Upgrade started pulling latest version via npm and restarting. Give me ~30 seconds then I'll be back.",
1818
+ })
1819
+
1820
+ if name == "ask_codebase":
1821
+ target = inputs.get("repo", "").lower()
1822
+ question = inputs.get("question", "")
1823
+
1824
+ # 1. Find repos whose name contains the target (e.g. "STS", "elprint-sales")
1825
+ matched = [(rn, r) for rn, r in cfg_loader.repos.items() if target in rn.lower()]
1826
+
1827
+ # 2. No repo match — check if target is a project name → use ALL repos in cfg_loader
1828
+ # (each Sentinel instance is scoped to one project, so all repos belong to it)
1829
+ if not matched:
1830
+ current_project = _read_project_name(Path("."))
1831
+ if target in current_project.lower() or current_project.lower() in target:
1832
+ matched = list(cfg_loader.repos.items())
1833
+
1834
+ if not matched:
1835
+ return json.dumps({
1836
+ "error": f"No repo or project found matching '{target}'",
1837
+ "available_repos": list(cfg_loader.repos.keys()),
1838
+ })
1839
+
1840
+ cfg = cfg_loader.sentinel
1841
+ env = os.environ.copy()
1842
+ # Only inject API key when Claude Pro is NOT preferred for heavy tasks
1843
+ if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
1844
+ env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1845
+
1846
+ def _ask_one(repo_name, repo_cfg) -> dict:
1847
+ local_path = Path(repo_cfg.local_path)
1848
+ if not local_path.exists():
1849
+ return {"repo": repo_name, "error": f"not cloned yet at {local_path}"}
1850
+ prompt = (
1851
+ f"You are a code analyst. Answer the following question about the codebase at: {local_path}\n\n"
1852
+ f"Question: {question}\n\n"
1853
+ f"Use whatever tools you need to answer accurately. Be concise and direct. Plain text only."
1854
+ )
1855
+ try:
1856
+ r = subprocess.run(
1857
+ ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
1858
+ if os.getuid() != 0 else
1859
+ [cfg.claude_code_bin, "--print", prompt]),
1860
+ capture_output=True, text=True, timeout=180, env=env,
1861
+ cwd=str(local_path),
1862
+ )
1863
+ output = (r.stdout or "").strip()
1864
+ logger.info("Boss ask_codebase %s rc=%d len=%d", repo_name, r.returncode, len(output))
1865
+ if r.returncode != 0 and not output:
1866
+ raw_err = (r.stderr or "")
1867
+ alert_if_rate_limited(
1868
+ cfg.slack_bot_token, cfg.slack_channel,
1869
+ f"ask_codebase/{repo_name}", raw_err,
1870
+ )
1871
+ return {"repo": repo_name, "error": f"claude --print failed (rc={r.returncode}): {raw_err[:200]}"}
1872
+ return {"repo": repo_name, "answer": output[:3000]}
1873
+ except subprocess.TimeoutExpired:
1874
+ return {"repo": repo_name, "error": "timed out after 180s"}
1875
+ except Exception as e:
1876
+ return {"repo": repo_name, "error": str(e)}
1877
+
1878
+ if len(matched) == 1:
1879
+ result = _ask_one(*matched[0])
1880
+ # Unwrap single-repo result for cleaner response
1881
+ return json.dumps(result)
1882
+
1883
+ # Multiple repos — query each and combine
1884
+ results = [_ask_one(rn, r) for rn, r in matched]
1885
+ return json.dumps({"project": target, "repos_queried": len(results), "results": results})
1886
+
1887
+ if name == "restart_project":
1888
+ if not is_admin:
1889
+ return json.dumps({"error": "Admin access required to restart a project."})
1890
+ project_arg = inputs.get("project", "").lower()
1891
+ dirs = _find_project_dirs(project_arg)
1892
+ if not dirs:
1893
+ return json.dumps({"error": f"No project found matching '{project_arg}'"})
1894
+ results = []
1895
+ for d in dirs:
1896
+ stop_sh = d / "stop.sh"
1897
+ start_sh = d / "start.sh"
1898
+ if not stop_sh.exists() or not start_sh.exists():
1899
+ results.append({"project": d.name, "status": "error", "detail": "stop.sh or start.sh not found"})
1900
+ continue
1901
+ try:
1902
+ subprocess.run(["bash", str(stop_sh)], cwd=str(d), timeout=30)
1903
+ subprocess.run(["bash", str(start_sh)], cwd=str(d), timeout=30)
1904
+ results.append({"project": d.name, "status": "restarted"})
1905
+ logger.info("Boss: restarted project %s", d.name)
1906
+ except Exception as e:
1907
+ results.append({"project": d.name, "status": "error", "detail": str(e)})
1908
+ return json.dumps({"results": results})
1909
+
1910
+ if name == "tail_log":
1911
+ source = inputs.get("source", "").lower()
1912
+ lines = int(inputs.get("lines", 100))
1913
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1914
+ log_cfg_dir = Path("config") / "log-configs"
1915
+
1916
+ if not script.exists():
1917
+ return json.dumps({"error": "fetch_log.sh not found"})
1918
+ if not log_cfg_dir.exists():
1919
+ return json.dumps({"error": "config/log-configs/ not found"})
1920
+
1921
+ props_files = sorted(log_cfg_dir.glob("*.properties"))
1922
+ if source:
1923
+ props_files = [p for p in props_files if source in p.stem.lower()]
1924
+ if not props_files:
1925
+ return json.dumps({"error": f"No log-config found matching '{source}'"})
1926
+
1927
+ results = []
1928
+ for props in props_files:
1929
+ env = os.environ.copy()
1930
+ env["TAIL"] = str(lines)
1931
+ env["GREP_FILTER"] = "" # no filter — show everything
1932
+ try:
1933
+ r = subprocess.run(
1934
+ ["bash", str(script), str(props)],
1935
+ capture_output=True, text=True, timeout=60, env=env,
1936
+ )
1937
+ tail_lines = (r.stdout or "").strip().splitlines()[-lines:]
1938
+ results.append({
1939
+ "source": props.stem,
1940
+ "lines": len(tail_lines),
1941
+ "content": "\n".join(tail_lines),
1942
+ })
1943
+ logger.info("Boss tail_log %s rc=%d lines=%d", props.stem, r.returncode, len(tail_lines))
1944
+ except subprocess.TimeoutExpired:
1945
+ results.append({"source": props.stem, "error": "timed out"})
1946
+ except Exception as e:
1947
+ results.append({"source": props.stem, "error": str(e)})
1948
+ return json.dumps({"results": results})
1949
+
1950
+ if name == "post_file":
1951
+ if not slack_client or not channel:
1952
+ return json.dumps({"error": "No Slack channel context — cannot upload file"})
1953
+ content = inputs.get("content", "")
1954
+ filename = inputs.get("filename", "sentinel-output.txt")
1955
+ title = inputs.get("title", filename)
1956
+ if not content:
1957
+ return json.dumps({"error": "No content provided"})
1958
+ try:
1959
+ await slack_client.files_upload_v2(
1960
+ channel=channel,
1961
+ content=content,
1962
+ filename=filename,
1963
+ title=title,
1964
+ )
1965
+ logger.info("Boss post_file: uploaded %s (%d bytes) to %s", filename, len(content), channel)
1966
+ return json.dumps({"ok": True, "filename": filename, "bytes": len(content)})
1967
+ except Exception as e:
1968
+ logger.warning("Boss post_file failed: %s", e)
1969
+ return json.dumps({"error": str(e)})
1970
+
1971
+ if name == "my_stats":
1972
+ hours = int(inputs.get("hours", 168))
1973
+ errors = store.get_recent_errors(hours)
1974
+ fixes = store.get_recent_fixes(hours)
1975
+ prs = store.get_open_prs()
1976
+ pending_conf = store.get_fixes_pending_confirmation()
1977
+ # Conversation stats
1978
+ history = store.load_conversation(user_id) if user_id else []
1979
+ hist_len = len(history)
1980
+ # Load conversation updated_at from DB
1981
+ conv_updated = ""
1982
+ try:
1983
+ import sqlite3 as _sqlite3
1984
+ with _sqlite3.connect(store.db_path) as _db:
1985
+ row = _db.execute(
1986
+ "SELECT updated_at FROM conversations WHERE user_id=?", (user_id,)
1987
+ ).fetchone()
1988
+ if row:
1989
+ conv_updated = row[0]
1990
+ except Exception:
1991
+ pass
1992
+ # Tally fix statuses
1993
+ by_status: dict = {}
1994
+ for fix in fixes:
1995
+ s = fix.get("status", "unknown")
1996
+ by_status[s] = by_status.get(s, 0) + 1
1997
+ # Fixes confirmed via sentinel marker in prod
1998
+ confirmed = [f for f in fixes if f.get("fix_outcome") == "confirmed"]
1999
+ regressed = [f for f in fixes if f.get("fix_outcome") == "regressed"]
2000
+ submitted = store.get_submitted_issues(user_id, hours=hours) if user_id else []
2001
+ submitted_recent = store.get_submitted_issues(user_id, hours=hours) if user_id else []
2002
+ return json.dumps({
2003
+ "conversation": {
2004
+ "messages_in_history": hist_len,
2005
+ "turns": hist_len // 2,
2006
+ "last_active": conv_updated or "no history",
2007
+ },
2008
+ "issues_you_submitted": {
2009
+ "total_in_window": len(submitted_recent),
2010
+ "all_time": len(store.get_submitted_issues(user_id) if user_id else []),
2011
+ "recent": [
2012
+ {"project": i["project"], "description": i["description"][:80],
2013
+ "submitted_at": i["submitted_at"]}
2014
+ for i in submitted_recent[:5]
2015
+ ],
2016
+ },
2017
+ "window_hours": hours,
2018
+ "errors_detected": len(errors),
2019
+ "fixes": {
2020
+ "applied": by_status.get("applied", 0),
2021
+ "pending_pr": len(prs),
2022
+ "failed": by_status.get("failed", 0),
2023
+ "skipped": by_status.get("skipped", 0),
2024
+ "error": by_status.get("error", 0),
2025
+ },
2026
+ "confirmed_in_prod": len(confirmed),
2027
+ "regressed_after_fix": len(regressed),
2028
+ "awaiting_confirmation": len(pending_conf),
2029
+ "open_prs": [
2030
+ {"repo": p["repo_name"], "pr_url": p["pr_url"], "timestamp": p["timestamp"]}
2031
+ for p in prs
2032
+ ],
2033
+ "top_errors": [
2034
+ {"message": e["message"][:100], "count": e["count"], "source": e["source"]}
2035
+ for e in errors[:5]
2036
+ ],
2037
+ })
2038
+ if name == "clear_my_history":
2039
+ if user_id:
2040
+ store.save_conversation(user_id, [])
2041
+ logger.info("Boss: cleared conversation history for user %s", user_id)
2042
+ return json.dumps({
2043
+ "status": "cleared",
2044
+ "note": "Your conversation history has been wiped. Next session starts fresh. [DONE]",
2045
+ })
2046
+ return json.dumps({"error": "cannot determine user — not clearing"})
2047
+
2048
+ # ── Admin-only tools ──────────────────────────────────────────────────────
2049
+ _ADMIN_TOOLS = {"list_all_users", "clear_user_history", "reset_fingerprint", "list_all_errors", "export_db"}
2050
+ if name in _ADMIN_TOOLS:
2051
+ if not is_admin:
2052
+ return json.dumps({"error": "Admin access required. You are not in SLACK_ADMIN_USERS."})
2053
+
2054
+ if name == "list_all_users":
2055
+ stats = store.get_all_user_stats()
2056
+ return json.dumps({"users": stats, "total": len(stats)})
2057
+
2058
+ if name == "clear_user_history":
2059
+ target = inputs.get("target_user_id", "").strip()
2060
+ if not target:
2061
+ return json.dumps({"error": "target_user_id is required"})
2062
+ store.save_conversation(target, [])
2063
+ display = store.get_user_name(target)
2064
+ logger.info("Boss admin: cleared history for user %s (%s) by admin %s", target, display, user_id)
2065
+ return json.dumps({"status": "cleared", "target_user_id": target, "display_name": display})
2066
+
2067
+ if name == "set_maintenance":
2068
+ repo_name = inputs.get("repo_name", "").strip()
2069
+ note = inputs.get("note", "").strip()
2070
+ if not repo_name:
2071
+ return json.dumps({"error": "repo_name is required"})
2072
+ store.set_health_state(repo_name, "confirmed", note=note)
2073
+ logger.info("Boss: maintenance confirmed for %s by %s (note: %s)", repo_name, user_id, note or "none")
2074
+ return json.dumps({
2075
+ "status": "confirmed",
2076
+ "repo": repo_name,
2077
+ "note": note or "none",
2078
+ "message": (
2079
+ f"Got it. I'll silently monitor {repo_name}'s health URL and "
2080
+ f"notify you as soon as it comes back online."
2081
+ ),
2082
+ })
2083
+
2084
+ if name == "reset_fingerprint":
2085
+ fp = inputs.get("fingerprint", "").strip()
2086
+ if not fp:
2087
+ return json.dumps({"error": "fingerprint is required"})
2088
+ found = store.reset_fingerprint(fp)
2089
+ logger.info("Boss admin: reset fingerprint %s by admin %s (found=%s)", fp, user_id, found)
2090
+ return json.dumps({"status": "reset" if found else "not_found", "fingerprint": fp,
2091
+ "note": "Sentinel will retry this error on the next poll." if found else "No fix record found for this fingerprint."})
2092
+
2093
+ if name == "list_all_errors":
2094
+ hours = int(inputs.get("hours", 0))
2095
+ errors = store.get_all_errors(hours)
2096
+ return json.dumps({"errors": errors[:100], "total": len(errors),
2097
+ "window_hours": hours or "all time"})
2098
+
2099
+ if name == "export_db":
2100
+ if not slack_client or not channel:
2101
+ return json.dumps({"error": "No Slack channel context — cannot upload file"})
2102
+ try:
2103
+ import sqlite3 as _sq
2104
+ import io as _io
2105
+ lines = []
2106
+ with _sq.connect(store.db_path) as _db:
2107
+ for tbl in ["errors", "fixes", "reports", "slack_users", "conversations", "submitted_issues"]:
2108
+ try:
2109
+ rows = _db.execute(f"SELECT * FROM {tbl}").fetchall() # noqa: S608
2110
+ cols = [d[0] for d in _db.execute(f"SELECT * FROM {tbl} LIMIT 0").description] # noqa: S608
2111
+ lines.append(f"=== {tbl} ({len(rows)} rows) ===")
2112
+ lines.append("\t".join(cols))
2113
+ for row in rows:
2114
+ lines.append("\t".join(str(v) if v is not None else "" for v in row))
2115
+ lines.append("")
2116
+ except Exception:
2117
+ lines.append(f"=== {tbl} (unavailable) ===\n")
2118
+ content = "\n".join(lines)
2119
+ await slack_client.files_upload_v2(
2120
+ channel=channel,
2121
+ content=content,
2122
+ filename="sentinel-db-export.tsv",
2123
+ title="Sentinel DB Export",
2124
+ )
2125
+ logger.info("Boss admin: exported DB (%d bytes) by admin %s", len(content), user_id)
2126
+ return json.dumps({"ok": True, "bytes": len(content)})
2127
+ except Exception as e:
2128
+ return json.dumps({"error": str(e)})
2129
+
2130
+ return json.dumps({"error": f"unknown tool: {name}"})
2131
+
2132
+
2133
+ # ── CLI fallback (OAuth / no API key) ────────────────────────────────────────
2134
+
2135
+ def _attachments_to_text(attachments: list[dict]) -> str:
2136
+ """Produce a plain-text summary of attachments to append to CLI prompts."""
2137
+ if not attachments:
2138
+ return ""
2139
+ parts = []
2140
+ for att in attachments:
2141
+ if att["type"] == "text":
2142
+ parts.append(
2143
+ f"[Attached file: {att['name']}]\n{att['content']}"
2144
+ )
2145
+ elif att["type"] == "image":
2146
+ parts.append(
2147
+ f"[Attached image: {att['name']}] (saved at {att['path']})"
2148
+ )
2149
+ else:
2150
+ parts.append(
2151
+ f"[Attached file: {att['name']}] (saved at {att['path']} — read it if relevant)"
2152
+ )
2153
+ return "\n\nATTACHMENTS:\n" + "\n---\n".join(parts)
2154
+
2155
+
2156
+ def _attachments_to_api_blocks(attachments: list[dict]) -> list[dict]:
2157
+ """Convert attachments into Anthropic API message content blocks."""
2158
+ blocks: list[dict] = []
2159
+ for att in attachments:
2160
+ if att["type"] == "image":
2161
+ blocks.append({
2162
+ "type": "image",
2163
+ "source": {
2164
+ "type": "base64",
2165
+ "media_type": att.get("mime", "image/png"),
2166
+ "data": att["content"],
2167
+ },
2168
+ })
2169
+ elif att["type"] == "text":
2170
+ blocks.append({
2171
+ "type": "text",
2172
+ "text": f"[Attached file: {att['name']}]\n{att['content']}",
2173
+ })
2174
+ else:
2175
+ blocks.append({
2176
+ "type": "text",
2177
+ "text": f"[Attached file: {att['name']}] saved at {att['path']}",
2178
+ })
2179
+ return blocks
2180
+
2181
+
2182
# Matches "ACTION: {...}" directives at the start of a line in CLI-mode model
# output; group(1) captures the JSON payload passed to _run_tool.
_ACTION_RE = re.compile(r"^ACTION:\s*(\{.*\})", re.MULTILINE)
2183
+
2184
+
2185
async def _handle_with_cli(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
    is_admin: bool = False,
) -> tuple[str, bool]:
    """Fallback: use `claude --print` for users without an Anthropic API key.

    Builds one large prompt (system text + live status + history + the user
    message), shells out to the Claude Code CLI, executes any ``ACTION: {...}``
    directives found in the output via ``_run_tool``, and returns
    ``(reply_text, is_done)``. Mutates ``history`` in place on success.
    """
    # Pre-fetch baseline context the model always gets.
    status_json = await _run_tool("get_status", {"hours": 24}, cfg_loader, store)
    prs_json = await _run_tool("list_pending_prs", {}, cfg_loader, store)

    # Pre-fetch log search if the message is a search request.
    # Use quoted strings as the query, or fall back to the full message.
    # Never hardcode field names — the query is whatever the user said.
    search_json = ""
    _search_kws = ("search", "find", "look for", "show me log", "grep", "entries for")
    if any(kw in message.lower() for kw in _search_kws):
        quoted = re.findall(r'"([^"]+)"', message)
        query = quoted[0] if quoted else message
        search_json = await _run_tool("search_logs", {"query": query}, cfg_loader, store)

    # Presence of the SENTINEL_PAUSE marker file means polling is paused.
    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    log_sources = list(cfg_loader.log_sources.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # Flatten the last 8 turns into plain text; content may be a string or a
    # list of API content blocks (dicts or SDK objects) — keep text blocks only.
    history_text = ""
    for msg in history[-8:]:
        role = msg["role"].upper()
        content = msg["content"]
        if isinstance(content, list):
            content = " ".join(
                (b.get("text", "") if isinstance(b, dict) else getattr(b, "text", ""))
                for b in content
                if (isinstance(b, dict) and b.get("type") == "text")
                or (hasattr(b, "type") and b.type == "text")
            )
        history_text += f"\n{role}: {content}"

    slack_mention = f"<@{user_id}>" if user_id else (user_name or "")
    known_users = store.get_all_users()
    users_hint = ", ".join(f"<@{uid}> = {name}" for uid, name in known_users.items())
    # Assemble the full one-shot prompt. The ACTION examples at the end teach
    # the model the directive format that _ACTION_RE parses below.
    prompt = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name} (Slack mention: {slack_mention})" if user_name else "")
        + "\nAlways start your reply by addressing the user directly using their Slack mention, e.g. \"<@U123> here is what I found...\"."
        + " Never use their plain name — always use the <@USER_ID> format so Slack highlights it."
        + (f"\nKnown Slack users: {users_hint}" if users_hint else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + f"\nAdmin access for this user: {'YES — admin tools are available' if is_admin else 'NO — admin tools will be refused'}"
        + "\nNOTE: Running in CLI fallback mode — admin tools and some features are unavailable. Ask user to configure ANTHROPIC_API_KEY for full features."
        + f"\n\nCurrent status (last 24 h):\n{status_json}"
        + f"\n\nOpen PRs:\n{prs_json}"
        + (f"\n\nLog search results:\n{search_json}" if search_json else "")
        + (f"\n\nConversation so far:{history_text}" if history_text else "")
        + _attachments_to_text(attachments or [])
        + f"\n\nUSER: {message}"
        + "\n\nIf you need to take an action, include a line like:\n"
        + " ACTION: {\"action\": \"pause_sentinel\"}\n"
        + " ACTION: {\"action\": \"resume_sentinel\"}\n"
        + " ACTION: {\"action\": \"trigger_poll\"}\n"
        + " ACTION: {\"action\": \"create_issue\", \"description\": \"...\", \"target_repo\": \"\"}\n"
        + " ACTION: {\"action\": \"search_logs\", \"query\": \"<whatever the user asked to find>\"}\n"
        + "End with [DONE] if the request is fully handled."
    )

    cfg = cfg_loader.sentinel
    env = os.environ.copy()
    if cfg.anthropic_api_key:
        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key

    try:
        # --dangerously-skip-permissions is refused by the CLI when running as
        # root, hence the getuid() branch.
        result = subprocess.run(
            ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
             if os.getuid() != 0 else
             [cfg.claude_code_bin, "--print", prompt]),
            capture_output=True, text=True, timeout=180, env=env,
        )
        output = (result.stdout or "").strip()
        if result.returncode != 0 or not output:
            stderr = (result.stderr or "").strip()
            logger.error(
                "Boss CLI call failed (rc=%d): stdout=%r stderr=%r",
                result.returncode, output[:200], stderr[:200],
            )
            raw_err = (result.stderr or "").strip()
            # Only bail out when BOTH the exit code is nonzero and there is no
            # stdout; a nonzero rc with usable output still proceeds below.
            if result.returncode != 0 and not output:
                full_err = f"exit {result.returncode}: {raw_err[:300]}"
                cfg = cfg_loader.sentinel
                alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
                                      "sentinel_boss/cli", raw_err or full_err)
                return f":warning: `claude --print` failed ({full_err})", True
    except Exception as e:
        logger.error("Boss CLI call failed: %s", e)
        return f":warning: Boss unavailable: {e}", True

    # Execute every ACTION directive the model emitted; parse errors on one
    # directive do not stop the others.
    for m in _ACTION_RE.finditer(output):
        try:
            action = json.loads(m.group(1))
            name = action.pop("action", "")
            if name:
                result_str = await _run_tool(name, action, cfg_loader, store, user_id=user_id)
                logger.info("Boss CLI action: %s → %s", name, result_str[:80])
        except Exception as e:
            logger.warning("Boss action parse error: %s", e)

    # Strip directives and the [DONE] marker from the user-visible reply.
    reply = _ACTION_RE.sub("", output).strip()
    is_done = "[DONE]" in reply
    reply = reply.replace("[DONE]", "").strip()
    if not reply:
        greeting = f"Hi {user_name}! " if user_name else "Hi! "
        reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"

    # Persist the exchange into the caller-owned history list.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return reply, is_done
2308
+
2309
+
2310
+ # ── History serialization helpers ────────────────────────────────────────────
2311
+
2312
+ def _serialize_content(content) -> list:
2313
+ """Convert Anthropic SDK response content (Pydantic objects) to plain dicts.
2314
+
2315
+ The SDK returns TextBlock / ToolUseBlock instances. json.dumps(..., default=str)
2316
+ turns them into useless strings like "TextBlock(type='text', text='...')".
2317
+ This converts them to proper dicts so history round-trips through SQLite safely.
2318
+ """
2319
+ if not isinstance(content, list):
2320
+ return content
2321
+ result = []
2322
+ for block in content:
2323
+ if isinstance(block, dict):
2324
+ result.append(block)
2325
+ elif hasattr(block, "model_dump"):
2326
+ result.append(block.model_dump())
2327
+ elif hasattr(block, "dict"):
2328
+ result.append(block.dict())
2329
+ elif hasattr(block, "type"):
2330
+ if block.type == "text":
2331
+ result.append({"type": "text", "text": getattr(block, "text", "")})
2332
+ elif block.type == "tool_use":
2333
+ result.append({
2334
+ "type": "tool_use",
2335
+ "id": getattr(block, "id", ""),
2336
+ "name": getattr(block, "name", ""),
2337
+ "input": getattr(block, "input", {}),
2338
+ })
2339
+ else:
2340
+ result.append({"type": "text", "text": str(block)})
2341
+ return result
2342
+
2343
+
2344
+ def _clean_history(history: list) -> list:
2345
+ """Remove turns that would cause a 400 from the Anthropic API.
2346
+
2347
+ Strips orphaned tool_use blocks (assistant turn with tool_use but no
2348
+ following tool_result turn) and consecutive same-role turns that result
2349
+ from a previous session that crashed mid-tool-loop.
2350
+ """
2351
+ cleaned = []
2352
+ i = 0
2353
+ while i < len(history):
2354
+ turn = history[i]
2355
+ role = turn.get("role", "")
2356
+ content = turn.get("content", [])
2357
+
2358
+ # Drop assistant turns that contain tool_use if the next turn isn't tool_result
2359
+ if role == "assistant" and isinstance(content, list):
2360
+ has_tool_use = any(
2361
+ (isinstance(b, dict) and b.get("type") == "tool_use")
2362
+ for b in content
2363
+ )
2364
+ if has_tool_use:
2365
+ next_turn = history[i + 1] if i + 1 < len(history) else None
2366
+ next_content = (next_turn or {}).get("content", [])
2367
+ has_result = isinstance(next_content, list) and any(
2368
+ (isinstance(b, dict) and b.get("type") == "tool_result")
2369
+ for b in next_content
2370
+ )
2371
+ if not has_result:
2372
+ i += 1 # skip orphaned tool_use turn
2373
+ continue
2374
+
2375
+ # Drop consecutive same-role turns (keep the last one)
2376
+ if cleaned and cleaned[-1].get("role") == role:
2377
+ cleaned[-1] = turn
2378
+ else:
2379
+ cleaned.append(turn)
2380
+ i += 1
2381
+ return cleaned
2382
+
2383
+
2384
+ # ── API-key path (structured tools, full agentic loop) ────────────────────────
2385
+
2386
async def _handle_with_api(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
    channel: str = "",
    is_admin: bool = False,
) -> tuple[str, bool]:
    """
    Answer one user message via the Anthropic API with structured tool use.

    Runs a full agentic loop: Claude may emit tool_use blocks, each of which
    is executed through _run_tool and fed back as a tool_result, repeating
    until Claude replies with plain text.

    Args:
        message: The user's message text.
        history: Prior conversation turns; mutated in place (appended to)
            only after a successful API exchange.
        cfg_loader: Config accessor (provides .sentinel, .repos, .log_sources).
        store: Persistence layer; .get_all_users() supplies the Slack roster.
        slack_client: Optional Slack client passed through to tools.
        user_name / user_id: Identity of the Slack user being served.
        attachments: Optional Slack file attachments converted to API blocks.
        channel: Slack channel id, passed through to tools.
        is_admin: Gates admin-only tools inside _run_tool.

    Returns:
        (reply_text, is_done) — is_done=True means the session is complete.

    Raises:
        Propagates any anthropic API error to the caller (handle_message
        catches it and falls back to the CLI path).
    """
    # Imported lazily so the module loads even when the SDK is absent
    # (the caller only enters this path when an API key is configured).
    import anthropic

    api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
    client = anthropic.Anthropic(api_key=api_key)

    # Gather live context for the system prompt: pause flag (sentinel file on
    # disk), managed repos, known projects/log sources, and the Slack roster.
    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    known_projects = [_read_project_name(d) for d in _find_project_dirs()]
    log_sources = list(cfg_loader.log_sources.keys())
    slack_mention = f"<@{user_id}>" if user_id else (user_name or "")
    known_users = store.get_all_users()  # {user_id: display_name}
    users_hint = ", ".join(f"<@{uid}> = {name}" for uid, name in known_users.items())
    system = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name} (Slack mention: {slack_mention})" if user_name else "")
        + "\nAlways start your reply by addressing the user directly using their Slack mention, e.g. \"<@U123> here is what I found...\"."
        + " Never use their plain name — always use the <@USER_ID> format so Slack highlights it."
        + (f"\nKnown Slack users: {users_hint}" if users_hint else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + (f"\nKnown projects in workspace: {', '.join(known_projects)}" if known_projects else "")
        + f"\nAdmin access for this user: {'YES — admin tools are available' if is_admin else 'NO — admin tools will be refused'}"
    )

    # Build user content — include attachment blocks if any
    attach_blocks = _attachments_to_api_blocks(attachments or [])
    if attach_blocks:
        user_content = attach_blocks + [{"type": "text", "text": message}]
    else:
        user_content = message

    # Work on a local copy — only commit to history on success to prevent
    # cascading 400s if the API rejects a malformed/corrupted history.
    messages = list(history) + [{"role": "user", "content": user_content}]

    # Agentic loop: keep calling the API until Claude stops requesting tools.
    while True:
        response = client.messages.create(
            model="claude-opus-4-6",
            max_tokens=2048,
            system=system,
            tools=_TOOLS,
            messages=messages,
        )

        # Split the response into text and tool_use blocks.
        text_parts = []
        tool_blocks = []
        for block in response.content:
            if block.type == "text":
                text_parts.append(block.text)
            elif block.type == "tool_use":
                tool_blocks.append(block)

        if not tool_blocks:
            # Final (text-only) turn: strip the [DONE] sentinel that signals
            # the session can release its Slack queue slot.
            reply = " ".join(text_parts).strip()
            is_done = "[DONE]" in reply
            reply = reply.replace("[DONE]", "").strip()
            if not reply:
                greeting = f"Hi {user_name}! " if user_name else "Hi! "
                reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"
            # Heuristic override: if reply ends with a question, Claude is waiting for input
            if is_done and re.search(r'\?\s*$', reply):
                is_done = False
            # Commit to history only on success — serialize SDK objects to plain dicts
            history.append({"role": "user", "content": user_content})
            history.append({"role": "assistant", "content": _serialize_content(response.content)})
            return reply, is_done

        # Tool turn: record the assistant turn, run every requested tool,
        # and answer with matching tool_result blocks (paired by tool_use_id).
        messages.append({"role": "assistant", "content": _serialize_content(response.content)})
        tool_results = []
        for tc in tool_blocks:
            result = await _run_tool(tc.name, tc.input, cfg_loader, store, slack_client=slack_client, user_id=user_id, channel=channel, is_admin=is_admin)
            logger.info("Boss tool: %s(%s) → %s", tc.name, tc.input, result[:120])
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tc.id,
                "content": result,
            })
        messages.append({"role": "user", "content": tool_results})
2479
+
2480
+
2481
+ # ── Main entry point ──────────────────────────────────────────────────────────
2482
+
2483
async def handle_message(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
    channel: str = "",
    is_admin: bool = False,
) -> tuple[str, bool]:
    """
    Process one user message through the Sentinel Boss (Claude with tool use).

    Priority (matches the code below):
      1. ANTHROPIC_API_KEY — structured tools, full agentic loop
      2. Claude Pro / OAuth via `claude --print` (CLI path — no API key needed)

    Args:
        message: The user's message text.
        history: Conversation history, passed through to the chosen backend.
        cfg_loader: Config accessor (provides .sentinel and friends).
        store: Persistence layer, passed through to the chosen backend.
        slack_client / user_name / user_id / attachments / channel / is_admin:
            Forwarded to the backend handlers.

    Returns:
        (reply_text, is_done)
        is_done=True  → session complete, release the Slack queue slot.
        is_done=False → waiting for user follow-up, keep the slot.
    """
    api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")

    # 1st priority: ANTHROPIC_API_KEY — full structured tools, cheap per-token for Boss queries
    if api_key:
        try:
            import anthropic  # noqa: F401
            return await _handle_with_api(
                message, history, cfg_loader, store, slack_client=slack_client,
                user_name=user_name, user_id=user_id, attachments=attachments, channel=channel,
                is_admin=is_admin,
            )
        except Exception as api_err:
            err_str = str(api_err)
            # Detect rate-limit / auth failure and alert Slack before falling through
            cfg = cfg_loader.sentinel
            if is_rate_limited(err_str):
                alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
                                      "sentinel_boss/api", err_str)
            logger.warning("Boss: API key path failed (%s), trying CLI fallback", err_str)

    # 2nd priority: Claude Pro / OAuth via CLI (limited tools but no API key needed)
    cli_reply, cli_done = await _handle_with_cli(
        message, history, cfg_loader, store, slack_client=slack_client, user_name=user_name,
        user_id=user_id, attachments=attachments, is_admin=is_admin,
    )
    # CLI errors are surfaced as replies starting with ":warning:" — anything
    # else is a genuine answer and ends the fallback chain here.
    if not cli_reply.startswith(":warning:"):
        return cli_reply, cli_done

    # Both paths failed — alert Slack and return error
    cfg = cfg_loader.sentinel
    alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
                          "sentinel_boss/cli", cli_reply)
    if not api_key:
        # No auth at all configured
        no_auth_msg = (
            ":warning: *Sentinel Boss — no Claude auth configured*\n"
            "Configure at least one of:\n"
            "• `ANTHROPIC_API_KEY` in `sentinel.properties` — full features\n"
            "• Claude Pro OAuth: run `claude login` on the server — required for fix_engine\n"
            "See: https://github.com/misterhuydo/Sentinel#authentication"
        )
        slack_alert(cfg.slack_bot_token, cfg.slack_channel, no_auth_msg)
        return ":warning: No Claude authentication configured. See Slack for details.", True
    return cli_reply, cli_done