npm - @misterhuydo/sentinel - Versions diffs - 1.2.7 → 1.2.9 - Mend

@misterhuydo/sentinel 1.2.7 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.cairn/session.json +2 -2
package/package.json +1 -1
package/python/sentinel/fix_engine.py +24 -5
package/python/sentinel/issue_watcher.py +8 -0
package/python/sentinel/main.py +46 -32
package/python/sentinel/notify.py +249 -173

package/.cairn/session.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "message": "Auto-checkpoint at 2026-03-23T11:40:37.793Z",
-  "checkpoint_at": "2026-03-23T11:40:37.794Z",
+  "message": "Auto-checkpoint at 2026-03-23T12:00:29.548Z",
+  "checkpoint_at": "2026-03-23T12:00:29.550Z",
   "active_files": [],
   "notes": [],
   "mtime_snapshot": {}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@misterhuydo/sentinel",
-  "version": "1.2.7",
+  "version": "1.2.9",
   "description": "Sentinel — Autonomous DevOps Agent installer and manager",
   "bin": {
     "sentinel": "./bin/sentinel.js"

package/python/sentinel/fix_engine.py CHANGED Viewed

@@ -80,9 +80,22 @@ def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers:
         f"1. {step1}",
         "2. Use your available tools to explore the codebase and identify the root cause.",
         f"3. {marker_instruction}",
-        "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
-        "5. Do not explain. Output only the patch.",
-        "6. If you cannot determine a safe fix, output: SKIP: <reason>",
+        "4. Consider all possible fix approaches. For each, weigh:",
+        "   - Confidence: is this definitely the root cause?",
+        "   - Safety: could this break other functionality?",
+        "   - Scope: is it minimal and targeted?",
+        "   Choose the safest minimal approach. If multiple valid options exist, pick the one",
+        "   with highest confidence and lowest blast radius.",
+        "5. Output ONLY a unified diff patch (git diff format) for the chosen fix.",
+        "6. Do not explain. Output only the patch.",
+        "7. Only if you truly cannot produce a safe fix — e.g. the root cause requires a",
+        "   DB schema change, infrastructure update, business logic decision, or is inside",
+        "   a third-party library — output exactly:",
+        "   NEEDS_HUMAN: <explanation>",
+        "   Include: (a) root cause identified, (b) approaches you considered and why each",
+        "   was insufficient or unsafe, (c) exactly what a human needs to do or decide.",
+        "   Do NOT output NEEDS_HUMAN just because the fix is complex — only when human",
+        "   judgement or access is genuinely required.",
     ]
     return "\n".join(lines_out)
@@ -142,13 +155,14 @@ def generate_fix(
     repo: RepoConfig,
     cfg: SentinelConfig,
     patches_dir: Path,
+    store=None,
 ) -> tuple[str, Path | None, str]:
     """
     Generate a fix for the given error event.
     Returns:
         (status, patch_path, marker)
-        status: "patch" | "skip" | "error"
+        status: "patch" | "skip" | "needs_human" | "error"
     Auth strategy — API key and Claude Pro (OAuth) are interchangeable:
       Primary  : Claude Pro (OAuth) if claude_pro_for_tasks=True, else API key
@@ -237,10 +251,15 @@ def generate_fix(
         output=output,
     )
+    if output.strip().upper().startswith("NEEDS_HUMAN:"):
+        reason = output.strip()[len("NEEDS_HUMAN:"):].strip()
+        logger.info("Claude needs human for %s: %s", event.fingerprint, reason[:200])
+        return "needs_human", None, reason
     if output.strip().upper().startswith("SKIP:"):
         reason = output.strip()[5:].strip()
         logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
-        return "skip", None, ""
+        return "skip", None, reason
     patch = _extract_patch(output)
     if not patch:

package/python/sentinel/issue_watcher.py CHANGED Viewed

@@ -41,6 +41,7 @@ class IssueEvent:
     fingerprint: str = ""
     severity: str = "ERROR"
     timestamp: str = ""
+    submitter_user_id: str = ""  # Slack user ID who raised this via Boss, if known
     # Compatibility fields matching ErrorEvent interface
     level: str = "ERROR"
@@ -53,6 +54,13 @@ class IssueEvent:
         if not self.fingerprint:
             raw = f"issue:{self.source}:{self.message[:200]}"
             self.fingerprint = hashlib.sha1(raw.encode()).hexdigest()[:16]
+        if not self.submitter_user_id:
+            import re as _re
+            for _line in self.body.splitlines():
+                _m = _re.match(r'SUBMITTED_BY:.*\(([UW][A-Z0-9]+)\)', _line.strip())
+                if _m:
+                    self.submitter_user_id = _m.group(1)
+                    break
         if not self.timestamp:
             self.timestamp = datetime.now(timezone.utc).isoformat()
         if not self.stack_trace:

package/python/sentinel/main.py CHANGED Viewed

@@ -28,6 +28,7 @@ from .log_parser import parse_all, scan_all_for_markers, ErrorEvent
 from .issue_watcher import scan_issues, mark_done, IssueEvent
 from .repo_router import route
 from .reporter import build_and_send, send_fix_notification, send_failure_notification, send_confirmed_notification, send_regression_notification, send_startup_notification, send_upgrade_notification
+from .notify import notify_fix_blocked
 from .health_checker import evaluate_repos
 from .state_store import StateStore
@@ -87,15 +88,21 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
     status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
     if status != "patch" or patch_path is None:
-        outcome = "skipped" if status == "skip" else "failed"
+        outcome = "skipped" if status in ("skip", "needs_human") else "failed"
         store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
-        send_failure_notification(sentinel, {
-            "source":    event.source,
-            "message":   event.message,
-            "repo_name": repo.repo_name,
-            "reason":    f"Claude Code returned {status.upper()}",
-            "body":      event.full_text()[:500],
-        })
+        # For log-detected errors: NEEDS_HUMAN -> DM/channel; SKIP -> email only (not spam)
+        if status == "needs_human":
+            notify_fix_blocked(sentinel, event.source, event.message,
+                               reason=marker, repo_name=repo.repo_name,
+                               submitter_user_id="")
+        else:
+            send_failure_notification(sentinel, {
+                "source":    event.source,
+                "message":   event.message,
+                "repo_name": repo.repo_name,
+                "reason":    f"Claude Code returned {status.upper()}",
+                "body":      event.full_text()[:500],
+            })
         return
     commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
@@ -105,7 +112,7 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
             "source":    event.source,
             "message":   event.message,
             "repo_name": repo.repo_name,
-            "reason":    "patch generated but commit/tests failed",
+            "reason":    "Patch was generated but commit/tests failed",
             "body":      event.full_text()[:500],
         })
         return
@@ -179,28 +186,25 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
     status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
     if status != "patch" or patch_path is None:
-        store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed",
+        store.record_fix(event.fingerprint, "skipped" if status in ("skip", "needs_human") else "failed",
                          repo_name=repo.repo_name)
-        send_failure_notification(sentinel, {
-            "source":    event.source,
-            "message":   event.message,
-            "repo_name": repo.repo_name,
-            "reason":    f"Claude Code returned {status.upper()}",
-            "body":      event.body[:500],
-        })
+        # For user-submitted issues: always notify (person is waiting)
+        submitter_uid = getattr(event, "submitter_user_id", "")
+        reason_text = marker if status == "needs_human" else f"Claude Code returned {status.upper()}"
+        notify_fix_blocked(sentinel, event.source, event.message,
+                           reason=reason_text, repo_name=repo.repo_name,
+                           submitter_user_id=submitter_uid)
         mark_done(event.issue_file)
         return
     commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
     if commit_status != "committed":
         store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
-        send_failure_notification(sentinel, {
-            "source":    event.source,
-            "message":   event.message,
-            "repo_name": repo.repo_name,
-            "reason":    "patch generated but commit/tests failed",
-            "body":      event.body[:500],
-        })
+        submitter_uid = getattr(event, "submitter_user_id", "")
+        notify_fix_blocked(sentinel, event.source, event.message,
+                           reason="Patch was generated but commit/tests failed",
+                           repo_name=repo.repo_name,
+                           submitter_user_id=submitter_uid)
         mark_done(event.issue_file)
         return
@@ -303,21 +307,31 @@ async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
     # -- Health URL checks -------------------------------------------------------
     if cfg_loader.repos:
-        health_results = evaluate_repos(
-            cfg_loader.repos, cfg_loader.log_sources, cfg_loader.sentinel.workspace_dir,
-            store=store,
+        import asyncio as _asyncio
+        _loop = _asyncio.get_event_loop()
+        health_results = await _loop.run_in_executor(
+            None,
+            lambda: evaluate_repos(
+                cfg_loader.repos, cfg_loader.log_sources,
+                cfg_loader.sentinel.workspace_dir, store=store,
+            )
         )
         for hr in health_results:
             if hr["action"] == "fix":
                 fp = f"health-{hr['repo_name']}"
                 store.record_error(fp, f"health_checker/{hr['repo_name']}", hr["message"])
                 if not store.fix_attempted_recently(fp, hours=6):
-                    synth = ErrorEvent(
+                    from .log_parser import ErrorEvent as _EE
+                    from datetime import datetime, timezone as _tz
+                    synth = _EE(
                         source=f"health_checker/{hr['repo_name']}",
-                        severity="ERROR",
-                        message=f"App startup failure: {hr['message']}",
-                        raw_lines=[hr["startup_failure_line"]],
-                        timestamp=None,
+                        log_file="",
+                        timestamp=datetime.now(_tz.utc).isoformat(),
+                        level="ERROR",
+                        thread="health_checker",
+                        logger_name="health_checker",
+                        message=f"App startup failure detected: {hr['message']}",
+                        stack_trace=[hr["startup_failure_line"]] if hr["startup_failure_line"] else [],
                     )
                     synth.fingerprint = fp
                     await _handle_error(synth, cfg_loader, store)

package/python/sentinel/notify.py CHANGED Viewed

@@ -1,173 +1,249 @@
-"""
-notify.py — Best-effort Slack alerts from any Sentinel module.
-Uses the Slack Web API directly (no Bolt / Socket Mode required).
-Calls never raise — failures are logged and silently dropped.
-"""
-import logging
-import re
-import time
-import requests
-logger = logging.getLogger(__name__)
-# ── Rate-limit / auth-failure detector ────────────────────────────────────────
-_RATE_LIMIT_RE = re.compile(
-    r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
-    r"|overloaded|credit.?balance|billing|529"
-    r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
-    r"|claude\.ai subscription|pro.?plan|login required",
-    re.IGNORECASE,
-)
-def is_rate_limited(text: str) -> bool:
-    """Return True if the text contains a rate-limit or auth-failure signal."""
-    return bool(_RATE_LIMIT_RE.search(text))
-# ── Circuit breaker ────────────────────────────────────────────────────────────
-#
-# Prevents alert storms when Claude is persistently rate-limited.
-# Each `source` string gets its own independent circuit:
-#   CLOSED → normal; alerts pass through immediately
-#   OPEN   → suppressed; one re-alert every CIRCUIT_COOLDOWN_SECONDS
-#
-# On recovery (first non-rate-limited output after OPEN):
-#   → post "resolved" to Slack, close the circuit
-CIRCUIT_COOLDOWN_SECONDS = 3600   # 1 h between repeat alerts while open
-# source → {opened_at, last_alerted_at, count}
-_circuits: dict[str, dict] = {}
-def get_circuit_status() -> dict:
-    """
-    Return a snapshot of all open circuits.
-    Used by the `check_auth_status` Boss tool.
-    Returns:
-        { source: { state, opened_at, open_for_seconds, alert_count } }
-        Only open circuits are included; an empty dict means everything is healthy.
-    """
-    now = time.time()
-    return {
-        src: {
-            "state": "open",
-            "opened_at": c["opened_at"],
-            "open_for_seconds": int(now - c["opened_at"]),
-            "alert_count": c["count"],
-        }
-        for src, c in _circuits.items()
-    }
-def _open_or_repeat(bot_token: str, channel: str, source: str, output: str) -> None:
-    """Open circuit on first hit; re-alert after cooldown if still failing."""
-    now = time.time()
-    circuit = _circuits.get(source)
-    if circuit is None:
-        # First occurrence — open and alert immediately
-        _circuits[source] = {"opened_at": now, "last_alerted_at": now, "count": 1}
-        logger.error("Circuit opened for %s: %s", source, output[:200])
-        slack_alert(bot_token, channel, rate_limit_message(source, output))
-        return
-    circuit["count"] += 1
-    elapsed = now - circuit["last_alerted_at"]
-    if elapsed >= CIRCUIT_COOLDOWN_SECONDS:
-        # Still failing after cooldown — remind admins once per hour
-        circuit["last_alerted_at"] = now
-        open_mins = int((now - circuit["opened_at"]) / 60)
-        msg = (
-            f":warning: *Sentinel — Claude usage/auth problem still active ({source})*\n"
-            f"Still failing after {open_mins} minutes. Total occurrences: {circuit['count']}.\n"
-            f"Last error:\n```{output.strip()[:300]}```\n"
-            f"Run `check_auth_status` in Slack to see the full picture."
-        )
-        logger.error("Circuit still open for %s (count=%d)", source, circuit["count"])
-        slack_alert(bot_token, channel, msg)
-    # else: within cooldown window — suppress
-def _close_if_open(bot_token: str, channel: str, source: str) -> None:
-    """If circuit was open, close it and post a recovery alert."""
-    circuit = _circuits.pop(source, None)
-    if circuit is None:
-        return
-    duration_mins = int((time.time() - circuit["opened_at"]) / 60)
-    msg = (
-        f":white_check_mark: *Sentinel — Claude auth restored ({source})*\n"
-        f"Fixed after {duration_mins} min. Total failures during outage: {circuit['count']}."
-    )
-    logger.info("Circuit closed for %s after %d min, %d failures", source, duration_mins, circuit["count"])
-    slack_alert(bot_token, channel, msg)
-def rate_limit_message(source: str, raw: str) -> str:
-    """Produce a human-readable Slack alert for a rate-limit / auth event (first occurrence)."""
-    snippet = raw.strip()[:300].replace("\n", " ")
-    return (
-        f":warning: *Sentinel — Claude usage/auth problem ({source})*\n"
-        f"Claude returned an error that requires admin attention:\n"
-        f"```{snippet}```\n"
-        f"*What to check:*\n"
-        f"• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit\n"
-        f"• Claude Pro: run `claude login` on the server to refresh the OAuth session\n"
-        f"• Both: Sentinel tries both methods — at least one must be working\n"
-        f"Repeat alerts will be suppressed for 1 hour. "
-        f"Run `check_auth_status` in Slack to see current state."
-    )
-# ── Alert dispatcher ──────────────────────────────────────────────────────────
-def slack_alert(bot_token: str, channel: str, text: str) -> None:
-    """
-    Post a plain-text alert to a Slack channel.
-    Best-effort: logs on failure, never raises.
-    """
-    if not bot_token or not channel:
-        logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
-        return
-    try:
-        resp = requests.post(
-            "https://slack.com/api/chat.postMessage",
-            headers={
-                "Authorization": f"Bearer {bot_token}",
-                "Content-Type": "application/json",
-            },
-            json={"channel": channel, "text": text},
-            timeout=10,
-        )
-        data = resp.json()
-        if not data.get("ok"):
-            logger.warning("slack_alert: Slack API error: %s", data.get("error"))
-    except Exception as exc:
-        logger.warning("slack_alert: failed to post: %s", exc)
-def alert_if_rate_limited(
-    bot_token: str,
-    channel: str,
-    source: str,
-    output: str,
-) -> bool:
-    """
-    Check output for rate-limit / auth signals and manage the circuit breaker.
-    - Rate limited  → open/keep-open circuit, alert (with cooldown suppression)
-    - Not limited   → close circuit if it was open (recovery alert), return False
-    Returns True if a rate-limit signal was found.
-    """
-    if not is_rate_limited(output):
-        _close_if_open(bot_token, channel, source)
-        return False
-    _open_or_repeat(bot_token, channel, source, output)
-    return True
+"""
+notify.py — Best-effort Slack alerts from any Sentinel module.
+Uses the Slack Web API directly (no Bolt / Socket Mode required).
+Calls never raise — failures are logged and silently dropped.
+"""
+import logging
+import re
+import time
+import requests
+logger = logging.getLogger(__name__)
+# ── Rate-limit / auth-failure detector ────────────────────────────────────────
+_RATE_LIMIT_RE = re.compile(
+    r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
+    r"|overloaded|credit.?balance|billing|529"
+    r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
+    r"|claude\.ai subscription|pro.?plan|login required",
+    re.IGNORECASE,
+)
+def is_rate_limited(text: str) -> bool:
+    """Return True if the text contains a rate-limit or auth-failure signal."""
+    return bool(_RATE_LIMIT_RE.search(text))
+# ── Circuit breaker ────────────────────────────────────────────────────────────
+#
+# Prevents alert storms when Claude is persistently rate-limited.
+# Each `source` string gets its own independent circuit:
+#   CLOSED → normal; alerts pass through immediately
+#   OPEN   → suppressed; one re-alert every CIRCUIT_COOLDOWN_SECONDS
+#
+# On recovery (first non-rate-limited output after OPEN):
+#   → post "resolved" to Slack, close the circuit
+CIRCUIT_COOLDOWN_SECONDS = 3600   # 1 h between repeat alerts while open
+# source → {opened_at, last_alerted_at, count}
+_circuits: dict[str, dict] = {}
+def get_circuit_status() -> dict:
+    """
+    Return a snapshot of all open circuits.
+    Used by the `check_auth_status` Boss tool.
+    Returns:
+        { source: { state, opened_at, open_for_seconds, alert_count } }
+        Only open circuits are included; an empty dict means everything is healthy.
+    """
+    now = time.time()
+    return {
+        src: {
+            "state": "open",
+            "opened_at": c["opened_at"],
+            "open_for_seconds": int(now - c["opened_at"]),
+            "alert_count": c["count"],
+        }
+        for src, c in _circuits.items()
+    }
+def _open_or_repeat(bot_token: str, channel: str, source: str, output: str) -> None:
+    """Open circuit on first hit; re-alert after cooldown if still failing."""
+    now = time.time()
+    circuit = _circuits.get(source)
+    if circuit is None:
+        # First occurrence — open and alert immediately
+        _circuits[source] = {"opened_at": now, "last_alerted_at": now, "count": 1}
+        logger.error("Circuit opened for %s: %s", source, output[:200])
+        slack_alert(bot_token, channel, rate_limit_message(source, output))
+        return
+    circuit["count"] += 1
+    elapsed = now - circuit["last_alerted_at"]
+    if elapsed >= CIRCUIT_COOLDOWN_SECONDS:
+        # Still failing after cooldown — remind admins once per hour
+        circuit["last_alerted_at"] = now
+        open_mins = int((now - circuit["opened_at"]) / 60)
+        msg = (
+            f":warning: *Sentinel — Claude usage/auth problem still active ({source})*\n"
+            f"Still failing after {open_mins} minutes. Total occurrences: {circuit['count']}.\n"
+            f"Last error:\n```{output.strip()[:300]}```\n"
+            f"Run `check_auth_status` in Slack to see the full picture."
+        )
+        logger.error("Circuit still open for %s (count=%d)", source, circuit["count"])
+        slack_alert(bot_token, channel, msg)
+    # else: within cooldown window — suppress
+def _close_if_open(bot_token: str, channel: str, source: str) -> None:
+    """If circuit was open, close it and post a recovery alert."""
+    circuit = _circuits.pop(source, None)
+    if circuit is None:
+        return
+    duration_mins = int((time.time() - circuit["opened_at"]) / 60)
+    msg = (
+        f":white_check_mark: *Sentinel — Claude auth restored ({source})*\n"
+        f"Fixed after {duration_mins} min. Total failures during outage: {circuit['count']}."
+    )
+    logger.info("Circuit closed for %s after %d min, %d failures", source, duration_mins, circuit["count"])
+    slack_alert(bot_token, channel, msg)
+def rate_limit_message(source: str, raw: str) -> str:
+    """Produce a human-readable Slack alert for a rate-limit / auth event (first occurrence)."""
+    snippet = raw.strip()[:300].replace("\n", " ")
+    return (
+        f":warning: *Sentinel — Claude usage/auth problem ({source})*\n"
+        f"Claude returned an error that requires admin attention:\n"
+        f"```{snippet}```\n"
+        f"*What to check:*\n"
+        f"• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit\n"
+        f"• Claude Pro: run `claude login` on the server to refresh the OAuth session\n"
+        f"• Both: Sentinel tries both methods — at least one must be working\n"
+        f"Repeat alerts will be suppressed for 1 hour. "
+        f"Run `check_auth_status` in Slack to see current state."
+    )
+# ── Alert dispatcher ──────────────────────────────────────────────────────────
+def slack_alert(bot_token: str, channel: str, text: str) -> None:
+    """
+    Post a plain-text alert to a Slack channel.
+    Best-effort: logs on failure, never raises.
+    """
+    if not bot_token or not channel:
+        logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
+        return
+    try:
+        resp = requests.post(
+            "https://slack.com/api/chat.postMessage",
+            headers={
+                "Authorization": f"Bearer {bot_token}",
+                "Content-Type": "application/json",
+            },
+            json={"channel": channel, "text": text},
+            timeout=10,
+        )
+        data = resp.json()
+        if not data.get("ok"):
+            logger.warning("slack_alert: Slack API error: %s", data.get("error"))
+    except Exception as exc:
+        logger.warning("slack_alert: failed to post: %s", exc)
+def slack_dm(bot_token: str, user_id: str, text: str) -> None:
+    """
+    Send a direct message to a specific Slack user.
+    Opens a DM channel via conversations.open, then posts.
+    Best-effort: logs on failure, never raises.
+    """
+    if not bot_token or not user_id:
+        logger.debug("slack_dm: no token/user_id — skipping DM")
+        return
+    try:
+        resp = requests.post(
+            "https://slack.com/api/conversations.open",
+            headers={"Authorization": f"Bearer {bot_token}", "Content-Type": "application/json"},
+            json={"users": user_id},
+            timeout=10,
+        )
+        data = resp.json()
+        if not data.get("ok"):
+            logger.warning("slack_dm: conversations.open failed: %s", data.get("error"))
+            return
+        dm_channel = data["channel"]["id"]
+        slack_alert(bot_token, dm_channel, text)
+    except Exception as exc:
+        logger.warning("slack_dm: failed to DM %s: %s", user_id, exc)
+def notify_fix_blocked(
+    cfg,
+    source: str,
+    message: str,
+    reason: str,
+    repo_name: str = "",
+    submitter_user_id: str = "",
+) -> None:
+    """
+    Notify that a fix needs human intervention.
+    - If submitter_user_id is known: DM that person directly.
+    - Otherwise: @channel in the configured Slack channel.
+    - Always: email admins via reporter.send_failure_notification.
+    """
+    short_reason = (reason or "Claude could not determine a safe fix.")[:600]
+    repo_line = f"\n*Repo:* {repo_name}" if repo_name else ""
+    slack_text = (
+        f":hand: *Fix blocked — human intervention needed*\n"
+        f"*Source:* {source}\n"
+        f"*Issue:* {message[:200]}{repo_line}\n"
+        f"*Reason:*\n{short_reason}"
+    )
+    if submitter_user_id:
+        slack_dm(cfg.slack_bot_token, submitter_user_id, slack_text)
+    else:
+        # No known submitter — broadcast to the whole channel
+        slack_alert(
+            cfg.slack_bot_token,
+            cfg.slack_channel,
+            f"<!channel> {slack_text}",
+        )
+    # Always email admins
+    try:
+        from .reporter import send_failure_notification
+        send_failure_notification(cfg, {
+            "source":    source,
+            "message":   message,
+            "repo_name": repo_name,
+            "reason":    f"Needs human intervention: {short_reason[:200]}",
+            "body":      reason,
+        })
+    except Exception as exc:
+        logger.warning("notify_fix_blocked: email notification failed: %s", exc)
+def alert_if_rate_limited(
+    bot_token: str,
+    channel: str,
+    source: str,
+    output: str,
+) -> bool:
+    """
+    Check output for rate-limit / auth signals and manage the circuit breaker.
+    - Rate limited  → open/keep-open circuit, alert (with cooldown suppression)
+    - Not limited   → close circuit if it was open (recovery alert), return False
+    Returns True if a rate-limit signal was found.
+    """
+    if not is_rate_limited(output):
+        _close_if_open(bot_token, channel, source)
+        return False
+    _open_or_repeat(bot_token, channel, source, output)
+    return True