@misterhuydo/sentinel 1.2.7 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-03-23T11:40:37.793Z",
3
- "checkpoint_at": "2026-03-23T11:40:37.794Z",
2
+ "message": "Auto-checkpoint at 2026-03-23T11:46:43.946Z",
3
+ "checkpoint_at": "2026-03-23T11:46:43.948Z",
4
4
  "active_files": [],
5
5
  "notes": [],
6
6
  "mtime_snapshot": {}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.2.7",
3
+ "version": "1.2.8",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -80,9 +80,22 @@ def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers:
80
80
  f"1. {step1}",
81
81
  "2. Use your available tools to explore the codebase and identify the root cause.",
82
82
  f"3. {marker_instruction}",
83
- "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
84
- "5. Do not explain. Output only the patch.",
85
- "6. If you cannot determine a safe fix, output: SKIP: <reason>",
83
+ "4. Consider all possible fix approaches. For each, weigh:",
84
+ " - Confidence: is this definitely the root cause?",
85
+ " - Safety: could this break other functionality?",
86
+ " - Scope: is it minimal and targeted?",
87
+ " Choose the safest minimal approach. If multiple valid options exist, pick the one",
88
+ " with highest confidence and lowest blast radius.",
89
+ "5. Output ONLY a unified diff patch (git diff format) for the chosen fix.",
90
+ "6. Do not explain. Output only the patch.",
91
+ "7. Only if you truly cannot produce a safe fix — e.g. the root cause requires a",
92
+ " DB schema change, infrastructure update, business logic decision, or is inside",
93
+ " a third-party library — output exactly:",
94
+ " NEEDS_HUMAN: <explanation>",
95
+ " Include: (a) root cause identified, (b) approaches you considered and why each",
96
+ " was insufficient or unsafe, (c) exactly what a human needs to do or decide.",
97
+ " Do NOT output NEEDS_HUMAN just because the fix is complex — only when human",
98
+ " judgement or access is genuinely required.",
86
99
  ]
87
100
  return "\n".join(lines_out)
88
101
 
@@ -148,7 +161,7 @@ def generate_fix(
148
161
 
149
162
  Returns:
150
163
  (status, patch_path, marker)
151
- status: "patch" | "skip" | "error"
164
+ status: "patch" | "skip" | "needs_human" | "error"
152
165
 
153
166
  Auth strategy — API key and Claude Pro (OAuth) are interchangeable:
154
167
  Primary : Claude Pro (OAuth) if claude_pro_for_tasks=True, else API key
@@ -237,10 +250,15 @@ def generate_fix(
237
250
  output=output,
238
251
  )
239
252
 
253
+ if output.strip().upper().startswith("NEEDS_HUMAN:"):
254
+ reason = output.strip()[len("NEEDS_HUMAN:"):].strip()
255
+ logger.info("Claude needs human for %s: %s", event.fingerprint, reason[:200])
256
+ return "needs_human", None, reason
257
+
240
258
  if output.strip().upper().startswith("SKIP:"):
241
259
  reason = output.strip()[5:].strip()
242
260
  logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
243
- return "skip", None, ""
261
+ return "skip", None, reason
244
262
 
245
263
  patch = _extract_patch(output)
246
264
  if not patch:
@@ -41,6 +41,7 @@ class IssueEvent:
41
41
  fingerprint: str = ""
42
42
  severity: str = "ERROR"
43
43
  timestamp: str = ""
44
+ submitter_user_id: str = "" # Slack user ID who raised this via Boss, if known
44
45
 
45
46
  # Compatibility fields matching ErrorEvent interface
46
47
  level: str = "ERROR"
@@ -53,6 +54,13 @@ class IssueEvent:
53
54
  if not self.fingerprint:
54
55
  raw = f"issue:{self.source}:{self.message[:200]}"
55
56
  self.fingerprint = hashlib.sha1(raw.encode()).hexdigest()[:16]
57
+ if not self.submitter_user_id:
58
+ import re as _re
59
+ for _line in self.body.splitlines():
60
+ _m = _re.match(r'SUBMITTED_BY:.*\(([UW][A-Z0-9]+)\)', _line.strip())
61
+ if _m:
62
+ self.submitter_user_id = _m.group(1)
63
+ break
56
64
  if not self.timestamp:
57
65
  self.timestamp = datetime.now(timezone.utc).isoformat()
58
66
  if not self.stack_trace:
@@ -28,6 +28,7 @@ from .log_parser import parse_all, scan_all_for_markers, ErrorEvent
28
28
  from .issue_watcher import scan_issues, mark_done, IssueEvent
29
29
  from .repo_router import route
30
30
  from .reporter import build_and_send, send_fix_notification, send_failure_notification, send_confirmed_notification, send_regression_notification, send_startup_notification, send_upgrade_notification
31
+ from .notify import notify_fix_blocked
31
32
  from .health_checker import evaluate_repos
32
33
  from .state_store import StateStore
33
34
 
@@ -87,27 +88,29 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
87
88
  status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
88
89
 
89
90
  if status != "patch" or patch_path is None:
90
- outcome = "skipped" if status == "skip" else "failed"
91
+ outcome = "skipped" if status in ("skip", "needs_human") else "failed"
91
92
  store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
92
- send_failure_notification(sentinel, {
93
- "source": event.source,
94
- "message": event.message,
95
- "repo_name": repo.repo_name,
96
- "reason": f"Claude Code returned {status.upper()}",
97
- "body": event.full_text()[:500],
98
- })
93
+ submitter_uid = getattr(event, "submitter_user_id", "")
94
+ if status == "needs_human":
95
+ # marker holds the reason string for needs_human
96
+ notify_fix_blocked(sentinel, event.source, event.message,
97
+ reason=marker, repo_name=repo.repo_name,
98
+ submitter_user_id=submitter_uid)
99
+ else:
100
+ notify_fix_blocked(sentinel, event.source, event.message,
101
+ reason=f"Claude Code returned {status.upper()}",
102
+ repo_name=repo.repo_name,
103
+ submitter_user_id=submitter_uid)
99
104
  return
100
105
 
101
106
  commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
102
107
  if commit_status != "committed":
103
108
  store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
104
- send_failure_notification(sentinel, {
105
- "source": event.source,
106
- "message": event.message,
107
- "repo_name": repo.repo_name,
108
- "reason": "patch generated but commit/tests failed",
109
- "body": event.full_text()[:500],
110
- })
109
+ submitter_uid = getattr(event, "submitter_user_id", "")
110
+ notify_fix_blocked(sentinel, event.source, event.message,
111
+ reason="Patch was generated but commit/tests failed",
112
+ repo_name=repo.repo_name,
113
+ submitter_user_id=submitter_uid)
111
114
  return
112
115
 
113
116
  branch, pr_url = publish(event, repo, sentinel, commit_hash)
@@ -179,28 +182,29 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
179
182
  status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
180
183
 
181
184
  if status != "patch" or patch_path is None:
182
- store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed",
185
+ store.record_fix(event.fingerprint, "skipped" if status in ("skip", "needs_human") else "failed",
183
186
  repo_name=repo.repo_name)
184
- send_failure_notification(sentinel, {
185
- "source": event.source,
186
- "message": event.message,
187
- "repo_name": repo.repo_name,
188
- "reason": f"Claude Code returned {status.upper()}",
189
- "body": event.body[:500],
190
- })
187
+ submitter_uid = getattr(event, "submitter_user_id", "")
188
+ if status == "needs_human":
189
+ notify_fix_blocked(sentinel, event.source, event.message,
190
+ reason=marker, repo_name=repo.repo_name,
191
+ submitter_user_id=submitter_uid)
192
+ else:
193
+ notify_fix_blocked(sentinel, event.source, event.message,
194
+ reason=f"Claude Code returned {status.upper()}",
195
+ repo_name=repo.repo_name,
196
+ submitter_user_id=submitter_uid)
191
197
  mark_done(event.issue_file)
192
198
  return
193
199
 
194
200
  commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
195
201
  if commit_status != "committed":
196
202
  store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
197
- send_failure_notification(sentinel, {
198
- "source": event.source,
199
- "message": event.message,
200
- "repo_name": repo.repo_name,
201
- "reason": "patch generated but commit/tests failed",
202
- "body": event.body[:500],
203
- })
203
+ submitter_uid = getattr(event, "submitter_user_id", "")
204
+ notify_fix_blocked(sentinel, event.source, event.message,
205
+ reason="Patch was generated but commit/tests failed",
206
+ repo_name=repo.repo_name,
207
+ submitter_user_id=submitter_uid)
204
208
  mark_done(event.issue_file)
205
209
  return
206
210
 
@@ -1,173 +1,249 @@
1
- """
2
- notify.py — Best-effort Slack alerts from any Sentinel module.
3
-
4
- Uses the Slack Web API directly (no Bolt / Socket Mode required).
5
- Calls never raise — failures are logged and silently dropped.
6
- """
7
-
8
- import logging
9
- import re
10
- import time
11
-
12
- import requests
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- # ── Rate-limit / auth-failure detector ────────────────────────────────────────
17
-
18
- _RATE_LIMIT_RE = re.compile(
19
- r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
20
- r"|overloaded|credit.?balance|billing|529"
21
- r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
22
- r"|claude\.ai subscription|pro.?plan|login required",
23
- re.IGNORECASE,
24
- )
25
-
26
-
27
- def is_rate_limited(text: str) -> bool:
28
- """Return True if the text contains a rate-limit or auth-failure signal."""
29
- return bool(_RATE_LIMIT_RE.search(text))
30
-
31
-
32
- # ── Circuit breaker ────────────────────────────────────────────────────────────
33
- #
34
- # Prevents alert storms when Claude is persistently rate-limited.
35
- # Each `source` string gets its own independent circuit:
36
- # CLOSED → normal; alerts pass through immediately
37
- # OPEN → suppressed; one re-alert every CIRCUIT_COOLDOWN_SECONDS
38
- #
39
- # On recovery (first non-rate-limited output after OPEN):
40
- # → post "resolved" to Slack, close the circuit
41
-
42
- CIRCUIT_COOLDOWN_SECONDS = 3600 # 1 h between repeat alerts while open
43
-
44
- # source → {opened_at, last_alerted_at, count}
45
- _circuits: dict[str, dict] = {}
46
-
47
-
48
- def get_circuit_status() -> dict:
49
- """
50
- Return a snapshot of all open circuits.
51
- Used by the `check_auth_status` Boss tool.
52
-
53
- Returns:
54
- { source: { state, opened_at, open_for_seconds, alert_count } }
55
- Only open circuits are included; an empty dict means everything is healthy.
56
- """
57
- now = time.time()
58
- return {
59
- src: {
60
- "state": "open",
61
- "opened_at": c["opened_at"],
62
- "open_for_seconds": int(now - c["opened_at"]),
63
- "alert_count": c["count"],
64
- }
65
- for src, c in _circuits.items()
66
- }
67
-
68
-
69
- def _open_or_repeat(bot_token: str, channel: str, source: str, output: str) -> None:
70
- """Open circuit on first hit; re-alert after cooldown if still failing."""
71
- now = time.time()
72
- circuit = _circuits.get(source)
73
-
74
- if circuit is None:
75
- # First occurrence — open and alert immediately
76
- _circuits[source] = {"opened_at": now, "last_alerted_at": now, "count": 1}
77
- logger.error("Circuit opened for %s: %s", source, output[:200])
78
- slack_alert(bot_token, channel, rate_limit_message(source, output))
79
- return
80
-
81
- circuit["count"] += 1
82
- elapsed = now - circuit["last_alerted_at"]
83
- if elapsed >= CIRCUIT_COOLDOWN_SECONDS:
84
- # Still failing after cooldown — remind admins once per hour
85
- circuit["last_alerted_at"] = now
86
- open_mins = int((now - circuit["opened_at"]) / 60)
87
- msg = (
88
- f":warning: *Sentinel — Claude usage/auth problem still active ({source})*\n"
89
- f"Still failing after {open_mins} minutes. Total occurrences: {circuit['count']}.\n"
90
- f"Last error:\n```{output.strip()[:300]}```\n"
91
- f"Run `check_auth_status` in Slack to see the full picture."
92
- )
93
- logger.error("Circuit still open for %s (count=%d)", source, circuit["count"])
94
- slack_alert(bot_token, channel, msg)
95
- # else: within cooldown window — suppress
96
-
97
-
98
- def _close_if_open(bot_token: str, channel: str, source: str) -> None:
99
- """If circuit was open, close it and post a recovery alert."""
100
- circuit = _circuits.pop(source, None)
101
- if circuit is None:
102
- return
103
- duration_mins = int((time.time() - circuit["opened_at"]) / 60)
104
- msg = (
105
- f":white_check_mark: *Sentinel — Claude auth restored ({source})*\n"
106
- f"Fixed after {duration_mins} min. Total failures during outage: {circuit['count']}."
107
- )
108
- logger.info("Circuit closed for %s after %d min, %d failures", source, duration_mins, circuit["count"])
109
- slack_alert(bot_token, channel, msg)
110
-
111
-
112
- def rate_limit_message(source: str, raw: str) -> str:
113
- """Produce a human-readable Slack alert for a rate-limit / auth event (first occurrence)."""
114
- snippet = raw.strip()[:300].replace("\n", " ")
115
- return (
116
- f":warning: *Sentinel — Claude usage/auth problem ({source})*\n"
117
- f"Claude returned an error that requires admin attention:\n"
118
- f"```{snippet}```\n"
119
- f"*What to check:*\n"
120
- f"• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit\n"
121
- f"• Claude Pro: run `claude login` on the server to refresh the OAuth session\n"
122
- f"• Both: Sentinel tries both methods — at least one must be working\n"
123
- f"Repeat alerts will be suppressed for 1 hour. "
124
- f"Run `check_auth_status` in Slack to see current state."
125
- )
126
-
127
-
128
- # ── Alert dispatcher ──────────────────────────────────────────────────────────
129
-
130
- def slack_alert(bot_token: str, channel: str, text: str) -> None:
131
- """
132
- Post a plain-text alert to a Slack channel.
133
- Best-effort: logs on failure, never raises.
134
- """
135
- if not bot_token or not channel:
136
- logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
137
- return
138
- try:
139
- resp = requests.post(
140
- "https://slack.com/api/chat.postMessage",
141
- headers={
142
- "Authorization": f"Bearer {bot_token}",
143
- "Content-Type": "application/json",
144
- },
145
- json={"channel": channel, "text": text},
146
- timeout=10,
147
- )
148
- data = resp.json()
149
- if not data.get("ok"):
150
- logger.warning("slack_alert: Slack API error: %s", data.get("error"))
151
- except Exception as exc:
152
- logger.warning("slack_alert: failed to post: %s", exc)
153
-
154
-
155
- def alert_if_rate_limited(
156
- bot_token: str,
157
- channel: str,
158
- source: str,
159
- output: str,
160
- ) -> bool:
161
- """
162
- Check output for rate-limit / auth signals and manage the circuit breaker.
163
-
164
- - Rate limited → open/keep-open circuit, alert (with cooldown suppression)
165
- - Not limited → close circuit if it was open (recovery alert), return False
166
-
167
- Returns True if a rate-limit signal was found.
168
- """
169
- if not is_rate_limited(output):
170
- _close_if_open(bot_token, channel, source)
171
- return False
172
- _open_or_repeat(bot_token, channel, source, output)
173
- return True
1
+ """
2
+ notify.py — Best-effort Slack alerts from any Sentinel module.
3
+
4
+ Uses the Slack Web API directly (no Bolt / Socket Mode required).
5
+ Calls never raise — failures are logged and silently dropped.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ import time
11
+
12
+ import requests
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # ── Rate-limit / auth-failure detector ────────────────────────────────────────
17
+
18
# Signals that Claude refused work because of usage limits, billing or auth.
# Matching is case-insensitive.
_RATE_LIMIT_RE = re.compile(
    r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
    # 529 must stand alone as a number: digits embedded in a longer number
    # (timestamps, row counts, hashes) must not trip the detector.
    r"|overloaded|credit.?balance|billing|(?<!\d)529(?!\d)"
    r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
    r"|claude\.ai subscription|pro.?plan|login required",
    re.IGNORECASE,
)


def is_rate_limited(text: str) -> bool:
    """Return True if the text contains a rate-limit or auth-failure signal.

    Args:
        text: Raw output to inspect. Empty or ``None`` input is treated as
            "no signal" instead of raising inside the regex engine.

    Returns:
        True when any alternative of ``_RATE_LIMIT_RE`` matches anywhere in
        *text*, False otherwise.
    """
    if not text:
        return False
    return bool(_RATE_LIMIT_RE.search(text))
30
+
31
+
32
+ # ── Circuit breaker ────────────────────────────────────────────────────────────
33
+ #
34
+ # Prevents alert storms when Claude is persistently rate-limited.
35
+ # Each `source` string gets its own independent circuit:
36
+ # CLOSED → normal; alerts pass through immediately
37
+ # OPEN → suppressed; one re-alert every CIRCUIT_COOLDOWN_SECONDS
38
+ #
39
+ # On recovery (first non-rate-limited output after OPEN):
40
+ # → post "resolved" to Slack, close the circuit
41
+
42
CIRCUIT_COOLDOWN_SECONDS = 3600  # 1 h between repeat alerts while open

# source → {opened_at, last_alerted_at, count}
_circuits: dict[str, dict] = {}


def get_circuit_status() -> dict:
    """
    Snapshot every circuit currently held in the module-level registry.
    Used by the `check_auth_status` Boss tool.

    Returns:
        { source: { state, opened_at, open_for_seconds, alert_count } }
        A source only has an entry while its circuit is open, so an empty
        dict means everything is healthy.
    """
    taken_at = time.time()
    snapshot = {}
    for name, entry in _circuits.items():
        opened = entry["opened_at"]
        snapshot[name] = {
            "state": "open",
            "opened_at": opened,
            # Whole seconds elapsed since the circuit opened.
            "open_for_seconds": int(taken_at - opened),
            "alert_count": entry["count"],
        }
    return snapshot
67
+
68
+
69
def _open_or_repeat(bot_token: str, channel: str, source: str, output: str) -> None:
    """Register a failure for *source* and alert Slack when it is due.

    First failure: the circuit is created and an alert goes out immediately.
    Later failures: the counter is bumped, and a reminder is posted only when
    at least CIRCUIT_COOLDOWN_SECONDS have passed since the last alert.
    """
    stamp = time.time()
    state = _circuits.get(source)

    if state is not None:
        state["count"] += 1
        if stamp - state["last_alerted_at"] < CIRCUIT_COOLDOWN_SECONDS:
            # Still inside the cooldown window: count it, stay quiet.
            return

        # Cooldown elapsed while still failing: remind the admins.
        state["last_alerted_at"] = stamp
        minutes_open = int((stamp - state["opened_at"]) / 60)
        reminder = (
            f":warning: *Sentinel — Claude usage/auth problem still active ({source})*\n"
            f"Still failing after {minutes_open} minutes. Total occurrences: {state['count']}.\n"
            f"Last error:\n```{output.strip()[:300]}```\n"
            f"Run `check_auth_status` in Slack to see the full picture."
        )
        logger.error("Circuit still open for %s (count=%d)", source, state["count"])
        slack_alert(bot_token, channel, reminder)
        return

    # No circuit yet: open one and alert straight away.
    _circuits[source] = {"opened_at": stamp, "last_alerted_at": stamp, "count": 1}
    logger.error("Circuit opened for %s: %s", source, output[:200])
    slack_alert(bot_token, channel, rate_limit_message(source, output))
96
+
97
+
98
def _close_if_open(bot_token: str, channel: str, source: str) -> None:
    """Drop the circuit for *source*, if one exists, and announce the recovery.

    Does nothing when no circuit is registered for *source*.
    """
    closed = _circuits.pop(source, None)
    if closed is not None:
        failures = closed["count"]
        # Outage length in whole minutes, measured from when the circuit opened.
        outage_mins = int((time.time() - closed["opened_at"]) / 60)
        recovery = (
            f":white_check_mark: *Sentinel — Claude auth restored ({source})*\n"
            f"Fixed after {outage_mins} min. Total failures during outage: {failures}."
        )
        logger.info("Circuit closed for %s after %d min, %d failures", source, outage_mins, failures)
        slack_alert(bot_token, channel, recovery)
110
+
111
+
112
def rate_limit_message(source: str, raw: str) -> str:
    """Build the first-occurrence Slack alert for a rate-limit / auth event.

    The raw error is stripped, cut to 300 characters and flattened onto one
    line before being embedded in a code fence.
    """
    excerpt = raw.strip()[:300]
    excerpt = excerpt.replace("\n", " ")
    parts = [
        f":warning: *Sentinel — Claude usage/auth problem ({source})*",
        "Claude returned an error that requires admin attention:",
        f"```{excerpt}```",
        "*What to check:*",
        "• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit",
        "• Claude Pro: run `claude login` on the server to refresh the OAuth session",
        "• Both: Sentinel tries both methods — at least one must be working",
        "Repeat alerts will be suppressed for 1 hour. "
        "Run `check_auth_status` in Slack to see current state.",
    ]
    return "\n".join(parts)
126
+
127
+
128
+ # ── Alert dispatcher ──────────────────────────────────────────────────────────
129
+
130
def slack_alert(bot_token: str, channel: str, text: str) -> None:
    """
    Send *text* to a Slack channel through chat.postMessage.
    Best-effort: every failure is logged and swallowed, nothing is raised.
    """
    if not (bot_token and channel):
        logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
        return

    try:
        reply = requests.post(
            "https://slack.com/api/chat.postMessage",
            headers={
                "Authorization": f"Bearer {bot_token}",
                "Content-Type": "application/json",
            },
            json={"channel": channel, "text": text},
            timeout=10,
        ).json()
        # Slack reports failures in the response body via the "ok" flag.
        if not reply.get("ok"):
            logger.warning("slack_alert: Slack API error: %s", reply.get("error"))
    except Exception as exc:
        logger.warning("slack_alert: failed to post: %s", exc)
153
+
154
+
155
+
156
def slack_dm(bot_token: str, user_id: str, text: str) -> None:
    """
    Deliver *text* to one Slack user as a direct message.
    Asks conversations.open for the DM channel, then posts via slack_alert.
    Best-effort: every failure is logged and swallowed, nothing is raised.
    """
    if not (bot_token and user_id):
        logger.debug("slack_dm: no token/user_id — skipping DM")
        return

    try:
        opened = requests.post(
            "https://slack.com/api/conversations.open",
            headers={"Authorization": f"Bearer {bot_token}", "Content-Type": "application/json"},
            json={"users": user_id},
            timeout=10,
        ).json()
        if opened.get("ok"):
            # The DM channel id is then used like any other channel.
            slack_alert(bot_token, opened["channel"]["id"], text)
        else:
            logger.warning("slack_dm: conversations.open failed: %s", opened.get("error"))
    except Exception as exc:
        logger.warning("slack_dm: failed to DM %s: %s", user_id, exc)
180
+
181
+
182
def notify_fix_blocked(
    cfg,
    source: str,
    message: str,
    reason: str,
    repo_name: str = "",
    submitter_user_id: str = "",
) -> None:
    """
    Notify that a fix needs human intervention.

    - If submitter_user_id is known: DM that person directly.
    - Otherwise: @channel in the configured Slack channel.
    - Always: email admins via reporter.send_failure_notification.

    Best-effort: the Slack step and the email step are isolated from each
    other, and neither lets an exception escape to the caller.

    Args:
        cfg: Object expected to expose ``slack_bot_token`` and
            ``slack_channel``; a missing attribute is treated as
            "not configured".
        source: Origin of the event, shown verbatim in the notification.
        message: Issue summary; only the first 200 characters go to Slack.
        reason: Why the fix is blocked; a generic sentence is used when empty.
        repo_name: Optional repository name added to the Slack text.
        submitter_user_id: Slack user id to DM; empty means broadcast.
    """
    message = message or ""  # tolerate None so the slice below cannot raise
    short_reason = (reason or "Claude could not determine a safe fix.")[:600]
    repo_line = f"\n*Repo:* {repo_name}" if repo_name else ""

    slack_text = (
        f":hand: *Fix blocked — human intervention needed*\n"
        f"*Source:* {source}\n"
        f"*Issue:* {message[:200]}{repo_line}\n"
        f"*Reason:*\n{short_reason}"
    )

    # A Slack failure must not prevent the email below or abort the caller.
    try:
        bot_token = getattr(cfg, "slack_bot_token", "")
        if submitter_user_id:
            slack_dm(bot_token, submitter_user_id, slack_text)
        else:
            # No known submitter — broadcast to the whole channel
            slack_alert(
                bot_token,
                getattr(cfg, "slack_channel", ""),
                f"<!channel> {slack_text}",
            )
    except Exception as exc:
        logger.warning("notify_fix_blocked: Slack notification failed: %s", exc)

    # Always email admins
    try:
        from .reporter import send_failure_notification
        send_failure_notification(cfg, {
            "source": source,
            "message": message,
            "repo_name": repo_name,
            "reason": f"Needs human intervention: {short_reason[:200]}",
            # Fall back to the generic sentence so the body is never empty.
            "body": reason or short_reason,
        })
    except Exception as exc:
        logger.warning("notify_fix_blocked: email notification failed: %s", exc)
229
+
230
+
231
def alert_if_rate_limited(
    bot_token: str,
    channel: str,
    source: str,
    output: str,
) -> bool:
    """
    Inspect *output* for rate-limit / auth signals and drive the circuit breaker.

    - Signal found    → open the circuit or keep it open (alerts are throttled)
    - No signal found → close the circuit if it was open (posts a recovery alert)

    Returns True if a rate-limit signal was found, False otherwise.
    """
    limited = is_rate_limited(output)
    if limited:
        _open_or_repeat(bot_token, channel, source, output)
    else:
        _close_if_open(bot_token, channel, source)
    return limited