@misterhuydo/sentinel 1.0.77 → 1.0.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,160 +1,177 @@
1
- """
2
- fix_engine.py — Generate code fixes via Claude Code (headless).
3
-
4
- Invokes: claude --print "<prompt>" 2>&1
5
-
6
- Cairn MCP context is fetched automatically by Claude Code via its MCP tool
7
- connection — Sentinel does not need to query or inject it explicitly.
8
- """
9
-
10
- import logging
11
- import re
12
- import subprocess
13
- import textwrap
14
- from pathlib import Path
15
-
16
- from .config_loader import RepoConfig, SentinelConfig
17
- from .log_parser import ErrorEvent
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
- SUBPROCESS_TIMEOUT = 120
22
- MAX_FILES_IN_PATCH = 5
23
- MAX_LINES_IN_PATCH = 200
24
-
25
- _DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
26
- _DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
27
-
28
-
29
- def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
30
- if log_file and log_file.exists():
31
- ctx = (
32
- "LOG FILE: " + str(log_file) + "\n"
33
- "Read this file first -- it contains the last 48h of logs from "
34
- + event.source + ".\n"
35
- "Use it to understand frequency, context, and preceding warnings."
36
- )
37
- step1 = "Read the log file above to understand what led up to this error."
38
- else:
39
- ctx = (
40
- "SOURCE: " + event.source + "\n"
41
- "No rolling log file available. The full issue description is below."
42
- )
43
- step1 = "Use the issue description above as your primary context."
44
-
45
- marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
46
- marker_instruction = "\n".join([
47
- "For EVERY method and constructor you modify, add this as the FIRST executable line:",
48
- f' Java/Kotlin : log.info("{marker_label}");',
49
- f' Python : logger.info("{marker_label}")',
50
- f' Node.js : logger.info("{marker_label}")',
51
- "Use the logger already present in the file. Do not add new imports.",
52
- "This applies to ALL modified methods and constructors without exception.",
53
- ])
54
-
55
- cleanup = ""
56
- if stale_markers:
57
- marker_list = "\n".join(f" - {m}" for m in stale_markers)
58
- cleanup = (
59
- "CLEANUP (do this first, before the fix):\n"
60
- "Remove any log lines containing these stale Sentinel markers from the codebase:\n"
61
- + marker_list + "\n"
62
- "Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
63
- )
64
-
65
- lines_out = [
66
- f"You are fixing a production bug in the repository at {repo.local_path}.",
67
- f"Repository: {repo.repo_name}",
68
- "",
69
- ]
70
- if cleanup:
71
- lines_out += [cleanup, ""]
72
- lines_out += [
73
- ctx,
74
- "",
75
- f"ISSUE TO FIX (from {event.source}):",
76
- event.full_text(),
77
- "",
78
- "Task:",
79
- f"1. {step1}",
80
- "2. Use your available tools to explore the codebase and identify the root cause.",
81
- f"3. {marker_instruction}",
82
- "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
83
- "5. Do not explain. Output only the patch.",
84
- "6. If you cannot determine a safe fix, output: SKIP: <reason>",
85
- ]
86
- return "\n".join(lines_out)
87
-
88
- def _validate_patch(patch: str) -> tuple[bool, str]:
89
- files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
90
- lines_changed = len([
91
- l for l in patch.splitlines()
92
- if l.startswith(("+", "-")) and not l.startswith(("+++", "---"))
93
- ])
94
- if files_changed > MAX_FILES_IN_PATCH:
95
- return False, f"Patch touches {files_changed} files (limit {MAX_FILES_IN_PATCH})"
96
- if lines_changed > MAX_LINES_IN_PATCH:
97
- return False, f"Patch changes {lines_changed} lines (limit {MAX_LINES_IN_PATCH})"
98
- return True, ""
99
-
100
-
101
- def generate_fix(
102
- event: ErrorEvent,
103
- repo: RepoConfig,
104
- cfg: SentinelConfig,
105
- patches_dir: Path,
106
- ) -> tuple[str, Path | None]:
107
- """
108
- Generate a fix for the given error event.
109
-
110
- Returns:
111
- (status, patch_path)
112
- status: "patch" | "skip" | "error"
113
- """
114
- # Issues have source like "issues/filename" — no rolling log file exists
115
- log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
116
- if not log_file.exists():
117
- log_file = None
118
- prompt = _build_prompt(event, repo, log_file)
119
-
120
- logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
121
- import os as _os
122
- env = _os.environ.copy()
123
- if cfg.anthropic_api_key:
124
- env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
125
- try:
126
- result = subprocess.run(
127
- ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
128
- if os.getuid() != 0 else
129
- [cfg.claude_code_bin, "--print", prompt]),
130
- capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
131
- )
132
- except subprocess.TimeoutExpired:
133
- logger.error("Claude Code timed out for %s", event.fingerprint)
134
- return "error", None, ""
135
- except FileNotFoundError:
136
- logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
137
- return "error", None, ""
138
-
139
- output = (result.stdout or "") + (result.stderr or "")
140
-
141
- if output.strip().upper().startswith("SKIP:"):
142
- reason = output.strip()[5:].strip()
143
- logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
144
- return "skip", None, ""
145
-
146
- patch = _extract_patch(output)
147
- if not patch:
148
- logger.warning("No patch found in Claude output for %s", event.fingerprint)
149
- return "error", None, ""
150
-
151
- ok, reason = _validate_patch(patch)
152
- if not ok:
153
- logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
154
- return "skip", None, ""
155
-
156
- patches_dir.mkdir(parents=True, exist_ok=True)
157
- patch_path = patches_dir / f"{event.fingerprint}.diff"
158
- patch_path.write_text(patch, encoding="utf-8")
159
- logger.info("Patch written to %s", patch_path)
160
- return "patch", patch_path, marker
1
+ """
2
+ fix_engine.py — Generate code fixes via Claude Code (headless).
3
+
4
+ Invokes: claude --print "<prompt>" 2>&1
5
+
6
+ Cairn MCP context is fetched automatically by Claude Code via its MCP tool
7
+ connection — Sentinel does not need to query or inject it explicitly.
8
+ """
9
+
10
+ import logging
11
+ import re
12
+ import subprocess
13
+ import textwrap
14
+ from pathlib import Path
15
+
16
+ from .config_loader import RepoConfig, SentinelConfig
17
+ from .log_parser import ErrorEvent
18
+ from .notify import alert_if_rate_limited, slack_alert
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ SUBPROCESS_TIMEOUT = 120
23
+ MAX_FILES_IN_PATCH = 5
24
+ MAX_LINES_IN_PATCH = 200
25
+
26
+ _DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
27
+ _DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
28
+
29
+
30
+ def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
31
+ if log_file and log_file.exists():
32
+ ctx = (
33
+ "LOG FILE: " + str(log_file) + "\n"
34
+ "Read this file first -- it contains the last 48h of logs from "
35
+ + event.source + ".\n"
36
+ "Use it to understand frequency, context, and preceding warnings."
37
+ )
38
+ step1 = "Read the log file above to understand what led up to this error."
39
+ else:
40
+ ctx = (
41
+ "SOURCE: " + event.source + "\n"
42
+ "No rolling log file available. The full issue description is below."
43
+ )
44
+ step1 = "Use the issue description above as your primary context."
45
+
46
+ marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
47
+ marker_instruction = "\n".join([
48
+ "For EVERY method and constructor you modify, add this as the FIRST executable line:",
49
+ f' Java/Kotlin : log.info("{marker_label}");',
50
+ f' Python : logger.info("{marker_label}")',
51
+ f' Node.js : logger.info("{marker_label}")',
52
+ "Use the logger already present in the file. Do not add new imports.",
53
+ "This applies to ALL modified methods and constructors without exception.",
54
+ ])
55
+
56
+ cleanup = ""
57
+ if stale_markers:
58
+ marker_list = "\n".join(f" - {m}" for m in stale_markers)
59
+ cleanup = (
60
+ "CLEANUP (do this first, before the fix):\n"
61
+ "Remove any log lines containing these stale Sentinel markers from the codebase:\n"
62
+ + marker_list + "\n"
63
+ "Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
64
+ )
65
+
66
+ lines_out = [
67
+ f"You are fixing a production bug in the repository at {repo.local_path}.",
68
+ f"Repository: {repo.repo_name}",
69
+ "",
70
+ ]
71
+ if cleanup:
72
+ lines_out += [cleanup, ""]
73
+ lines_out += [
74
+ ctx,
75
+ "",
76
+ f"ISSUE TO FIX (from {event.source}):",
77
+ event.full_text(),
78
+ "",
79
+ "Task:",
80
+ f"1. {step1}",
81
+ "2. Use your available tools to explore the codebase and identify the root cause.",
82
+ f"3. {marker_instruction}",
83
+ "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
84
+ "5. Do not explain. Output only the patch.",
85
+ "6. If you cannot determine a safe fix, output: SKIP: <reason>",
86
+ ]
87
+ return "\n".join(lines_out)
88
+
89
+ def _validate_patch(patch: str) -> tuple[bool, str]:
90
+ files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
91
+ lines_changed = len([
92
+ l for l in patch.splitlines()
93
+ if l.startswith(("+", "-")) and not l.startswith(("+++", "---"))
94
+ ])
95
+ if files_changed > MAX_FILES_IN_PATCH:
96
+ return False, f"Patch touches {files_changed} files (limit {MAX_FILES_IN_PATCH})"
97
+ if lines_changed > MAX_LINES_IN_PATCH:
98
+ return False, f"Patch changes {lines_changed} lines (limit {MAX_LINES_IN_PATCH})"
99
+ return True, ""
100
+
101
+
102
def generate_fix(
    event: ErrorEvent,
    repo: RepoConfig,
    cfg: SentinelConfig,
    patches_dir: Path,
    marker: str | None = None,
    stale_markers: list[str] | None = None,
) -> tuple[str, Path | None, str]:
    """
    Generate a fix for the given error event by invoking the Claude Code CLI.

    Args:
        event: The error to fix; ``source`` and ``fingerprint`` are read here.
        repo: Repository configuration, forwarded to the prompt builder.
        cfg: Sentinel configuration (workspace dir, CLI path, auth, Slack).
        patches_dir: Directory the resulting ``<fingerprint>.diff`` is written to
            (created if missing).
        marker: Log-marker prefix embedded in the prompt and returned on
            success. Derived from the event fingerprint when omitted.
        stale_markers: Optional markers the prompt asks Claude to clean up.

    Returns:
        (status, patch_path, marker)
        status: "patch" | "skip" | "error"
        patch_path: the written diff on "patch", otherwise None
        marker: the marker used on "patch", otherwise ""
    """
    # Local import: the module's top-level imports do not include ``os``.
    import os

    if marker is None:
        # NOTE(review): the previous body used ``marker`` without defining it.
        # This default format is an assumption — confirm against callers.
        marker = f"SENTINEL-{event.fingerprint}"

    # Issues have source like "issues/filename" — no rolling log file exists
    log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
    if not log_file.exists():
        log_file = None
    prompt = _build_prompt(event, repo, log_file, marker, stale_markers)

    logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
    env = os.environ.copy()
    # Inject API key only when Claude Pro is NOT preferred for tasks
    # (when claude_pro_for_tasks=True and API key is set, let claude CLI use OAuth/Pro)
    if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key

    # The permission-skipping flag is only passed when NOT running as root
    # (presumably the CLI rejects it under root — confirm). ``os.getuid`` does
    # not exist on Windows, so treat its absence as "not root".
    getuid = getattr(os, "getuid", None)
    running_as_root = getuid is not None and getuid() == 0
    cmd = [cfg.claude_code_bin]
    if not running_as_root:
        cmd.append("--dangerously-skip-permissions")
    cmd += ["--print", prompt]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
        )
    except subprocess.TimeoutExpired:
        logger.error("Claude Code timed out for %s", event.fingerprint)
        return "error", None, ""
    except FileNotFoundError:
        msg = (
            f":warning: *Sentinel — Claude CLI not found*\n"
            f"`{cfg.claude_code_bin}` not found. Run: `npm install -g @anthropic-ai/claude-code`\n"
            f"Fix engine is disabled until this is resolved."
        )
        logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
        slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
        return "error", None, ""

    # stdout and stderr are merged, mirroring the documented ``2>&1`` invocation.
    output = (result.stdout or "") + (result.stderr or "")

    # Alert Slack immediately on rate-limit / auth failure — never stay silent
    alert_if_rate_limited(
        cfg.slack_bot_token,
        cfg.slack_channel,
        source=f"fix_engine/{event.fingerprint}",
        output=output,
    )

    stripped = output.strip()
    if stripped.upper().startswith("SKIP:"):
        reason = stripped[5:].strip()
        logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
        return "skip", None, ""

    patch = _extract_patch(output)
    if not patch:
        logger.warning("No patch found in Claude output for %s", event.fingerprint)
        return "error", None, ""

    ok, reason = _validate_patch(patch)
    if not ok:
        # An oversized patch is reported as "skip", not "error".
        logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
        return "skip", None, ""

    patches_dir.mkdir(parents=True, exist_ok=True)
    patch_path = patches_dir / f"{event.fingerprint}.diff"
    patch_path.write_text(patch, encoding="utf-8")
    logger.info("Patch written to %s", patch_path)
    return "patch", patch_path, marker
@@ -541,10 +541,45 @@ async def _upgrade_check_loop(cfg_loader: ConfigLoader):
541
541
 
542
542
  # ── Entry point ──────────────────────────────────────────────────────────────────────────────────
543
543
 
544
def _log_auth_status(cfg: SentinelConfig) -> None:
    """
    Report at startup which Claude authentication paths are configured.

    Logs one message describing the combination of API key, ``claude`` CLI
    availability on PATH, and the ``claude_pro_for_tasks`` preference. When
    neither an API key nor the CLI is available, logs an error and also sends
    a Slack alert through ``slack_alert``.
    """
    api_key_set = bool(cfg.anthropic_api_key)
    cli_available = bool(shutil.which(cfg.claude_code_bin))
    prefer_pro = cfg.claude_pro_for_tasks

    if api_key_set:
        # With an API key present, only the Pro preference changes the message.
        if prefer_pro:
            logger.info(
                "Claude auth: API key ✓ (Boss) + Claude Pro preferred for Fix Engine/Ask Codebase. "
                "Run `claude login` if not already authenticated."
            )
        else:
            logger.info(
                "Claude auth: API key ✓ (Boss + Fix Engine). "
                "CLAUDE_PRO_FOR_TASKS=false — all tasks billed to API quota."
            )
        return

    if cli_available:
        logger.warning(
            "Claude auth: no ANTHROPIC_API_KEY — Boss will use CLI fallback (limited tools). "
            "Fix Engine uses Claude Pro via `claude` CLI."
        )
        return

    # No API key and no CLI binary: nothing can authenticate.
    alert_text = "\n".join([
        ":warning: *Sentinel — no Claude authentication configured*",
        "Sentinel needs at least one of:",
        "• `ANTHROPIC_API_KEY` in `sentinel.properties` — full Boss tools, API billing",
        "• Claude Pro OAuth: run `claude login` on the server — required for Fix Engine",
        "See the auth section in your workspace `sentinel.properties` for guidance.",
    ])
    logger.error("Claude auth: NOTHING configured — Boss and Fix Engine will fail!")
    from .notify import slack_alert
    slack_alert(cfg.slack_bot_token, cfg.slack_channel, alert_text)
576
+
577
+
544
578
  async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
545
579
  interval = cfg_loader.sentinel.poll_interval_seconds
546
580
  logger.info("Sentinel starting — poll interval: %ds, repos: %s",
547
581
  interval, list(cfg_loader.repos.keys()))
582
+ _log_auth_status(cfg_loader.sentinel)
548
583
 
549
584
  results = await _startup_checks(cfg_loader)
550
585
 
@@ -0,0 +1,88 @@
1
+ """
2
+ notify.py — Best-effort Slack alerts from any Sentinel module.
3
+
4
+ Uses the Slack Web API directly (no Bolt / Socket Mode required).
5
+ Calls never raise — failures are logged and silently dropped.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+
11
+ import requests
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # ── Rate-limit / auth-failure detector ────────────────────────────────────────
16
+
17
# Case-insensitive signals for rate-limit, quota/billing and auth failures.
# "529" is anchored with word boundaries so it only matches as a standalone
# number and not inside longer digit or hex runs (e.g. git index hashes in a
# diff), which would otherwise raise false alerts.
# NOTE(review): 529 is presumably the HTTP "overloaded" status — confirm.
_RATE_LIMIT_RE = re.compile(
    r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
    r"|overloaded|credit.?balance|billing|\b529\b"
    r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
    r"|claude\.ai subscription|pro.?plan|login required",
    re.IGNORECASE,
)


def is_rate_limited(text: str) -> bool:
    """
    Return True if the text contains a rate-limit or auth-failure signal.

    Empty or ``None`` input carries no signal and yields False.
    """
    if not text:
        return False
    return bool(_RATE_LIMIT_RE.search(text))
29
+
30
+
31
def rate_limit_message(source: str, raw: str) -> str:
    """
    Produce a human-readable Slack alert for a rate-limit / auth event.

    Args:
        source: Label of the component that saw the failure; shown in the title.
        raw: Raw Claude output. Only the first 300 characters (after stripping)
            are quoted, flattened onto one line. ``None`` is treated as empty.

    Returns:
        The alert text, with the quoted snippet inside a ``` code fence.
    """
    snippet = (raw or "").strip()[:300].replace("\n", " ")
    # The snippet is wrapped in a ``` fence below. Raw output may contain its
    # own ``` fences, which would end the block early, so neutralise them and
    # drop stray backticks that would touch the surrounding fence.
    snippet = snippet.replace("```", "'''").strip("`")
    return (
        f":warning: *Sentinel — Claude usage/auth problem ({source})*\n"
        f"Claude returned an error that requires admin attention:\n"
        f"```{snippet}```\n"
        f"*What to check:*\n"
        f"• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit\n"
        f"• Claude Pro: run `claude login` on the server to refresh OAuth\n"
        f"• Both: at least one auth method must be working\n"
        f"Sentinel will retry on the next poll cycle."
    )
44
+
45
+
46
+ # ── Alert dispatcher ──────────────────────────────────────────────────────────
47
+
48
def slack_alert(bot_token: str, channel: str, text: str) -> None:
    """
    Send a plain-text message to a Slack channel via ``chat.postMessage``.

    Best-effort: when the token or channel is missing the message is only
    logged at debug level; any failure while posting is logged as a warning
    and swallowed, so this function does not raise on delivery problems.
    """
    if not (bot_token and channel):
        logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
        return

    try:
        request_headers = {
            "Authorization": f"Bearer {bot_token}",
            "Content-Type": "application/json",
        }
        payload = {"channel": channel, "text": text}
        response = requests.post(
            "https://slack.com/api/chat.postMessage",
            headers=request_headers,
            json=payload,
            timeout=10,
        )
        # The response body's "ok" flag is what this code treats as success.
        body = response.json()
        if not body.get("ok"):
            logger.warning("slack_alert: Slack API error: %s", body.get("error"))
    except Exception as exc:
        # Deliberately broad: alerting must never take down the caller.
        logger.warning("slack_alert: failed to post: %s", exc)
71
+
72
+
73
def alert_if_rate_limited(
    bot_token: str,
    channel: str,
    source: str,
    output: str,
) -> bool:
    """
    Scan Claude output for rate-limit / auth signals and alert when present.

    When ``is_rate_limited(output)`` is true, the failure is logged at error
    level (first 200 characters of the output) and a Slack alert built by
    ``rate_limit_message`` is sent via ``slack_alert``.

    Returns:
        True if a signal was found and the alert path ran, otherwise False.
    """
    if is_rate_limited(output):
        alert_text = rate_limit_message(source, output)
        logger.error("Claude rate-limit/auth failure in %s: %s", source, output[:200])
        slack_alert(bot_token, channel, alert_text)
        return True
    return False