@misterhuydo/sentinel 1.2.4 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cairn/.hint-lock +1 -1
- package/.cairn/session.json +2 -2
- package/package.json +21 -21
- package/python/sentinel/config_loader.py +14 -0
- package/python/sentinel/fix_engine.py +259 -242
- package/python/sentinel/health_checker.py +219 -0
- package/python/sentinel/log_syncer.py +164 -0
- package/python/sentinel/main.py +62 -0
- package/python/sentinel/sentinel_boss.py +2406 -2143
- package/python/sentinel/state_store.py +542 -499
package/.cairn/.hint-lock
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2026-03-
|
|
1
|
+
2026-03-23T11:11:25.885Z
|
package/.cairn/session.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"message": "Auto-checkpoint at 2026-03-
|
|
3
|
-
"checkpoint_at": "2026-03-
|
|
2
|
+
"message": "Auto-checkpoint at 2026-03-23T11:34:06.855Z",
|
|
3
|
+
"checkpoint_at": "2026-03-23T11:34:06.857Z",
|
|
4
4
|
"active_files": [],
|
|
5
5
|
"notes": [],
|
|
6
6
|
"mtime_snapshot": {}
|
package/package.json
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@misterhuydo/sentinel",
|
|
3
|
-
"version": "1.2.4",
|
|
4
|
-
"description": "Sentinel — Autonomous DevOps Agent installer and manager",
|
|
5
|
-
"bin": {
|
|
6
|
-
"sentinel": "./bin/sentinel.js"
|
|
7
|
-
},
|
|
8
|
-
"scripts": {
|
|
9
|
-
"prepublishOnly": "node scripts/bundle.js"
|
|
10
|
-
},
|
|
11
|
-
"dependencies": {
|
|
12
|
-
"chalk": "^4.1.2",
|
|
13
|
-
"fs-extra": "^11.2.0",
|
|
14
|
-
"prompts": "^2.4.2"
|
|
15
|
-
},
|
|
16
|
-
"engines": {
|
|
17
|
-
"node": ">=16"
|
|
18
|
-
},
|
|
19
|
-
"author": "misterhuydo",
|
|
20
|
-
"license": "MIT"
|
|
21
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@misterhuydo/sentinel",
|
|
3
|
+
"version": "1.2.6",
|
|
4
|
+
"description": "Sentinel — Autonomous DevOps Agent installer and manager",
|
|
5
|
+
"bin": {
|
|
6
|
+
"sentinel": "./bin/sentinel.js"
|
|
7
|
+
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"prepublishOnly": "node scripts/bundle.js"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"chalk": "^4.1.2",
|
|
13
|
+
"fs-extra": "^11.2.0",
|
|
14
|
+
"prompts": "^2.4.2"
|
|
15
|
+
},
|
|
16
|
+
"engines": {
|
|
17
|
+
"node": ">=16"
|
|
18
|
+
},
|
|
19
|
+
"author": "misterhuydo",
|
|
20
|
+
"license": "MIT"
|
|
21
|
+
}
|
package/python/sentinel/config_loader.py
CHANGED
|
@@ -69,6 +69,10 @@ class SentinelConfig:
|
|
|
69
69
|
# Claude Pro / OAuth — used by fix_engine + ask_codebase when CLAUDE_PRO_FOR_TASKS=true
|
|
70
70
|
# At least one must be configured. Both = ideal split (Boss=API key, heavy tasks=Pro).
|
|
71
71
|
claude_pro_for_tasks: bool = True # when True + API key set, fix_engine/ask_codebase use claude CLI (Pro billing)
|
|
72
|
+
sync_enabled: bool = True
|
|
73
|
+
sync_interval_seconds: int = 300
|
|
74
|
+
sync_retention_days: int = 30 # delete synced log files older than this many days
|
|
75
|
+
sync_max_file_mb: int = 200 # truncate synced log files exceeding this size (MB)
|
|
72
76
|
|
|
73
77
|
|
|
74
78
|
@dataclass
|
|
@@ -85,8 +89,10 @@ class LogSourceConfig:
|
|
|
85
89
|
tail: Optional[int] = None
|
|
86
90
|
head: Optional[int] = None
|
|
87
91
|
# Cloudflare
|
|
92
|
+
target_repo: str = "auto" # explicit repo name, or "auto" for stack-trace routing
|
|
88
93
|
cf_url: str = ""
|
|
89
94
|
cf_token: str = ""
|
|
95
|
+
sync_enabled: bool = True
|
|
90
96
|
|
|
91
97
|
|
|
92
98
|
@dataclass
|
|
@@ -98,6 +104,7 @@ class RepoConfig:
|
|
|
98
104
|
auto_publish: bool = False
|
|
99
105
|
cicd_type: str = ""
|
|
100
106
|
cicd_job_url: str = ""
|
|
107
|
+
health_url: str = "" # optional: HTTP endpoint returning {"Status": "true"}
|
|
101
108
|
cicd_token: str = ""
|
|
102
109
|
|
|
103
110
|
|
|
@@ -166,6 +173,10 @@ class ConfigLoader:
|
|
|
166
173
|
c.slack_admin_users = _csv(d.get("SLACK_ADMIN_USERS", ""))
|
|
167
174
|
c.project_name = d.get("PROJECT_NAME", "")
|
|
168
175
|
c.claude_pro_for_tasks = d.get("CLAUDE_PRO_FOR_TASKS", "true").lower() != "false"
|
|
176
|
+
c.sync_enabled = d.get("SYNC_ENABLED", "true").lower() != "false"
|
|
177
|
+
c.sync_interval_seconds = int(d.get("SYNC_INTERVAL_SECONDS", 300))
|
|
178
|
+
c.sync_retention_days = int(d.get("SYNC_RETENTION_DAYS", 30))
|
|
179
|
+
c.sync_max_file_mb = int(d.get("SYNC_MAX_FILE_MB", 200))
|
|
169
180
|
self.sentinel = c
|
|
170
181
|
|
|
171
182
|
def _load_log_sources(self):
|
|
@@ -190,6 +201,8 @@ class ConfigLoader:
|
|
|
190
201
|
s.head = int(d["HEAD"]) if "HEAD" in d else None
|
|
191
202
|
s.cf_url = d.get("CF_URL", "")
|
|
192
203
|
s.cf_token = d.get("CF_TOKEN", "")
|
|
204
|
+
s.target_repo = d.get("TARGET_REPO", "auto")
|
|
205
|
+
s.sync_enabled = d.get("SYNC_ENABLED", "true").lower() != "false"
|
|
193
206
|
self.log_sources[s.name] = s
|
|
194
207
|
|
|
195
208
|
def _load_repos(self):
|
|
@@ -210,6 +223,7 @@ class ConfigLoader:
|
|
|
210
223
|
r.cicd_type = d.get("CICD_TYPE", "")
|
|
211
224
|
r.cicd_job_url = d.get("CICD_JOB_URL", "")
|
|
212
225
|
r.cicd_token = d.get("CICD_TOKEN", "")
|
|
226
|
+
r.health_url = d.get("HEALTH_URL", "")
|
|
213
227
|
self.repos[r.repo_name] = r
|
|
214
228
|
|
|
215
229
|
def _register_sighup(self):
|
package/python/sentinel/fix_engine.py
CHANGED
|
@@ -1,242 +1,259 @@
|
|
|
1
|
-
"""
|
|
2
|
-
fix_engine.py — Generate code fixes via Claude Code (headless).
|
|
3
|
-
|
|
4
|
-
Invokes: claude --print "<prompt>" 2>&1
|
|
5
|
-
|
|
6
|
-
Cairn MCP context is fetched automatically by Claude Code via its MCP tool
|
|
7
|
-
connection — Sentinel does not need to query or inject it explicitly.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import logging
|
|
11
|
-
import re
|
|
12
|
-
import subprocess
|
|
13
|
-
import textwrap
|
|
14
|
-
from pathlib import Path
|
|
15
|
-
|
|
16
|
-
from .config_loader import RepoConfig, SentinelConfig
|
|
17
|
-
from .log_parser import ErrorEvent
|
|
18
|
-
from .notify import alert_if_rate_limited, slack_alert
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
SUBPROCESS_TIMEOUT = 120
|
|
23
|
-
MAX_FILES_IN_PATCH = 5
|
|
24
|
-
MAX_LINES_IN_PATCH = 200
|
|
25
|
-
|
|
26
|
-
_DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
|
|
27
|
-
_DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
|
|
31
|
-
if log_file and log_file.exists():
|
|
32
|
-
ctx = (
|
|
33
|
-
"LOG FILE: " + str(log_file) + "\n"
|
|
34
|
-
"Read this file first -- it contains the last 48h of logs from "
|
|
35
|
-
+ event.source + ".\n"
|
|
36
|
-
"Use it to understand frequency, context, and preceding warnings."
|
|
37
|
-
)
|
|
38
|
-
step1 = "Read the log file above to understand what led up to this error."
|
|
39
|
-
else:
|
|
40
|
-
ctx = (
|
|
41
|
-
"SOURCE: " + event.source + "\n"
|
|
42
|
-
"No rolling log file available. The full issue description is below."
|
|
43
|
-
)
|
|
44
|
-
step1 = "Use the issue description above as your primary context."
|
|
45
|
-
|
|
46
|
-
marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
|
|
47
|
-
marker_instruction = "\n".join([
|
|
48
|
-
"For EVERY method and constructor you modify, add this as the FIRST executable line:",
|
|
49
|
-
f' Java/Kotlin : log.info("{marker_label}");',
|
|
50
|
-
f' Python : logger.info("{marker_label}")',
|
|
51
|
-
f' Node.js : logger.info("{marker_label}")',
|
|
52
|
-
"Use the logger already present in the file. Do not add new imports.",
|
|
53
|
-
"This applies to ALL modified methods and constructors without exception.",
|
|
54
|
-
])
|
|
55
|
-
|
|
56
|
-
cleanup = ""
|
|
57
|
-
if stale_markers:
|
|
58
|
-
marker_list = "\n".join(f" - {m}" for m in stale_markers)
|
|
59
|
-
cleanup = (
|
|
60
|
-
"CLEANUP (do this first, before the fix):\n"
|
|
61
|
-
"Remove any log lines containing these stale Sentinel markers from the codebase:\n"
|
|
62
|
-
+ marker_list + "\n"
|
|
63
|
-
"Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
lines_out = [
|
|
67
|
-
f"You are fixing a production bug in the repository at {repo.local_path}.",
|
|
68
|
-
f"Repository: {repo.repo_name}",
|
|
69
|
-
"",
|
|
70
|
-
]
|
|
71
|
-
if cleanup:
|
|
72
|
-
lines_out += [cleanup, ""]
|
|
73
|
-
lines_out += [
|
|
74
|
-
ctx,
|
|
75
|
-
"",
|
|
76
|
-
f"ISSUE TO FIX (from {event.source}):",
|
|
77
|
-
event.full_text(),
|
|
78
|
-
"",
|
|
79
|
-
"Task:",
|
|
80
|
-
f"1. {step1}",
|
|
81
|
-
"2. Use your available tools to explore the codebase and identify the root cause.",
|
|
82
|
-
f"3. {marker_instruction}",
|
|
83
|
-
"4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
|
|
84
|
-
"5. Do not explain. Output only the patch.",
|
|
85
|
-
"6. If you cannot determine a safe fix, output: SKIP: <reason>",
|
|
86
|
-
]
|
|
87
|
-
return "\n".join(lines_out)
|
|
88
|
-
|
|
89
|
-
def _validate_patch(patch: str) -> tuple[bool, str]:
|
|
90
|
-
files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
|
|
91
|
-
lines_changed = len([
|
|
92
|
-
l for l in patch.splitlines()
|
|
93
|
-
if l.startswith(("+", "-")) and not l.startswith(("+++", "---"))
|
|
94
|
-
])
|
|
95
|
-
if files_changed > MAX_FILES_IN_PATCH:
|
|
96
|
-
return False, f"Patch touches {files_changed} files (limit {MAX_FILES_IN_PATCH})"
|
|
97
|
-
if lines_changed > MAX_LINES_IN_PATCH:
|
|
98
|
-
return False, f"Patch changes {lines_changed} lines (limit {MAX_LINES_IN_PATCH})"
|
|
99
|
-
return True, ""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
_AUTH_ERROR_HINTS = (
|
|
103
|
-
"not logged in", "please run claude login", "authentication failed",
|
|
104
|
-
"api key is not set", "invalid x-api-key", "unauthorized", "please authenticate",
|
|
105
|
-
"unauthenticated", "auth_required", "no auth", "login required",
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def _is_auth_error(output: str) -> bool:
|
|
110
|
-
low = output.lower()
|
|
111
|
-
return any(hint in low for hint in _AUTH_ERROR_HINTS)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def _claude_cmd(bin_path: str, prompt: str) -> list[str]:
|
|
115
|
-
import os as _os
|
|
116
|
-
try:
|
|
117
|
-
skip = _os.getuid() != 0
|
|
118
|
-
except AttributeError:
|
|
119
|
-
skip = True # Windows — always pass flag
|
|
120
|
-
if skip:
|
|
121
|
-
return [bin_path, "--dangerously-skip-permissions", "--print", prompt]
|
|
122
|
-
return [bin_path, "--print", prompt]
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def _run_claude_attempt(bin_path: str, prompt: str, env: dict) -> tuple[str, bool]:
|
|
126
|
-
"""
|
|
127
|
-
Run claude CLI with the given env. Returns (output, timed_out).
|
|
128
|
-
Raises FileNotFoundError if binary is missing.
|
|
129
|
-
"""
|
|
130
|
-
try:
|
|
131
|
-
result = subprocess.run(
|
|
132
|
-
_claude_cmd(bin_path, prompt),
|
|
133
|
-
capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
|
|
134
|
-
)
|
|
135
|
-
return (result.stdout or "") + (result.stderr or ""), False
|
|
136
|
-
except subprocess.TimeoutExpired:
|
|
137
|
-
return "", True
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def generate_fix(
|
|
141
|
-
event: ErrorEvent,
|
|
142
|
-
repo: RepoConfig,
|
|
143
|
-
cfg: SentinelConfig,
|
|
144
|
-
patches_dir: Path,
|
|
145
|
-
) -> tuple[str, Path | None, str]:
|
|
146
|
-
"""
|
|
147
|
-
Generate a fix for the given error event.
|
|
148
|
-
|
|
149
|
-
Returns:
|
|
150
|
-
(status, patch_path, marker)
|
|
151
|
-
status: "patch" | "skip" | "error"
|
|
152
|
-
|
|
153
|
-
Auth strategy — API key and Claude Pro (OAuth) are interchangeable:
|
|
154
|
-
Primary : Claude Pro (OAuth) if claude_pro_for_tasks=True, else API key
|
|
155
|
-
Fallback : the other method, if primary fails with an auth error
|
|
156
|
-
On total auth failure: notify Slack admins + email report recipients
|
|
157
|
-
"""
|
|
158
|
-
import os as _os
|
|
159
|
-
|
|
160
|
-
marker = f"sentinel-{event.fingerprint[:8]}"
|
|
161
|
-
log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
|
|
162
|
-
if not log_file.exists():
|
|
163
|
-
log_file = None
|
|
164
|
-
prompt = _build_prompt(event, repo, log_file, marker)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
1
|
+
"""
|
|
2
|
+
fix_engine.py — Generate code fixes via Claude Code (headless).
|
|
3
|
+
|
|
4
|
+
Invokes: claude --print "<prompt>" 2>&1
|
|
5
|
+
|
|
6
|
+
Cairn MCP context is fetched automatically by Claude Code via its MCP tool
|
|
7
|
+
connection — Sentinel does not need to query or inject it explicitly.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
13
|
+
import textwrap
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from .config_loader import RepoConfig, SentinelConfig
|
|
17
|
+
from .log_parser import ErrorEvent
|
|
18
|
+
from .notify import alert_if_rate_limited, slack_alert
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
SUBPROCESS_TIMEOUT = 120
|
|
23
|
+
MAX_FILES_IN_PATCH = 5
|
|
24
|
+
MAX_LINES_IN_PATCH = 200
|
|
25
|
+
|
|
26
|
+
_DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
|
|
27
|
+
_DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
|
|
31
|
+
if log_file and log_file.exists():
|
|
32
|
+
ctx = (
|
|
33
|
+
"LOG FILE: " + str(log_file) + "\n"
|
|
34
|
+
"Read this file first -- it contains the last 48h of logs from "
|
|
35
|
+
+ event.source + ".\n"
|
|
36
|
+
"Use it to understand frequency, context, and preceding warnings."
|
|
37
|
+
)
|
|
38
|
+
step1 = "Read the log file above to understand what led up to this error."
|
|
39
|
+
else:
|
|
40
|
+
ctx = (
|
|
41
|
+
"SOURCE: " + event.source + "\n"
|
|
42
|
+
"No rolling log file available. The full issue description is below."
|
|
43
|
+
)
|
|
44
|
+
step1 = "Use the issue description above as your primary context."
|
|
45
|
+
|
|
46
|
+
marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
|
|
47
|
+
marker_instruction = "\n".join([
|
|
48
|
+
"For EVERY method and constructor you modify, add this as the FIRST executable line:",
|
|
49
|
+
f' Java/Kotlin : log.info("{marker_label}");',
|
|
50
|
+
f' Python : logger.info("{marker_label}")',
|
|
51
|
+
f' Node.js : logger.info("{marker_label}")',
|
|
52
|
+
"Use the logger already present in the file. Do not add new imports.",
|
|
53
|
+
"This applies to ALL modified methods and constructors without exception.",
|
|
54
|
+
])
|
|
55
|
+
|
|
56
|
+
cleanup = ""
|
|
57
|
+
if stale_markers:
|
|
58
|
+
marker_list = "\n".join(f" - {m}" for m in stale_markers)
|
|
59
|
+
cleanup = (
|
|
60
|
+
"CLEANUP (do this first, before the fix):\n"
|
|
61
|
+
"Remove any log lines containing these stale Sentinel markers from the codebase:\n"
|
|
62
|
+
+ marker_list + "\n"
|
|
63
|
+
"Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
lines_out = [
|
|
67
|
+
f"You are fixing a production bug in the repository at {repo.local_path}.",
|
|
68
|
+
f"Repository: {repo.repo_name}",
|
|
69
|
+
"",
|
|
70
|
+
]
|
|
71
|
+
if cleanup:
|
|
72
|
+
lines_out += [cleanup, ""]
|
|
73
|
+
lines_out += [
|
|
74
|
+
ctx,
|
|
75
|
+
"",
|
|
76
|
+
f"ISSUE TO FIX (from {event.source}):",
|
|
77
|
+
event.full_text(),
|
|
78
|
+
"",
|
|
79
|
+
"Task:",
|
|
80
|
+
f"1. {step1}",
|
|
81
|
+
"2. Use your available tools to explore the codebase and identify the root cause.",
|
|
82
|
+
f"3. {marker_instruction}",
|
|
83
|
+
"4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
|
|
84
|
+
"5. Do not explain. Output only the patch.",
|
|
85
|
+
"6. If you cannot determine a safe fix, output: SKIP: <reason>",
|
|
86
|
+
]
|
|
87
|
+
return "\n".join(lines_out)
|
|
88
|
+
|
|
89
|
+
def _validate_patch(patch: str) -> tuple[bool, str]:
|
|
90
|
+
files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
|
|
91
|
+
lines_changed = len([
|
|
92
|
+
l for l in patch.splitlines()
|
|
93
|
+
if l.startswith(("+", "-")) and not l.startswith(("+++", "---"))
|
|
94
|
+
])
|
|
95
|
+
if files_changed > MAX_FILES_IN_PATCH:
|
|
96
|
+
return False, f"Patch touches {files_changed} files (limit {MAX_FILES_IN_PATCH})"
|
|
97
|
+
if lines_changed > MAX_LINES_IN_PATCH:
|
|
98
|
+
return False, f"Patch changes {lines_changed} lines (limit {MAX_LINES_IN_PATCH})"
|
|
99
|
+
return True, ""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
_AUTH_ERROR_HINTS = (
|
|
103
|
+
"not logged in", "please run claude login", "authentication failed",
|
|
104
|
+
"api key is not set", "invalid x-api-key", "unauthorized", "please authenticate",
|
|
105
|
+
"unauthenticated", "auth_required", "no auth", "login required",
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _is_auth_error(output: str) -> bool:
|
|
110
|
+
low = output.lower()
|
|
111
|
+
return any(hint in low for hint in _AUTH_ERROR_HINTS)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _claude_cmd(bin_path: str, prompt: str) -> list[str]:
|
|
115
|
+
import os as _os
|
|
116
|
+
try:
|
|
117
|
+
skip = _os.getuid() != 0
|
|
118
|
+
except AttributeError:
|
|
119
|
+
skip = True # Windows — always pass flag
|
|
120
|
+
if skip:
|
|
121
|
+
return [bin_path, "--dangerously-skip-permissions", "--print", prompt]
|
|
122
|
+
return [bin_path, "--print", prompt]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _run_claude_attempt(bin_path: str, prompt: str, env: dict) -> tuple[str, bool]:
|
|
126
|
+
"""
|
|
127
|
+
Run claude CLI with the given env. Returns (output, timed_out).
|
|
128
|
+
Raises FileNotFoundError if binary is missing.
|
|
129
|
+
"""
|
|
130
|
+
try:
|
|
131
|
+
result = subprocess.run(
|
|
132
|
+
_claude_cmd(bin_path, prompt),
|
|
133
|
+
capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
|
|
134
|
+
)
|
|
135
|
+
return (result.stdout or "") + (result.stderr or ""), False
|
|
136
|
+
except subprocess.TimeoutExpired:
|
|
137
|
+
return "", True
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def generate_fix(
|
|
141
|
+
event: ErrorEvent,
|
|
142
|
+
repo: RepoConfig,
|
|
143
|
+
cfg: SentinelConfig,
|
|
144
|
+
patches_dir: Path,
|
|
145
|
+
) -> tuple[str, Path | None, str]:
|
|
146
|
+
"""
|
|
147
|
+
Generate a fix for the given error event.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
(status, patch_path, marker)
|
|
151
|
+
status: "patch" | "skip" | "error"
|
|
152
|
+
|
|
153
|
+
Auth strategy — API key and Claude Pro (OAuth) are interchangeable:
|
|
154
|
+
Primary : Claude Pro (OAuth) if claude_pro_for_tasks=True, else API key
|
|
155
|
+
Fallback : the other method, if primary fails with an auth error
|
|
156
|
+
On total auth failure: notify Slack admins + email report recipients
|
|
157
|
+
"""
|
|
158
|
+
import os as _os
|
|
159
|
+
|
|
160
|
+
marker = f"sentinel-{event.fingerprint[:8]}"
|
|
161
|
+
log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
|
|
162
|
+
if not log_file.exists():
|
|
163
|
+
log_file = None
|
|
164
|
+
prompt = _build_prompt(event, repo, log_file, marker)
|
|
165
|
+
|
|
166
|
+
# -- Cross-source dedup: skip if fingerprint already fixed in recent git commits ------
|
|
167
|
+
if repo.local_path:
|
|
168
|
+
import subprocess as _sp
|
|
169
|
+
try:
|
|
170
|
+
git_result = _sp.run(
|
|
171
|
+
["git", "log", "--oneline", "-50", f"--grep={event.fingerprint[:8]}"],
|
|
172
|
+
cwd=repo.local_path, capture_output=True, text=True, timeout=15,
|
|
173
|
+
)
|
|
174
|
+
if git_result.returncode == 0 and git_result.stdout.strip():
|
|
175
|
+
logger.info(
|
|
176
|
+
"fix_engine: fingerprint %s already in recent git commits — skipping: %s",
|
|
177
|
+
event.fingerprint[:8], git_result.stdout.splitlines()[0],
|
|
178
|
+
)
|
|
179
|
+
return "skip", None, ""
|
|
180
|
+
except Exception as _e:
|
|
181
|
+
logger.debug("fix_engine: git log check failed: %s", _e)
|
|
182
|
+
|
|
183
|
+
logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
|
|
184
|
+
|
|
185
|
+
base_env = _os.environ.copy()
|
|
186
|
+
api_env = {**base_env, "ANTHROPIC_API_KEY": cfg.anthropic_api_key} if cfg.anthropic_api_key else None
|
|
187
|
+
oauth_env = base_env # relies on cached `claude login` session — no key injected
|
|
188
|
+
|
|
189
|
+
# Choose primary/fallback order based on config
|
|
190
|
+
if cfg.claude_pro_for_tasks and cfg.anthropic_api_key:
|
|
191
|
+
attempts = [("Claude Pro (OAuth)", oauth_env), ("API key", api_env)]
|
|
192
|
+
elif cfg.claude_pro_for_tasks:
|
|
193
|
+
attempts = [("Claude Pro (OAuth)", oauth_env)]
|
|
194
|
+
elif cfg.anthropic_api_key:
|
|
195
|
+
attempts = [("API key", api_env), ("Claude Pro (OAuth)", oauth_env)]
|
|
196
|
+
else:
|
|
197
|
+
attempts = [("Claude Pro (OAuth)", oauth_env)]
|
|
198
|
+
|
|
199
|
+
output = ""
|
|
200
|
+
try:
|
|
201
|
+
for label, env in attempts:
|
|
202
|
+
if env is None:
|
|
203
|
+
continue
|
|
204
|
+
logger.info("fix_engine: trying %s for %s", label, event.fingerprint)
|
|
205
|
+
output, timed_out = _run_claude_attempt(cfg.claude_code_bin, prompt, env)
|
|
206
|
+
if timed_out:
|
|
207
|
+
logger.error("Claude Code timed out for %s", event.fingerprint)
|
|
208
|
+
return "error", None, ""
|
|
209
|
+
if not _is_auth_error(output):
|
|
210
|
+
break
|
|
211
|
+
logger.warning("fix_engine: %s auth error for %s — trying next method", label, event.fingerprint)
|
|
212
|
+
else:
|
|
213
|
+
# All attempts failed with auth errors
|
|
214
|
+
msg = (
|
|
215
|
+
":warning: *Sentinel — Fix Engine auth failure*\n"
|
|
216
|
+
f"Both API key and Claude Pro (OAuth) failed authentication for `{event.fingerprint}`.\n"
|
|
217
|
+
"• Check that `ANTHROPIC_API_KEY` is valid, or run `claude login` to refresh the OAuth session."
|
|
218
|
+
)
|
|
219
|
+
logger.error("fix_engine: all auth methods failed for %s", event.fingerprint)
|
|
220
|
+
slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
|
|
221
|
+
return "error", None, ""
|
|
222
|
+
except FileNotFoundError:
|
|
223
|
+
msg = (
|
|
224
|
+
f":warning: *Sentinel — Claude CLI not found*\n"
|
|
225
|
+
f"`{cfg.claude_code_bin}` not found. Run: `npm install -g @anthropic-ai/claude-code`\n"
|
|
226
|
+
f"Fix engine is disabled until this is resolved."
|
|
227
|
+
)
|
|
228
|
+
logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
|
|
229
|
+
slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
|
|
230
|
+
return "error", None, ""
|
|
231
|
+
|
|
232
|
+
# Alert Slack immediately on rate-limit — never stay silent
|
|
233
|
+
alert_if_rate_limited(
|
|
234
|
+
cfg.slack_bot_token,
|
|
235
|
+
cfg.slack_channel,
|
|
236
|
+
source=f"fix_engine/{event.fingerprint}",
|
|
237
|
+
output=output,
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
if output.strip().upper().startswith("SKIP:"):
|
|
241
|
+
reason = output.strip()[5:].strip()
|
|
242
|
+
logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
|
|
243
|
+
return "skip", None, ""
|
|
244
|
+
|
|
245
|
+
patch = _extract_patch(output)
|
|
246
|
+
if not patch:
|
|
247
|
+
logger.warning("No patch found in Claude output for %s", event.fingerprint)
|
|
248
|
+
return "error", None, ""
|
|
249
|
+
|
|
250
|
+
ok, reason = _validate_patch(patch)
|
|
251
|
+
if not ok:
|
|
252
|
+
logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
|
|
253
|
+
return "skip", None, ""
|
|
254
|
+
|
|
255
|
+
patches_dir.mkdir(parents=True, exist_ok=True)
|
|
256
|
+
patch_path = patches_dir / f"{event.fingerprint}.diff"
|
|
257
|
+
patch_path.write_text(patch, encoding="utf-8")
|
|
258
|
+
logger.info("Patch written to %s", patch_path)
|
|
259
|
+
return "patch", patch_path, marker
|