@misterhuydo/sentinel 1.0.5 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,9 +21,10 @@ from .fix_engine import generate_fix
21
21
  from .git_manager import apply_and_commit, publish
22
22
  from .cicd_trigger import trigger as cicd_trigger
23
23
  from .log_fetcher import fetch_all
24
- from .log_parser import parse_all, ErrorEvent
24
+ from .log_parser import parse_all, scan_all_for_markers, ErrorEvent
25
+ from .issue_watcher import scan_issues, mark_done, IssueEvent
25
26
  from .repo_router import route
26
- from .reporter import build_and_send, send_fix_notification
27
+ from .reporter import build_and_send, send_fix_notification, send_failure_notification, send_confirmed_notification, send_regression_notification
27
28
  from .state_store import StateStore
28
29
 
29
30
  logging.basicConfig(
@@ -80,15 +81,30 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
80
81
  return
81
82
 
82
83
  patches_dir = Path(sentinel.workspace_dir) / "patches"
83
- status, patch_path = generate_fix(event, repo, sentinel, patches_dir)
84
+ status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
84
85
 
85
86
  if status != "patch" or patch_path is None:
86
- store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed", repo_name=repo.repo_name)
87
+ outcome = "skipped" if status == "skip" else "failed"
88
+ store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
89
+ send_failure_notification(sentinel, {
90
+ "source": event.source,
91
+ "message": event.message,
92
+ "repo_name": repo.repo_name,
93
+ "reason": f"Claude Code returned {status.upper()}",
94
+ "body": event.full_text()[:500],
95
+ })
87
96
  return
88
97
 
89
98
  commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
90
99
  if commit_status != "committed":
91
100
  store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
101
+ send_failure_notification(sentinel, {
102
+ "source": event.source,
103
+ "message": event.message,
104
+ "repo_name": repo.repo_name,
105
+ "reason": "patch generated but commit/tests failed",
106
+ "body": event.full_text()[:500],
107
+ })
92
108
  return
93
109
 
94
110
  branch, pr_url = publish(event, repo, sentinel, commit_hash)
@@ -100,6 +116,7 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
100
116
  branch=branch,
101
117
  pr_url=pr_url,
102
118
  repo_name=repo.repo_name,
119
+ sentinel_marker=marker,
103
120
  )
104
121
 
105
122
  send_fix_notification(sentinel, {
@@ -123,28 +140,162 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
123
140
 
124
141
  # ── Poll cycle ────────────────────────────────────────────────────────────────
125
142
 
126
- async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
127
- global _report_requested
128
143
 
129
- sources = list(cfg_loader.log_sources.values())
130
- if not sources:
131
- logger.warning("No log-configs found")
144
+ # ── Issue pipeline ────────────────────────────────────────────────────────────
145
+
146
async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: StateStore):
    """Run the fix pipeline for one issue file picked up from issues/.

    Steps, in order: honour the SENTINEL_PAUSE flag file, skip issues whose
    fingerprint was attempted in the last 24 hours, pick the target repo,
    ask for a patch, commit it, publish it, record the outcome and notify.
    The issue file is passed to ``mark_done`` on every path except the
    paused and "cannot route" exits, where it is left in place.
    """
    settings = cfg_loader.sentinel

    if Path("SENTINEL_PAUSE").exists():
        logger.info("SENTINEL_PAUSE present -- fix activity halted")
        return

    if store.fix_attempted_recently(event.fingerprint, hours=24):
        logger.debug("Issue already processed recently: %s", event.source)
        mark_done(event.issue_file)
        return

    # Routing priority: explicit TARGET_REPO header, then the single-repo
    # shortcut; anything else is left untouched for an admin to annotate.
    requested_repo = event.target_repo
    if requested_repo:
        repo = cfg_loader.repos.get(requested_repo)
        if not repo:
            logger.warning("TARGET_REPO %r not found in config -- leaving %s for admin",
                           requested_repo, event.source)
            return
    elif len(cfg_loader.repos) != 1:
        logger.warning(
            "Cannot auto-route %s -- add 'TARGET_REPO: <repo>' as first line in the file",
            event.source,
        )
        return  # Leave the file so admin can add the header
    else:
        repo = next(iter(cfg_loader.repos.values()))

    def _report_unresolved(reason):
        # Shared tail of both failure exits: mail the admins, then retire the file.
        send_failure_notification(settings, {
            "source": event.source,
            "message": event.message,
            "repo_name": repo.repo_name,
            "reason": reason,
            "body": event.body[:500],
        })
        mark_done(event.issue_file)

    patch_root = Path(settings.workspace_dir) / "patches"
    fix_status, patch_file, marker = generate_fix(event, repo, settings, patch_root, store)

    if fix_status != "patch" or patch_file is None:
        if fix_status == "skip":
            outcome = "skipped"
        else:
            outcome = "failed"
        store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
        _report_unresolved(f"Claude Code returned {fix_status.upper()}")
        return

    commit_state, commit_hash = apply_and_commit(event, patch_file, repo, settings)
    if commit_state != "committed":
        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
        _report_unresolved("patch generated but commit/tests failed")
        return

    branch, pr_url = publish(event, repo, settings, commit_hash)

    final_state = "applied" if repo.auto_publish else "pending"
    store.record_fix(
        event.fingerprint,
        final_state,
        patch_path=str(patch_file),
        commit_hash=commit_hash,
        branch=branch,
        pr_url=pr_url,
        repo_name=repo.repo_name,
        sentinel_marker=marker,
    )

    notification = {
        "source": event.source,
        "severity": "ERROR",
        "fingerprint": event.fingerprint,
        "first_seen": event.timestamp,
        "message": event.message,
        "stack_trace": event.body,
        "repo_name": repo.repo_name,
        "commit_hash": commit_hash,
        "branch": branch,
        "pr_url": pr_url,
        "auto_publish": repo.auto_publish,
        "files_changed": [],
    }
    send_fix_notification(settings, notification)
    mark_done(event.issue_file)

    # CI/CD is only kicked off for repos that publish automatically.
    if repo.auto_publish:
        cicd_trigger(repo, store, event.fingerprint)
233
+
136
234
 
137
- events = parse_all(fetched, cfg_loader.log_sources)
138
- logger.info("Parsed %d error/warn events", len(events))
235
+ async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
236
+ global _report_requested
237
+ events: list = []
238
+ fetched: dict = {}
139
239
 
140
- new_events = []
141
- for event in events:
142
- store.record_error(event.fingerprint, event.source, event.message)
143
- if not store.fix_attempted_recently(event.fingerprint):
144
- new_events.append(event)
240
+ # ── Log sources (optional) ────────────────────────────────────────────────
241
+ sources = list(cfg_loader.log_sources.values())
242
+ if sources:
243
+ logger.info("Fetching logs from %d source(s)...", len(sources))
244
+ fetched = await fetch_all(sources, cfg_loader.sentinel)
245
+ events = parse_all(fetched, cfg_loader.log_sources)
246
+ logger.info("Parsed %d error/warn events", len(events))
247
+
248
+ new_events = []
249
+ for event in events:
250
+ store.record_error(event.fingerprint, event.source, event.message)
251
+ if not store.fix_attempted_recently(event.fingerprint):
252
+ new_events.append(event)
253
+
254
+ if new_events:
255
+ logger.info("%d new log event(s) to process", len(new_events))
256
+ await asyncio.gather(
257
+ *[_handle_error(e, cfg_loader, store) for e in new_events],
258
+ return_exceptions=True,
259
+ )
145
260
 
146
- logger.info("%d new event(s) to process", len(new_events))
147
- await asyncio.gather(*[_handle_error(e, cfg_loader, store) for e in new_events], return_exceptions=True)
261
+ # ── SENTINEL marker scanning (phase 1: record first seen in prod logs) ────
262
+ if sources and fetched:
263
+ for marker in set(scan_all_for_markers(fetched)):
264
+ fix = store.mark_marker_seen(marker)
265
+ if fix:
266
+ logger.info("Marker seen in production: %s repo=%s — quiet period started",
267
+ marker, fix.get("repo_name"))
268
+
269
+ # ── Regression detection (error recurred before quiet period elapsed) ──────
270
+ if sources:
271
+ for event in events:
272
+ pending = store.get_marker_seen_fix(event.fingerprint)
273
+ if pending:
274
+ logger.warning("Regression: %s recurred after marker seen", event.fingerprint)
275
+ store.mark_regressed(event.fingerprint)
276
+ send_regression_notification(cfg_loader.sentinel, pending, {
277
+ "source": event.source,
278
+ "message": event.message,
279
+ "body": event.full_text()[:500],
280
+ })
281
+
282
+ # ── Phase 2: confirm fixes whose quiet period has elapsed ────────────────
283
+ quiet_hours = cfg_loader.sentinel.marker_confirm_hours
284
+ for fix in store.get_fixes_pending_confirmation(quiet_hours):
285
+ confirmed = store.confirm_fix(fix["fingerprint"])
286
+ if confirmed:
287
+ logger.info("Fix confirmed after %dh quiet period: %s repo=%s",
288
+ quiet_hours, fix["fingerprint"], fix.get("repo_name"))
289
+ send_confirmed_notification(cfg_loader.sentinel, confirmed)
290
+
291
+ # ── Issues directory (always checked) ────────────────────────────────────
292
+ issues = scan_issues(Path("."))
293
+ if issues:
294
+ logger.info("%d issue file(s) found in issues/", len(issues))
295
+ await asyncio.gather(
296
+ *[_handle_issue(e, cfg_loader, store) for e in issues],
297
+ return_exceptions=True,
298
+ )
148
299
 
149
300
  if cfg_loader.sentinel.send_health and (_report_requested or _report_due(cfg_loader, store)):
150
301
  _report_requested = False
@@ -216,6 +367,7 @@ def main():
216
367
  Path("logs").mkdir(exist_ok=True)
217
368
  Path("workspace/fetched").mkdir(parents=True, exist_ok=True)
218
369
  Path("workspace/patches").mkdir(parents=True, exist_ok=True)
370
+ Path("issues").mkdir(exist_ok=True)
219
371
 
220
372
  parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
221
373
  parser.add_argument("--init", action="store_true", help="First-time setup")
@@ -136,3 +136,139 @@ def _age(ts_str: str) -> str:
136
136
  return f"{int(delta.total_seconds() // 60)}m" if hours < 1 else f"{hours}h"
137
137
  except Exception:
138
138
  return "?"
139
+
140
def send_failure_notification(cfg: SentinelConfig, details: dict):
    """
    Notify admins when Claude Code cannot fix a problem (from logs or issues/).

    Returns immediately when ``cfg.mails`` is empty. Otherwise builds an
    HTML e-mail and passes it to ``_send_email``.

    details dict keys: source, message, repo_name, reason, body
    All keys are optional; a missing or ``None`` value falls back to
    ``'unknown'`` (source, repo_name, reason) or an empty string (message,
    body). ``body`` is truncated to 1000 characters.
    """
    # Function-scope import: keeps this block self-contained.
    from html import escape

    if not cfg.mails:
        return

    # `or` (rather than a .get default) also covers keys present with None,
    # which would otherwise break the slicing below.
    source = str(details.get('source') or 'unknown')
    repo_name = str(details.get('repo_name') or 'unknown')
    reason = str(details.get('reason') or 'unknown')
    message = str(details.get('message') or '')
    body = str(details.get('body') or '')[:1000]
    ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    # The subject is a plain-text header, so it uses the raw values.
    subject = f'[Sentinel] UNRESOLVED ({source}): {message[:80]}'

    # Everything below comes from logs or issue files and may contain
    # '<', '>' or '&' (stack traces, generics, XML); escape before embedding.
    ctx_html = f'<h3>Context</h3><pre>{escape(body)}</pre>' if body else ''
    html = (
        '<!DOCTYPE html><html><head><meta charset="utf-8">'
        '<style>'
        'body{font-family:Arial,sans-serif;font-size:14px;color:#222}'
        'h2{color:#c62828}'
        'h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}'
        'table{border-collapse:collapse;width:100%;margin-bottom:16px}'
        'th{background:#f1f3f4;text-align:left;padding:6px 10px}'
        'td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}'
        '.label{font-weight:bold;width:160px}'
        '.mono{font-family:monospace;font-size:12px}'
        'pre{background:#f8f8f8;border:1px solid #ddd;padding:10px;font-size:12px;white-space:pre-wrap}'
        '</style></head><body>'
        '<h2>&#x26A0; Sentinel could not fix this issue</h2>'
        f'<p><strong>{escape(repo_name)}</strong> &middot; {ts}</p>'
        '<h3>Details</h3>'
        '<table>'
        f'<tr><td class="label">Source</td><td class="mono">{escape(source)}</td></tr>'
        f'<tr><td class="label">Repository</td><td class="mono">{escape(repo_name)}</td></tr>'
        f'<tr><td class="label">Message</td><td class="mono">{escape(message)}</td></tr>'
        f'<tr><td class="label">Reason</td><td>{escape(reason)}</td></tr>'
        '</table>'
        + ctx_html +
        '<hr><small>Sentinel &mdash; Autonomous DevOps Agent</small>'
        '</body></html>'
    )

    _send_email(cfg, subject, html)
    logger.info('Failure notification sent for %s', source)
188
+
189
+
190
+
191
+ # ---- Confirmed fix notification ----------------------------------------------
192
+
193
def send_confirmed_notification(cfg: SentinelConfig, fix: dict):
    """Notify admins that a fix has been confirmed running in production.

    Returns immediately when ``cfg.mails`` is empty. ``fix`` is read with
    ``.get`` for: repo_name, fingerprint, sentinel_marker, commit_hash,
    branch, confirmed_at. Missing or ``None`` values render as empty cells
    (``repo_name`` as ``'unknown'``, ``confirmed_at`` as the current time).
    """
    # Function-scope import: keeps this block self-contained.
    from html import escape

    if not cfg.mails:
        return

    def _cell(key, fallback=''):
        # HTML-escaped text for one table cell; None never renders as "None".
        return escape(str(fix.get(key) or fallback))

    repo_name = str(fix.get('repo_name') or 'unknown')
    fingerprint = str(fix.get('fingerprint') or '')
    ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    # The subject is a plain-text header, so it uses the raw values.
    subject = f'[Sentinel] ✅ Fix confirmed in production: {repo_name} ({fingerprint[:8]})'
    html = (
        '<!DOCTYPE html><html><head><meta charset="utf-8">'
        '<style>'
        'body{font-family:Arial,sans-serif;font-size:14px;color:#222}'
        'h2{color:#2e7d32}'
        'table{border-collapse:collapse;width:100%;margin-bottom:16px}'
        'th{background:#f1f3f4;text-align:left;padding:6px 10px}'
        'td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}'
        '.label{font-weight:bold;width:160px}'
        '.mono{font-family:monospace;font-size:12px}'
        '</style></head><body>'
        '<h2>✅ Fix confirmed running in production</h2>'
        f'<p><strong>{escape(repo_name)}</strong> &middot; {ts}</p>'
        '<table>'
        f'<tr><td class="label">Fingerprint</td><td class="mono">{escape(fingerprint)}</td></tr>'
        f'<tr><td class="label">Sentinel marker</td><td class="mono">{_cell("sentinel_marker")}</td></tr>'
        f'<tr><td class="label">Commit</td><td class="mono">{_cell("commit_hash")}</td></tr>'
        f'<tr><td class="label">Branch</td><td class="mono">{_cell("branch")}</td></tr>'
        f'<tr><td class="label">Confirmed at</td><td>{_cell("confirmed_at", ts)}</td></tr>'
        '</table>'
        '<p>The marker log line was detected in production logs, confirming the fix is live and the fixed code path executed.</p>'
        '<hr><small>Sentinel &mdash; Autonomous DevOps Agent</small>'
        '</body></html>'
    )
    _send_email(cfg, subject, html)
    logger.info('Confirmed notification sent for %s', fingerprint)
228
+
229
+
230
+ # ---- Regression notification ------------------------------------------------
231
+
232
def send_regression_notification(cfg: SentinelConfig, fix: dict, event: dict):
    """Notify admins that the original error recurred after a fix was deployed.

    Returns immediately when ``cfg.mails`` is empty. ``fix`` is read with
    ``.get`` for: repo_name, fingerprint, commit_hash, branch, confirmed_at.
    ``event`` is read with ``.get`` for: source, message, body. Missing or
    ``None`` values render as empty text (``repo_name`` as ``'unknown'``).
    """
    # Function-scope import: keeps this block self-contained.
    from html import escape

    if not cfg.mails:
        return

    def _text(mapping, key):
        # HTML-escaped value; None never renders as the literal "None".
        return escape(str(mapping.get(key) or ''))

    repo_name = str(fix.get('repo_name') or 'unknown')
    fingerprint = str(fix.get('fingerprint') or '')
    ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    # The subject is a plain-text header, so it uses the raw value.
    subject = f'[Sentinel] ⚠ Regression: fix did not resolve issue in {repo_name}'

    # The recurring event comes straight from logs and may contain
    # '<', '>' or '&'; every interpolated value is escaped.
    html = (
        '<!DOCTYPE html><html><head><meta charset="utf-8">'
        '<style>'
        'body{font-family:Arial,sans-serif;font-size:14px;color:#222}'
        'h2{color:#c62828}'
        'h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}'
        'table{border-collapse:collapse;width:100%;margin-bottom:16px}'
        'th{background:#f1f3f4;text-align:left;padding:6px 10px}'
        'td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}'
        '.label{font-weight:bold;width:160px}'
        '.mono{font-family:monospace;font-size:12px}'
        'pre{background:#f8f8f8;border:1px solid #ddd;padding:10px;font-size:12px;white-space:pre-wrap}'
        '</style></head><body>'
        '<h2>⚠ Regression detected &mdash; fix did not resolve the issue</h2>'
        f'<p><strong>{escape(repo_name)}</strong> &middot; {ts}</p>'
        '<p>The original error recurred in production logs after the Sentinel fix was confirmed deployed.</p>'
        '<h3>Fix Details</h3>'
        '<table>'
        f'<tr><td class="label">Fingerprint</td><td class="mono">{escape(fingerprint)}</td></tr>'
        f'<tr><td class="label">Commit</td><td class="mono">{_text(fix, "commit_hash")}</td></tr>'
        f'<tr><td class="label">Branch</td><td class="mono">{_text(fix, "branch")}</td></tr>'
        f'<tr><td class="label">Confirmed at</td><td>{_text(fix, "confirmed_at")}</td></tr>'
        '</table>'
        '<h3>Recurring Error</h3>'
        '<table>'
        f'<tr><td class="label">Source</td><td class="mono">{_text(event, "source")}</td></tr>'
        f'<tr><td class="label">Message</td><td class="mono">{_text(event, "message")}</td></tr>'
        '</table>'
        f'<pre>{_text(event, "body")}</pre>'
        '<p>Sentinel will not attempt another automatic fix. Please investigate manually.</p>'
        '<hr><small>Sentinel &mdash; Autonomous DevOps Agent</small>'
        '</body></html>'
    )
    _send_email(cfg, subject, html)
    logger.info('Regression notification sent for %s', fingerprint)