@misterhuydo/sentinel 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,223 +1,355 @@
1
- """
2
- main.py — Sentinel entry point and watch loop.
3
-
4
- Usage:
5
- python -m sentinel.main # run watch loop
6
- python -m sentinel.main --init # first-time setup
7
- """
8
-
9
- import argparse
10
- import asyncio
11
- import logging
12
- import signal
13
- import subprocess
14
- import sys
15
- from datetime import datetime, timezone
16
- from pathlib import Path
17
-
18
- from .cairn_client import ensure_installed as cairn_installed, index_repo
19
- from .config_loader import ConfigLoader
20
- from .fix_engine import generate_fix
21
- from .git_manager import apply_and_commit, publish
22
- from .cicd_trigger import trigger as cicd_trigger
23
- from .log_fetcher import fetch_all
24
- from .log_parser import parse_all, ErrorEvent
25
- from .repo_router import route
26
- from .reporter import build_and_send
27
- from .state_store import StateStore
28
-
29
- logging.basicConfig(
30
- level=logging.INFO,
31
- format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
32
- handlers=[
33
- logging.StreamHandler(sys.stdout),
34
- logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
35
- ],
36
- )
37
- logger = logging.getLogger("sentinel")
38
-
39
- _report_requested = False
40
-
41
-
42
- def _on_sigusr1(*_):
43
- global _report_requested
44
- _report_requested = True
45
- logger.info("SIGUSR1 received — health report queued")
46
-
47
-
48
- def _register_signals():
49
- try:
50
- signal.signal(signal.SIGUSR1, _on_sigusr1)
51
- except (OSError, AttributeError):
52
- pass
53
-
54
-
55
- # ── Fix pipeline ──────────────────────────────────────────────────────────────
56
-
57
- async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
58
- sentinel = cfg_loader.sentinel
59
-
60
- repo = route(event, cfg_loader.repos)
61
- if not repo:
62
- return
63
-
64
- if Path("SENTINEL_PAUSE").exists():
65
- logger.info("SENTINEL_PAUSE present — fix activity halted")
66
- return
67
-
68
- if event.is_infra_issue:
69
- logger.info("Infra issue for %s — log only", event.fingerprint)
70
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
71
- return
72
-
73
- if event.severity == "CRITICAL" and repo.auto_publish:
74
- logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
75
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
76
- build_and_send(sentinel, store)
77
- return
78
-
79
- if store.fix_attempted_recently(event.fingerprint, hours=24):
80
- logger.debug("Fix already attempted recently for %s", event.fingerprint)
81
- return
82
-
83
- patches_dir = Path(sentinel.workspace_dir) / "patches"
84
- status, patch_path = generate_fix(event, repo, sentinel, patches_dir)
85
-
86
- if status != "patch" or patch_path is None:
87
- store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed", repo_name=repo.repo_name)
88
- return
89
-
90
- commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
91
- if commit_status != "committed":
92
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
93
- return
94
-
95
- branch, pr_url = publish(event, repo, sentinel, commit_hash)
96
- store.record_fix(
97
- event.fingerprint,
98
- "applied" if repo.auto_publish else "pending",
99
- patch_path=str(patch_path),
100
- commit_hash=commit_hash,
101
- branch=branch,
102
- pr_url=pr_url,
103
- repo_name=repo.repo_name,
104
- )
105
-
106
- if repo.auto_publish:
107
- cicd_trigger(repo, store, event.fingerprint)
108
-
109
-
110
- # ── Poll cycle ────────────────────────────────────────────────────────────────
111
-
112
- async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
113
- global _report_requested
114
-
115
- sources = list(cfg_loader.log_sources.values())
116
- if not sources:
117
- logger.warning("No log-configs found")
118
- return
119
-
120
- logger.info("Fetching logs from %d source(s)...", len(sources))
121
- fetched = await fetch_all(sources, cfg_loader.sentinel)
122
-
123
- events = parse_all(fetched, cfg_loader.log_sources)
124
- logger.info("Parsed %d error/warn events", len(events))
125
-
126
- new_events = []
127
- for event in events:
128
- store.record_error(event.fingerprint, event.source, event.message)
129
- if not store.fix_attempted_recently(event.fingerprint):
130
- new_events.append(event)
131
-
132
- logger.info("%d new event(s) to process", len(new_events))
133
- await asyncio.gather(*[_handle_error(e, cfg_loader, store) for e in new_events], return_exceptions=True)
134
-
135
- if _report_requested or _report_due(cfg_loader, store):
136
- _report_requested = False
137
- logger.info("Sending health report...")
138
- build_and_send(cfg_loader.sentinel, store)
139
-
140
-
141
- def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
142
- last = store.last_report_time()
143
- if last is None:
144
- return True
145
- elapsed = (datetime.now(timezone.utc) - last).total_seconds()
146
- return elapsed >= cfg_loader.sentinel.report_interval_hours * 3600
147
-
148
-
149
- # ── Init ──────────────────────────────────────────────────────────────────────
150
-
151
- def run_init(cfg_loader: ConfigLoader):
152
- sentinel = cfg_loader.sentinel
153
- logger.info("=== Sentinel --init ===")
154
-
155
- if not cairn_installed():
156
- logger.error("Cairn not installed. Run: npm install -g @misterhuydo/cairn-mcp")
157
-
158
- for name, repo in cfg_loader.repos.items():
159
- local = Path(repo.local_path)
160
- if not local.exists():
161
- logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
162
- r = subprocess.run(["git", "clone", repo.repo_url, str(local)], capture_output=True, text=True)
163
- if r.returncode != 0:
164
- logger.error("Clone failed for %s: %s", name, r.stderr)
165
- continue
166
- index_repo(repo)
167
-
168
- for src_name, src in cfg_loader.log_sources.items():
169
- if src.source_type == "ssh" and src.hosts:
170
- host = src.hosts[0]
171
- logger.info("Testing SSH to %s (%s)...", src_name, host)
172
- r = subprocess.run(
173
- ["ssh", "-i", src.key, "-o", "StrictHostKeyChecking=no",
174
- "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok"],
175
- capture_output=True, text=True, timeout=15,
176
- )
177
- logger.info(" SSH %s: %s", host, "OK" if r.returncode == 0 else f"FAILED — {r.stderr.strip()}")
178
-
179
- logger.info("Sending test email...")
180
- try:
181
- build_and_send(sentinel, StateStore(sentinel.state_db))
182
- except Exception as e:
183
- logger.error("Test email failed: %s", e)
184
-
185
- logger.info("=== Init complete ===")
186
-
187
-
188
- # ── Entry point ───────────────────────────────────────────────────────────────
189
-
190
- async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
191
- interval = cfg_loader.sentinel.poll_interval_seconds
192
- logger.info("Sentinel starting — poll interval: %ds, repos: %s", interval, list(cfg_loader.repos.keys()))
193
- while True:
194
- try:
195
- await poll_cycle(cfg_loader, store)
196
- except Exception as e:
197
- logger.exception("Unhandled error in poll cycle: %s", e)
198
- await asyncio.sleep(interval)
199
-
200
-
201
- def main():
202
- Path("logs").mkdir(exist_ok=True)
203
- Path("workspace/fetched").mkdir(parents=True, exist_ok=True)
204
- Path("workspace/patches").mkdir(parents=True, exist_ok=True)
205
-
206
- parser = argparse.ArgumentParser(description="Sentinel Autonomous DevOps Agent")
207
- parser.add_argument("--init", action="store_true", help="First-time setup")
208
- parser.add_argument("--config", default="./config", help="Config directory path")
209
- args = parser.parse_args()
210
-
211
- cfg_loader = ConfigLoader(config_dir=args.config)
212
- store = StateStore(cfg_loader.sentinel.state_db)
213
- _register_signals()
214
-
215
- if args.init:
216
- run_init(cfg_loader)
217
- return
218
-
219
- asyncio.run(run_loop(cfg_loader, store))
220
-
221
-
222
- if __name__ == "__main__":
223
- main()
1
+ """
2
+ main.py — Sentinel entry point and watch loop.
3
+
4
+ Usage:
5
+ python -m sentinel.main # run watch loop
6
+ python -m sentinel.main --init # first-time setup
7
+ """
8
+
9
+ import argparse
10
+ import asyncio
11
+ import logging
12
+ import signal
13
+ import subprocess
14
+ import sys
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ from .cairn_client import ensure_installed as cairn_installed, index_repo
19
+ from .config_loader import ConfigLoader
20
+ from .fix_engine import generate_fix
21
+ from .git_manager import apply_and_commit, publish
22
+ from .cicd_trigger import trigger as cicd_trigger
23
+ from .log_fetcher import fetch_all
24
+ from .log_parser import parse_all, ErrorEvent
25
+ from .issue_watcher import scan_issues, mark_done, IssueEvent
26
+ from .repo_router import route
27
+ from .reporter import build_and_send, send_fix_notification, send_failure_notification
28
+ from .state_store import StateStore
29
+
30
# logging.FileHandler opens its file as soon as it is constructed, so the log
# directory has to exist before basicConfig() runs. main() also creates it,
# but that happens only after this module has already been imported.
Path("logs").mkdir(exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
    ],
)
logger = logging.getLogger("sentinel")

# Set to True by the SIGUSR1 handler; read and reset by poll_cycle().
_report_requested = False
41
+
42
+
43
def _on_sigusr1(*_):
    """Signal handler: queue an on-demand health report.

    Only raises the module-level ``_report_requested`` flag; poll_cycle()
    checks and clears it. The signal number and frame arguments are ignored.
    """
    global _report_requested
    logger.info("SIGUSR1 received — health report queued")
    _report_requested = True
47
+
48
+
49
def _register_signals():
    """Install the SIGUSR1 handler that queues an on-demand health report.

    Registration is best-effort: when it is not possible the process keeps
    running without the on-demand trigger, and the reason is logged at debug
    level instead of being silently discarded.
    """
    try:
        signal.signal(signal.SIGUSR1, _on_sigusr1)
    except (OSError, AttributeError) as exc:
        # AttributeError: this platform's signal module has no SIGUSR1.
        # OSError: the OS refused to install the handler.
        logger.debug("SIGUSR1 handler not registered: %s", exc)
54
+
55
+
56
+ # ── Fix pipeline ──────────────────────────────────────────────────────────────
57
+
58
async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
    """Run the fix pipeline for a single parsed log error.

    Order of operations:
      1. Route the event to a repo; unroutable events are dropped.
      2. Stop if a ``SENTINEL_PAUSE`` file exists in the working directory.
      3. Record as "skipped" (no fix attempted) for infra issues and for
         CRITICAL events in auto-publish repos.
      4. Stop if a fix for this fingerprint was attempted in the last 24 hours.
      5. Generate a patch, commit it, publish it, record the result, notify,
         and trigger CI/CD when the repo auto-publishes.

    Notifications are sent through guarded helpers: a notification that fails
    is logged but never aborts the pipeline, so it cannot prevent the CI/CD
    trigger from running after a fix has already been published.
    """
    sentinel = cfg_loader.sentinel

    repo = route(event, cfg_loader.repos)
    if not repo:
        return

    if Path("SENTINEL_PAUSE").exists():
        logger.info("SENTINEL_PAUSE present — fix activity halted")
        return

    if event.is_infra_issue:
        logger.info("Infra issue for %s — log only", event.fingerprint)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if event.severity == "CRITICAL" and repo.auto_publish:
        logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if store.fix_attempted_recently(event.fingerprint, hours=24):
        logger.debug("Fix already attempted recently for %s", event.fingerprint)
        return

    patches_dir = Path(sentinel.workspace_dir) / "patches"
    status, patch_path = generate_fix(event, repo, sentinel, patches_dir)

    if status != "patch" or patch_path is None:
        outcome = "skipped" if status == "skip" else "failed"
        store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
        # str() keeps the message buildable even if the status is not a string.
        _notify_error_failure(sentinel, event, repo, f"Claude Code returned {str(status).upper()}")
        return

    commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
    if commit_status != "committed":
        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
        _notify_error_failure(sentinel, event, repo, "patch generated but commit/tests failed")
        return

    branch, pr_url = publish(event, repo, sentinel, commit_hash)
    store.record_fix(
        event.fingerprint,
        "applied" if repo.auto_publish else "pending",
        patch_path=str(patch_path),
        commit_hash=commit_hash,
        branch=branch,
        pr_url=pr_url,
        repo_name=repo.repo_name,
    )

    _notify_error_fixed(sentinel, event, repo, commit_hash, branch, pr_url)

    if repo.auto_publish:
        cicd_trigger(repo, store, event.fingerprint)


def _notify_error_failure(sentinel, event, repo, reason):
    """Send the failure notification for a log error; log instead of raising on failure."""
    try:
        send_failure_notification(sentinel, {
            "source": event.source,
            "message": event.message,
            "repo_name": repo.repo_name,
            "reason": reason,
            "body": event.full_text()[:500],
        })
    except Exception:
        logger.exception("Could not send failure notification for %s", event.fingerprint)


def _notify_error_fixed(sentinel, event, repo, commit_hash, branch, pr_url):
    """Send the fix notification for a log error; log instead of raising on failure."""
    try:
        send_fix_notification(sentinel, {
            "source": event.source,
            "severity": event.severity,
            "fingerprint": event.fingerprint,
            "first_seen": str(event.timestamp),
            "message": event.message,
            "stack_trace": getattr(event, "stack_trace", ""),
            "repo_name": repo.repo_name,
            "commit_hash": commit_hash,
            "branch": branch,
            "pr_url": pr_url,
            "auto_publish": repo.auto_publish,
            "files_changed": [],
        })
    except Exception:
        logger.exception("Could not send fix notification for %s", event.fingerprint)
138
+
139
+
140
+ # ── Poll cycle ────────────────────────────────────────────────────────────────
141
+
142
+
143
+ # ── Issue pipeline ────────────────────────────────────────────────────────────
144
+
145
async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: StateStore):
    """Process a single issue file from the issues/ directory.

    Order of operations:
      1. Stop if a ``SENTINEL_PAUSE`` file exists in the working directory.
      2. If this fingerprint was handled in the last 24 hours, mark the file
         done and stop.
      3. Pick the target repo: an explicit ``target_repo`` on the event wins;
         otherwise the only configured repo is used; otherwise the file is
         left untouched so an admin can add a ``TARGET_REPO`` header.
      4. Generate a patch, commit it, publish it, record the result, notify,
         mark the file done, and trigger CI/CD when the repo auto-publishes.

    Once a fix has been attempted the issue file is always marked done,
    whether the attempt succeeded or failed. Notifications go through guarded
    helpers: a notification that fails is logged but cannot prevent
    ``mark_done`` or the CI/CD trigger from running.
    """
    sentinel = cfg_loader.sentinel

    if Path("SENTINEL_PAUSE").exists():
        logger.info("SENTINEL_PAUSE present -- fix activity halted")
        return

    if store.fix_attempted_recently(event.fingerprint, hours=24):
        logger.debug("Issue already processed recently: %s", event.source)
        mark_done(event.issue_file)
        return

    # Route: explicit TARGET_REPO in file > single-repo shortcut > warn and leave
    if event.target_repo:
        repo = cfg_loader.repos.get(event.target_repo)
        if not repo:
            logger.warning("TARGET_REPO %r not found in config -- leaving %s for admin",
                           event.target_repo, event.source)
            return
    elif len(cfg_loader.repos) == 1:
        repo = next(iter(cfg_loader.repos.values()))
    else:
        logger.warning(
            "Cannot auto-route %s -- add 'TARGET_REPO: <repo>' as first line in the file",
            event.source,
        )
        return  # Leave the file so admin can add the header

    patches_dir = Path(sentinel.workspace_dir) / "patches"
    status, patch_path = generate_fix(event, repo, sentinel, patches_dir)

    if status != "patch" or patch_path is None:
        outcome = "skipped" if status == "skip" else "failed"
        store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
        # str() keeps the message buildable even if the status is not a string.
        _notify_issue_failure(sentinel, event, repo, f"Claude Code returned {str(status).upper()}")
        mark_done(event.issue_file)
        return

    commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
    if commit_status != "committed":
        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
        _notify_issue_failure(sentinel, event, repo, "patch generated but commit/tests failed")
        mark_done(event.issue_file)
        return

    branch, pr_url = publish(event, repo, sentinel, commit_hash)
    store.record_fix(
        event.fingerprint,
        "applied" if repo.auto_publish else "pending",
        patch_path=str(patch_path),
        commit_hash=commit_hash,
        branch=branch,
        pr_url=pr_url,
        repo_name=repo.repo_name,
    )
    _notify_issue_fixed(sentinel, event, repo, commit_hash, branch, pr_url)
    mark_done(event.issue_file)

    if repo.auto_publish:
        cicd_trigger(repo, store, event.fingerprint)


def _notify_issue_failure(sentinel, event, repo, reason):
    """Send the failure notification for an issue file; log instead of raising on failure."""
    try:
        send_failure_notification(sentinel, {
            "source": event.source,
            "message": event.message,
            "repo_name": repo.repo_name,
            "reason": reason,
            "body": event.body[:500],
        })
    except Exception:
        logger.exception("Could not send failure notification for %s", event.source)


def _notify_issue_fixed(sentinel, event, repo, commit_hash, branch, pr_url):
    """Send the fix notification for an issue file; log instead of raising on failure."""
    try:
        send_fix_notification(sentinel, {
            "source": event.source,
            "severity": "ERROR",
            "fingerprint": event.fingerprint,
            # Stringified so the payload has the same shape as the one built
            # by the log-error pipeline.
            "first_seen": str(event.timestamp),
            "message": event.message,
            "stack_trace": event.body,
            "repo_name": repo.repo_name,
            "commit_hash": commit_hash,
            "branch": branch,
            "pr_url": pr_url,
            "auto_publish": repo.auto_publish,
            "files_changed": [],
        })
    except Exception:
        logger.exception("Could not send fix notification for %s", event.source)
231
+
232
+
233
async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
    """Run one watch iteration: log sources, then issue files, then the health digest.

    * Log sources are optional. When any are configured, their logs are
      fetched and parsed, every event is recorded in the store, and events
      without a recent fix attempt are handed to ``_handle_error``.
    * The issues directory is scanned on every cycle and each issue found is
      handed to ``_handle_issue``.
    * The health digest is sent when ``send_health`` is enabled and either a
      report was requested via SIGUSR1 or the report interval has elapsed.

    Handlers are gathered with ``return_exceptions=True`` so one failing
    event cannot cancel the others; the exceptions that come back are logged
    rather than discarded.
    """
    global _report_requested

    # ── Log sources (optional) ────────────────────────────────────────────────
    sources = list(cfg_loader.log_sources.values())
    if sources:
        logger.info("Fetching logs from %d source(s)...", len(sources))
        fetched = await fetch_all(sources, cfg_loader.sentinel)
        events = parse_all(fetched, cfg_loader.log_sources)
        logger.info("Parsed %d error/warn events", len(events))

        new_events = []
        for event in events:
            store.record_error(event.fingerprint, event.source, event.message)
            if not store.fix_attempted_recently(event.fingerprint):
                new_events.append(event)

        if new_events:
            logger.info("%d new log event(s) to process", len(new_events))
            results = await asyncio.gather(
                *[_handle_error(e, cfg_loader, store) for e in new_events],
                return_exceptions=True,
            )
            _log_handler_failures("log event", new_events, results)

    # ── Issues directory (always checked) ────────────────────────────────────
    issues = scan_issues(Path("."))
    if issues:
        logger.info("%d issue file(s) found in issues/", len(issues))
        results = await asyncio.gather(
            *[_handle_issue(e, cfg_loader, store) for e in issues],
            return_exceptions=True,
        )
        _log_handler_failures("issue", issues, results)

    if cfg_loader.sentinel.send_health and (_report_requested or _report_due(cfg_loader, store)):
        _report_requested = False
        logger.info("Sending health digest...")
        build_and_send(cfg_loader.sentinel, store)


def _log_handler_failures(kind, events, results):
    """Log every exception that asyncio.gather returned in place of a handler result."""
    for event, result in zip(events, results):
        if isinstance(result, BaseException):
            logger.error(
                "Handler failed for %s %s: %s",
                kind, event.fingerprint, result,
                exc_info=result,
            )
270
+
271
+
272
def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
    """Return True when the periodic health report should be sent.

    A report is due when the store has no previous report time, or when at
    least ``report_interval_hours`` have passed since that time (measured
    against the current UTC time).
    """
    previous = store.last_report_time()
    if previous is None:
        return True
    age_seconds = (datetime.now(timezone.utc) - previous).total_seconds()
    interval_seconds = cfg_loader.sentinel.report_interval_hours * 3600
    return age_seconds >= interval_seconds
278
+
279
+
280
+ # ── Init ──────────────────────────────────────────────────────────────────────
281
+
282
def run_init(cfg_loader: ConfigLoader):
    """First-time setup: check Cairn, clone and index repos, probe SSH, send a test email.

    Every step is best-effort and reports through the logger; a failure in
    one repo or host does not stop the remaining checks. That includes a
    missing ``git``/``ssh`` executable and an SSH probe that exceeds its
    timeout, both of which are logged as failures instead of aborting init.
    """
    sentinel = cfg_loader.sentinel
    logger.info("=== Sentinel --init ===")

    if not cairn_installed():
        logger.error("Cairn not installed. Run: npm install -g @misterhuydo/cairn-mcp")

    for name, repo in cfg_loader.repos.items():
        local = Path(repo.local_path)
        if not local.exists():
            logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
            try:
                r = subprocess.run(["git", "clone", repo.repo_url, str(local)], capture_output=True, text=True)
            except OSError as e:
                # Raised when the git executable cannot be started at all.
                logger.error("Clone failed for %s: %s", name, e)
                continue
            if r.returncode != 0:
                logger.error("Clone failed for %s: %s", name, r.stderr)
                continue
        index_repo(repo)

    for src_name, src in cfg_loader.log_sources.items():
        if src.source_type == "ssh" and src.hosts:
            # Only the first host of each source is probed.
            host = src.hosts[0]
            logger.info("Testing SSH to %s (%s)...", src_name, host)
            logger.info(" SSH %s: %s", host, _probe_ssh(src.key, host))

    logger.info("Sending test email...")
    try:
        build_and_send(sentinel, StateStore(sentinel.state_db))
    except Exception as e:
        logger.error("Test email failed: %s", e)

    logger.info("=== Init complete ===")


def _probe_ssh(key, host):
    """Run ``echo ok`` on *host* over SSH and return "OK" or a "FAILED — ..." description."""
    # NOTE(review): StrictHostKeyChecking=no accepts any host key, which
    # leaves this probe open to man-in-the-middle attacks. Kept as in the
    # original; consider "accept-new" or a managed known_hosts file.
    command = [
        "ssh", "-i", key, "-o", "StrictHostKeyChecking=no",
        "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok",
    ]
    try:
        r = subprocess.run(command, capture_output=True, text=True, timeout=15)
    except subprocess.TimeoutExpired:
        return "FAILED — timed out after 15s"
    except OSError as e:
        # Raised when the ssh executable cannot be started at all.
        return f"FAILED — {e}"
    return "OK" if r.returncode == 0 else f"FAILED — {r.stderr.strip()}"
317
+
318
+
319
+ # ── Entry point ───────────────────────────────────────────────────────────────
320
+
321
async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Run poll_cycle() forever, sleeping ``poll_interval_seconds`` between cycles.

    The interval is read once, before the loop starts. Any exception raised
    by a cycle is logged with its traceback and the loop carries on.
    """
    interval = cfg_loader.sentinel.poll_interval_seconds
    repo_names = list(cfg_loader.repos.keys())
    logger.info("Sentinel starting — poll interval: %ds, repos: %s", interval, repo_names)

    while True:
        try:
            await poll_cycle(cfg_loader, store)
        except Exception as exc:
            logger.exception("Unhandled error in poll cycle: %s", exc)
        await asyncio.sleep(interval)
330
+
331
+
332
def main():
    """Command-line entry point: prepare directories, load config, then init or watch.

    Flags:
      --init    run the first-time setup (run_init) and exit
      --config  config directory path (default: ./config)
    """
    # Default working directories, relative to the current working directory.
    for directory in ("logs", "workspace/fetched", "workspace/patches", "issues"):
        Path(directory).mkdir(parents=True, exist_ok=True)

    parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
    parser.add_argument("--init", action="store_true", help="First-time setup")
    parser.add_argument("--config", default="./config", help="Config directory path")
    args = parser.parse_args()

    cfg_loader = ConfigLoader(config_dir=args.config)

    # The fix pipelines write patches under the *configured* workspace
    # directory, which may differ from the default "workspace/" created above.
    (Path(cfg_loader.sentinel.workspace_dir) / "patches").mkdir(parents=True, exist_ok=True)

    store = StateStore(cfg_loader.sentinel.state_db)
    _register_signals()

    if args.init:
        run_init(cfg_loader)
        return

    asyncio.run(run_loop(cfg_loader, store))


if __name__ == "__main__":
    main()