@misterhuydo/sentinel 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -41,8 +41,9 @@ class SentinelConfig:
41
41
  smtp_port: int = 587
42
42
  smtp_user: str = ""
43
43
  smtp_password: str = ""
44
- report_recipients: list[str] = field(default_factory=list)
44
+ mails: list[str] = field(default_factory=list)
45
45
  report_interval_hours: int = 6
46
+ send_health: bool = False
46
47
  state_db: str = "./sentinel.db"
47
48
  workspace_dir: str = "./workspace"
48
49
  claude_code_bin: str = "claude"
@@ -114,8 +115,9 @@ class ConfigLoader:
114
115
  c.smtp_port = int(d.get("SMTP_PORT", 587))
115
116
  c.smtp_user = d.get("SMTP_USER", "")
116
117
  c.smtp_password = d.get("SMTP_PASSWORD", "")
117
- c.report_recipients = _csv(d.get("REPORT_RECIPIENTS", ""))
118
+ c.mails = _csv(d.get("MAILS", ""))
118
119
  c.report_interval_hours = int(d.get("REPORT_INTERVAL_HOURS", 6))
120
+ c.send_health = d.get("SEND_HEALTH", "disabled").lower() == "enabled"
119
121
  c.state_db = d.get("STATE_DB", "./sentinel.db")
120
122
  c.workspace_dir = d.get("WORKSPACE_DIR", "./workspace")
121
123
  c.claude_code_bin = d.get("CLAUDE_CODE_BIN", "claude")
@@ -1,223 +1,237 @@
1
- """
2
- main.py — Sentinel entry point and watch loop.
3
-
4
- Usage:
5
- python -m sentinel.main # run watch loop
6
- python -m sentinel.main --init # first-time setup
7
- """
8
-
9
- import argparse
10
- import asyncio
11
- import logging
12
- import signal
13
- import subprocess
14
- import sys
15
- from datetime import datetime, timezone
16
- from pathlib import Path
17
-
18
- from .cairn_client import ensure_installed as cairn_installed, index_repo
19
- from .config_loader import ConfigLoader
20
- from .fix_engine import generate_fix
21
- from .git_manager import apply_and_commit, publish
22
- from .cicd_trigger import trigger as cicd_trigger
23
- from .log_fetcher import fetch_all
24
- from .log_parser import parse_all, ErrorEvent
25
- from .repo_router import route
26
- from .reporter import build_and_send
27
- from .state_store import StateStore
28
-
29
- logging.basicConfig(
30
- level=logging.INFO,
31
- format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
32
- handlers=[
33
- logging.StreamHandler(sys.stdout),
34
- logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
35
- ],
36
- )
37
- logger = logging.getLogger("sentinel")
38
-
39
- _report_requested = False
40
-
41
-
42
- def _on_sigusr1(*_):
43
- global _report_requested
44
- _report_requested = True
45
- logger.info("SIGUSR1 received — health report queued")
46
-
47
-
48
- def _register_signals():
49
- try:
50
- signal.signal(signal.SIGUSR1, _on_sigusr1)
51
- except (OSError, AttributeError):
52
- pass
53
-
54
-
55
- # ── Fix pipeline ──────────────────────────────────────────────────────────────
56
-
57
- async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
58
- sentinel = cfg_loader.sentinel
59
-
60
- repo = route(event, cfg_loader.repos)
61
- if not repo:
62
- return
63
-
64
- if Path("SENTINEL_PAUSE").exists():
65
- logger.info("SENTINEL_PAUSE present — fix activity halted")
66
- return
67
-
68
- if event.is_infra_issue:
69
- logger.info("Infra issue for %s — log only", event.fingerprint)
70
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
71
- return
72
-
73
- if event.severity == "CRITICAL" and repo.auto_publish:
74
- logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
75
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
76
- build_and_send(sentinel, store)
77
- return
78
-
79
- if store.fix_attempted_recently(event.fingerprint, hours=24):
80
- logger.debug("Fix already attempted recently for %s", event.fingerprint)
81
- return
82
-
83
- patches_dir = Path(sentinel.workspace_dir) / "patches"
84
- status, patch_path = generate_fix(event, repo, sentinel, patches_dir)
85
-
86
- if status != "patch" or patch_path is None:
87
- store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed", repo_name=repo.repo_name)
88
- return
89
-
90
- commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
91
- if commit_status != "committed":
92
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
93
- return
94
-
95
- branch, pr_url = publish(event, repo, sentinel, commit_hash)
96
- store.record_fix(
97
- event.fingerprint,
98
- "applied" if repo.auto_publish else "pending",
99
- patch_path=str(patch_path),
100
- commit_hash=commit_hash,
101
- branch=branch,
102
- pr_url=pr_url,
103
- repo_name=repo.repo_name,
104
- )
105
-
106
- if repo.auto_publish:
107
- cicd_trigger(repo, store, event.fingerprint)
108
-
109
-
110
- # ── Poll cycle ────────────────────────────────────────────────────────────────
111
-
112
- async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
113
- global _report_requested
114
-
115
- sources = list(cfg_loader.log_sources.values())
116
- if not sources:
117
- logger.warning("No log-configs found")
118
- return
119
-
120
- logger.info("Fetching logs from %d source(s)...", len(sources))
121
- fetched = await fetch_all(sources, cfg_loader.sentinel)
122
-
123
- events = parse_all(fetched, cfg_loader.log_sources)
124
- logger.info("Parsed %d error/warn events", len(events))
125
-
126
- new_events = []
127
- for event in events:
128
- store.record_error(event.fingerprint, event.source, event.message)
129
- if not store.fix_attempted_recently(event.fingerprint):
130
- new_events.append(event)
131
-
132
- logger.info("%d new event(s) to process", len(new_events))
133
- await asyncio.gather(*[_handle_error(e, cfg_loader, store) for e in new_events], return_exceptions=True)
134
-
135
- if _report_requested or _report_due(cfg_loader, store):
136
- _report_requested = False
137
- logger.info("Sending health report...")
138
- build_and_send(cfg_loader.sentinel, store)
139
-
140
-
141
- def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
142
- last = store.last_report_time()
143
- if last is None:
144
- return True
145
- elapsed = (datetime.now(timezone.utc) - last).total_seconds()
146
- return elapsed >= cfg_loader.sentinel.report_interval_hours * 3600
147
-
148
-
149
- # ── Init ──────────────────────────────────────────────────────────────────────
150
-
151
- def run_init(cfg_loader: ConfigLoader):
152
- sentinel = cfg_loader.sentinel
153
- logger.info("=== Sentinel --init ===")
154
-
155
- if not cairn_installed():
156
- logger.error("Cairn not installed. Run: npm install -g @misterhuydo/cairn-mcp")
157
-
158
- for name, repo in cfg_loader.repos.items():
159
- local = Path(repo.local_path)
160
- if not local.exists():
161
- logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
162
- r = subprocess.run(["git", "clone", repo.repo_url, str(local)], capture_output=True, text=True)
163
- if r.returncode != 0:
164
- logger.error("Clone failed for %s: %s", name, r.stderr)
165
- continue
166
- index_repo(repo)
167
-
168
- for src_name, src in cfg_loader.log_sources.items():
169
- if src.source_type == "ssh" and src.hosts:
170
- host = src.hosts[0]
171
- logger.info("Testing SSH to %s (%s)...", src_name, host)
172
- r = subprocess.run(
173
- ["ssh", "-i", src.key, "-o", "StrictHostKeyChecking=no",
174
- "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok"],
175
- capture_output=True, text=True, timeout=15,
176
- )
177
- logger.info(" SSH %s: %s", host, "OK" if r.returncode == 0 else f"FAILED — {r.stderr.strip()}")
178
-
179
- logger.info("Sending test email...")
180
- try:
181
- build_and_send(sentinel, StateStore(sentinel.state_db))
182
- except Exception as e:
183
- logger.error("Test email failed: %s", e)
184
-
185
- logger.info("=== Init complete ===")
186
-
187
-
188
- # ── Entry point ───────────────────────────────────────────────────────────────
189
-
190
- async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
191
- interval = cfg_loader.sentinel.poll_interval_seconds
192
- logger.info("Sentinel starting — poll interval: %ds, repos: %s", interval, list(cfg_loader.repos.keys()))
193
- while True:
194
- try:
195
- await poll_cycle(cfg_loader, store)
196
- except Exception as e:
197
- logger.exception("Unhandled error in poll cycle: %s", e)
198
- await asyncio.sleep(interval)
199
-
200
-
201
- def main():
202
- Path("logs").mkdir(exist_ok=True)
203
- Path("workspace/fetched").mkdir(parents=True, exist_ok=True)
204
- Path("workspace/patches").mkdir(parents=True, exist_ok=True)
205
-
206
- parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
207
- parser.add_argument("--init", action="store_true", help="First-time setup")
208
- parser.add_argument("--config", default="./config", help="Config directory path")
209
- args = parser.parse_args()
210
-
211
- cfg_loader = ConfigLoader(config_dir=args.config)
212
- store = StateStore(cfg_loader.sentinel.state_db)
213
- _register_signals()
214
-
215
- if args.init:
216
- run_init(cfg_loader)
217
- return
218
-
219
- asyncio.run(run_loop(cfg_loader, store))
220
-
221
-
222
- if __name__ == "__main__":
223
- main()
1
+ """
2
+ main.py — Sentinel entry point and watch loop.
3
+
4
+ Usage:
5
+ python -m sentinel.main # run watch loop
6
+ python -m sentinel.main --init # first-time setup
7
+ """
8
+
9
+ import argparse
10
+ import asyncio
11
+ import logging
12
+ import signal
13
+ import subprocess
14
+ import sys
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ from .cairn_client import ensure_installed as cairn_installed, index_repo
19
+ from .config_loader import ConfigLoader
20
+ from .fix_engine import generate_fix
21
+ from .git_manager import apply_and_commit, publish
22
+ from .cicd_trigger import trigger as cicd_trigger
23
+ from .log_fetcher import fetch_all
24
+ from .log_parser import parse_all, ErrorEvent
25
+ from .repo_router import route
26
+ from .reporter import build_and_send, send_fix_notification
27
+ from .state_store import StateStore
28
+
29
# The FileHandler below opens logs/sentinel.log as soon as this module is
# imported -- before main() has had a chance to create the directory -- so
# make sure it exists first.
Path("logs").mkdir(exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
    ],
)
logger = logging.getLogger("sentinel")

# Set to True by the SIGUSR1 handler; read and cleared by poll_cycle().
_report_requested = False
40
+
41
+
42
def _on_sigusr1(*_):
    """Signal handler: remember that a health report was requested.

    The handler only flips the module-level ``_report_requested`` flag and
    logs the fact; it does not send anything itself. The signal number and
    frame passed by the interpreter are ignored.
    """
    global _report_requested
    _report_requested = True
    logger.info("SIGUSR1 received — health report queued")
46
+
47
+
48
def _register_signals():
    """Install ``_on_sigusr1`` as the SIGUSR1 handler, if the platform allows.

    Registration is best-effort: a missing ``signal.SIGUSR1`` attribute
    (AttributeError) or a failing ``signal.signal`` call (OSError) is logged
    at debug level and otherwise ignored, so start-up continues without the
    on-demand report trigger.
    """
    try:
        signal.signal(signal.SIGUSR1, _on_sigusr1)
    except (OSError, AttributeError) as exc:
        # Deliberately non-fatal; leave a trace instead of swallowing silently.
        logger.debug("SIGUSR1 handler not installed: %s", exc)
53
+
54
+
55
+ # ── Fix pipeline ──────────────────────────────────────────────────────────────
56
+
57
async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
    """Run the fix pipeline for a single parsed error event.

    Steps, each of which may end the pipeline early:

    1. Route the event to a repo; unrouted events are ignored.
    2. Stop if a ``SENTINEL_PAUSE`` file exists in the working directory.
    3. Record infra issues, and CRITICAL events in auto-publish repos, as
       "skipped" without attempting a fix.
    4. Skip fingerprints that already had a fix attempt in the last 24 hours.
    5. Generate a patch, apply and commit it, publish it, record the result.
    6. Send the per-fix notification email.
    7. Trigger CI/CD when the repo is configured for auto-publish.

    Args:
        event: The parsed error to handle.
        cfg_loader: Loaded configuration (``sentinel`` settings and ``repos``).
        store: State store used to record fix outcomes.
    """
    sentinel = cfg_loader.sentinel

    repo = route(event, cfg_loader.repos)
    if not repo:
        return

    if Path("SENTINEL_PAUSE").exists():
        logger.info("SENTINEL_PAUSE present — fix activity halted")
        return

    if event.is_infra_issue:
        logger.info("Infra issue for %s — log only", event.fingerprint)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if event.severity == "CRITICAL" and repo.auto_publish:
        logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if store.fix_attempted_recently(event.fingerprint, hours=24):
        logger.debug("Fix already attempted recently for %s", event.fingerprint)
        return

    patches_dir = Path(sentinel.workspace_dir) / "patches"
    status, patch_path = generate_fix(event, repo, sentinel, patches_dir)

    if status != "patch" or patch_path is None:
        store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed", repo_name=repo.repo_name)
        return

    commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
    if commit_status != "committed":
        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
        return

    branch, pr_url = publish(event, repo, sentinel, commit_hash)
    store.record_fix(
        event.fingerprint,
        "applied" if repo.auto_publish else "pending",
        patch_path=str(patch_path),
        commit_hash=commit_hash,
        branch=branch,
        pr_url=pr_url,
        repo_name=repo.repo_name,
    )

    # The fix is already committed and recorded at this point. A mail failure
    # must not abort the pipeline, otherwise an "applied" fix would never
    # reach the CI/CD trigger below.
    try:
        send_fix_notification(sentinel, {
            "source": event.source,
            "severity": event.severity,
            "fingerprint": event.fingerprint,
            "first_seen": str(event.timestamp),
            "message": event.message,
            "stack_trace": getattr(event, "stack_trace", ""),
            "repo_name": repo.repo_name,
            "commit_hash": commit_hash,
            "branch": branch,
            "pr_url": pr_url,
            "auto_publish": repo.auto_publish,
            "files_changed": [],
        })
    except Exception:
        logger.exception("Fix notification failed for %s", event.fingerprint)

    if repo.auto_publish:
        cicd_trigger(repo, store, event.fingerprint)
122
+
123
+
124
+ # ── Poll cycle ────────────────────────────────────────────────────────────────
125
+
126
async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
    """Run one fetch -> parse -> fix -> (optional) digest cycle.

    Every parsed event is recorded in the store. Events whose fingerprint has
    no recent fix attempt are handed to ``_handle_error``. A health digest is
    sent afterwards only when ``send_health`` is enabled and either a report
    was requested via SIGUSR1 or the report interval has elapsed.

    Args:
        cfg_loader: Loaded configuration (``log_sources`` and ``sentinel``).
        store: State store for errors, fixes and report timestamps.
    """
    global _report_requested

    sources = list(cfg_loader.log_sources.values())
    if not sources:
        logger.warning("No log-configs found")
        return

    logger.info("Fetching logs from %d source(s)...", len(sources))
    fetched = await fetch_all(sources, cfg_loader.sentinel)

    events = parse_all(fetched, cfg_loader.log_sources)
    logger.info("Parsed %d error/warn events", len(events))

    new_events = []
    for event in events:
        store.record_error(event.fingerprint, event.source, event.message)
        if not store.fix_attempted_recently(event.fingerprint):
            new_events.append(event)

    logger.info("%d new event(s) to process", len(new_events))
    results = await asyncio.gather(
        *(_handle_error(e, cfg_loader, store) for e in new_events),
        return_exceptions=True,
    )
    # return_exceptions=True hands failures back as values; surface them in
    # the log rather than dropping them on the floor.
    for event, result in zip(new_events, results):
        if isinstance(result, BaseException):
            logger.error(
                "Fix pipeline failed for %s: %r",
                event.fingerprint, result, exc_info=result,
            )

    # NOTE(review): while SEND_HEALTH is disabled a SIGUSR1 request is neither
    # honoured nor cleared -- confirm that is the intended behaviour.
    if cfg_loader.sentinel.send_health and (_report_requested or _report_due(cfg_loader, store)):
        _report_requested = False
        logger.info("Sending health digest...")
        build_and_send(cfg_loader.sentinel, store)
153
+
154
+
155
def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
    """Return True when the next periodic health report should be sent.

    A report is due when none has been recorded yet, or when at least
    ``report_interval_hours`` have passed since ``store.last_report_time()``.
    A timestamp without timezone information is interpreted as UTC, the same
    convention ``_age`` in reporter.py applies to stored timestamps.
    """
    last = store.last_report_time()
    if last is None:
        return True
    if last.tzinfo is None:
        # Subtracting a naive datetime from an aware one raises TypeError.
        last = last.replace(tzinfo=timezone.utc)
    elapsed = (datetime.now(timezone.utc) - last).total_seconds()
    return elapsed >= cfg_loader.sentinel.report_interval_hours * 3600
161
+
162
+
163
+ # ── Init ──────────────────────────────────────────────────────────────────────
164
+
165
def run_init(cfg_loader: ConfigLoader):
    """First-time setup: clone and index repos, probe SSH, send a test email.

    Every step is best-effort. A failure is logged and the remaining steps
    still run, so one unreachable host or missing binary does not hide the
    results of the other checks.

    Args:
        cfg_loader: Loaded configuration (``sentinel``, ``repos``,
            ``log_sources``).
    """
    sentinel = cfg_loader.sentinel
    logger.info("=== Sentinel --init ===")

    if not cairn_installed():
        logger.error("Cairn not installed. Run: npm install -g @misterhuydo/cairn-mcp")

    for name, repo in cfg_loader.repos.items():
        local = Path(repo.local_path)
        if not local.exists():
            logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
            try:
                r = subprocess.run(["git", "clone", repo.repo_url, str(local)], capture_output=True, text=True)
            except OSError as exc:
                # e.g. the git binary is not on PATH
                logger.error("Clone failed for %s: %s", name, exc)
                continue
            if r.returncode != 0:
                logger.error("Clone failed for %s: %s", name, r.stderr)
                continue
        index_repo(repo)

    for src_name, src in cfg_loader.log_sources.items():
        if src.source_type == "ssh" and src.hosts:
            # Only the first host of each SSH source is probed.
            host = src.hosts[0]
            logger.info("Testing SSH to %s (%s)...", src_name, host)
            try:
                r = subprocess.run(
                    ["ssh", "-i", src.key, "-o", "StrictHostKeyChecking=no",
                     "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok"],
                    capture_output=True, text=True, timeout=15,
                )
            except (subprocess.TimeoutExpired, OSError) as exc:
                # timeout=15 raises instead of returning; report it like any
                # other failed probe and move on to the next source.
                logger.info(" SSH %s: %s", host, f"FAILED — {exc}")
                continue
            logger.info(" SSH %s: %s", host, "OK" if r.returncode == 0 else f"FAILED — {r.stderr.strip()}")

    logger.info("Sending test email...")
    try:
        build_and_send(sentinel, StateStore(sentinel.state_db))
    except Exception as e:
        logger.error("Test email failed: %s", e)

    logger.info("=== Init complete ===")
200
+
201
+
202
+ # ── Entry point ───────────────────────────────────────────────────────────────
203
+
204
async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Run ``poll_cycle`` forever, sleeping ``poll_interval_seconds`` between runs.

    An exception escaping a cycle is logged with its traceback and the loop
    carries on with the next cycle after the usual sleep.
    """
    interval = cfg_loader.sentinel.poll_interval_seconds
    # Separator restored: the message had degraded to "Sentinel starting poll interval".
    logger.info("Sentinel starting — poll interval: %ds, repos: %s", interval, list(cfg_loader.repos.keys()))
    while True:
        try:
            await poll_cycle(cfg_loader, store)
        except Exception as e:
            logger.exception("Unhandled error in poll cycle: %s", e)
        await asyncio.sleep(interval)
213
+
214
+
215
def main():
    """Command-line entry point.

    Creates the working directories, parses ``--init`` / ``--config``, loads
    the configuration and state store, registers signal handlers, then either
    runs the one-off init routine or starts the watch loop.
    """
    Path("logs").mkdir(exist_ok=True)
    Path("workspace/fetched").mkdir(parents=True, exist_ok=True)
    Path("workspace/patches").mkdir(parents=True, exist_ok=True)

    parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
    parser.add_argument("--init", action="store_true", help="First-time setup")
    parser.add_argument("--config", default="./config", help="Config directory path")
    args = parser.parse_args()

    cfg_loader = ConfigLoader(config_dir=args.config)

    # The fix pipeline writes patches under the configured workspace_dir,
    # which need not be the default ./workspace created above.
    (Path(cfg_loader.sentinel.workspace_dir) / "patches").mkdir(parents=True, exist_ok=True)

    store = StateStore(cfg_loader.sentinel.state_db)
    _register_signals()

    if args.init:
        run_init(cfg_loader)
        return

    asyncio.run(run_loop(cfg_loader, store))


if __name__ == "__main__":
    main()
@@ -1,173 +1,138 @@
1
- """
2
- reporter.py Build and send HTML health-report emails.
3
-
4
- Scheduled every REPORT_INTERVAL_HOURS or triggered by SIGUSR1.
5
- """
6
-
7
- import logging
8
- import smtplib
9
- from datetime import datetime, timezone
10
- from email.mime.multipart import MIMEMultipart
11
- from email.mime.text import MIMEText
12
-
13
- from jinja2 import Template
14
-
15
- from .config_loader import SentinelConfig
16
- from .state_store import StateStore
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- _HTML_TEMPLATE = Template("""\
21
- <!DOCTYPE html>
22
- <html>
23
- <head>
24
- <meta charset="utf-8">
25
- <style>
26
- body { font-family: Arial, sans-serif; font-size: 14px; color: #222; }
27
- h2 { color: #1a73e8; }
28
- h3 { color: #444; border-bottom: 1px solid #ddd; padding-bottom: 4px; }
29
- table { border-collapse: collapse; width: 100%; margin-bottom: 16px; }
30
- th { background: #f1f3f4; text-align: left; padding: 6px 10px; }
31
- td { padding: 5px 10px; border-bottom: 1px solid #eee; }
32
- .ok { color: #2e7d32; }
33
- .fail { color: #c62828; }
34
- .warn { color: #e65100; }
35
- .pr-link { font-weight: bold; }
36
- .mono { font-family: monospace; font-size: 12px; }
37
- </style>
38
- </head>
39
- <body>
40
- <h2>🤖 Sentinel Health Report</h2>
41
- <p>Generated: <strong>{{ generated_at }}</strong></p>
42
-
43
- <h3>Summary (last {{ hours }}h)</h3>
44
- <table>
45
- <tr><th>Metric</th><th>Count</th></tr>
46
- <tr><td>Errors detected</td><td>{{ stats.errors }}</td></tr>
47
- <tr><td>Fixes applied</td><td class="ok">{{ stats.applied }}</td></tr>
48
- <tr><td>Fixes failed</td><td class="fail">{{ stats.failed }}</td></tr>
49
- <tr><td>Skipped</td><td class="warn">{{ stats.skipped }}</td></tr>
50
- </table>
51
-
52
- {% if open_prs %}
53
- <h3>⏳ Pending Review (AUTO_PUBLISH=false)</h3>
54
- <p>The following fixes are waiting for admin approval. Review and merge on GitHub:</p>
55
- <table>
56
- <tr><th>Fingerprint</th><th>Branch</th><th>PR</th><th>Age</th></tr>
57
- {% for pr in open_prs %}
58
- <tr>
59
- <td class="mono">{{ pr.fingerprint[:8] }}</td>
60
- <td class="mono">{{ pr.branch }}</td>
61
- <td><a class="pr-link" href="{{ pr.pr_url }}">{{ pr.pr_url }}</a></td>
62
- <td>{{ pr.age }}</td>
63
- </tr>
64
- {% endfor %}
65
- </table>
66
- {% endif %}
67
-
68
- {% if recent_fixes %}
69
- <h3>Recent Fix Activity</h3>
70
- <table>
71
- <tr><th>Time</th><th>Fingerprint</th><th>Status</th><th>Commit</th></tr>
72
- {% for fix in recent_fixes %}
73
- <tr>
74
- <td>{{ fix.timestamp }}</td>
75
- <td class="mono">{{ fix.fingerprint[:8] }}</td>
76
- <td class="{{ 'ok' if fix.status == 'applied' else 'fail' if fix.status == 'failed' else 'warn' }}">
77
- {{ fix.status }}
78
- </td>
79
- <td class="mono">{{ fix.commit_hash[:8] if fix.commit_hash else '-' }}</td>
80
- </tr>
81
- {% endfor %}
82
- </table>
83
- {% endif %}
84
-
85
- <hr>
86
- <small>Sentinel Autonomous DevOps Agent</small>
87
- </body>
88
- </html>
89
- """)
90
-
91
-
92
- def _age(ts_str: str) -> str:
93
- try:
94
- ts = datetime.fromisoformat(ts_str)
95
- if ts.tzinfo is None:
96
- ts = ts.replace(tzinfo=timezone.utc)
97
- delta = datetime.now(timezone.utc) - ts
98
- hours = int(delta.total_seconds() // 3600)
99
- if hours < 1:
100
- return f"{int(delta.total_seconds() // 60)}m"
101
- return f"{hours}h"
102
- except Exception:
103
- return "?"
104
-
105
-
106
- def build_and_send(cfg: SentinelConfig, store: StateStore):
107
- hours = cfg.report_interval_hours
108
- errors = store.get_recent_errors(hours)
109
- fixes = store.get_recent_fixes(hours)
110
- open_prs = store.get_open_prs()
111
-
112
- stats = {
113
- "errors": len(errors),
114
- "applied": sum(1 for f in fixes if f["status"] == "applied"),
115
- "failed": sum(1 for f in fixes if f["status"] == "failed"),
116
- "skipped": sum(1 for f in fixes if f["status"] == "skipped"),
117
- }
118
-
119
- # Annotate PRs with human-readable age
120
- for pr in open_prs:
121
- pr["age"] = _age(pr.get("timestamp", ""))
122
-
123
- html = _HTML_TEMPLATE.render(
124
- generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
125
- hours=hours,
126
- stats=stats,
127
- open_prs=open_prs,
128
- recent_fixes=fixes,
129
- )
130
-
131
- if not cfg.report_recipients:
132
- logger.warning("No REPORT_RECIPIENTS configured — skipping email")
133
- return
134
-
135
- _send_email(cfg, html, stats)
136
- store.record_report(
137
- recipient_count=len(cfg.report_recipients),
138
- summary=stats,
139
- )
140
-
141
-
142
- def _send_email(cfg: SentinelConfig, html: str, stats: dict):
143
- subject = (
144
- f"[Sentinel] Health Report — "
145
- f"{stats['applied']} fixed, {stats['failed']} failed, "
146
- f"{stats['errors']} errors detected"
147
- )
148
-
149
- msg = MIMEMultipart("alternative")
150
- msg["Subject"] = subject
151
- msg["From"] = cfg.smtp_user
152
- msg["To"] = ", ".join(cfg.report_recipients)
153
- msg.attach(MIMEText(html, "html"))
154
-
155
- if cfg.smtp_host.lower() == "ses":
156
- _send_ses(cfg, msg)
157
- else:
158
- _send_smtp(cfg, msg)
159
- logger.info("Health report sent to %d recipient(s)", len(cfg.report_recipients))
160
-
161
-
162
- def _send_smtp(cfg: SentinelConfig, msg: MIMEMultipart):
163
- with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port) as smtp:
164
- smtp.ehlo()
165
- smtp.starttls()
166
- smtp.login(cfg.smtp_user, cfg.smtp_password)
167
- smtp.sendmail(cfg.smtp_user, cfg.report_recipients, msg.as_string())
168
-
169
-
170
- def _send_ses(cfg: SentinelConfig, msg: MIMEMultipart):
171
- # AWS SES via SMTP endpoint — same as SMTP with different host
172
- # Set SMTP_HOST=email-smtp.us-east-1.amazonaws.com and use SES SMTP credentials
173
- _send_smtp(cfg, msg)
1
+ """
2
+ reporter.py -- Email notifications for Sentinel.
3
+
4
+ Two modes:
5
+ 1. Per-fix notification -- sent immediately after every fix (always on).
6
+ 2. Health digest -- periodic summary, only if SEND_HEALTH=enabled.
7
+ """
8
+
9
+ import logging
10
+ import smtplib
11
+ from datetime import datetime, timezone
12
+ from email.mime.multipart import MIMEMultipart
13
+ from email.mime.text import MIMEText
14
+
15
+ from jinja2 import Template
16
+
17
+ from .config_loader import SentinelConfig
18
+ from .state_store import StateStore
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # ---- Templates ---------------------------------------------------------------
23
+
24
+ _FIX_TEMPLATE = Template('<!DOCTYPE html><html><head><meta charset="utf-8">\n<style>\n body{font-family:Arial,sans-serif;font-size:14px;color:#222}\n h2{color:#1a73e8;margin-bottom:4px}\n h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}\n table{border-collapse:collapse;width:100%;margin-bottom:16px}\n th{background:#f1f3f4;text-align:left;padding:6px 10px}\n td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}\n .label{font-weight:bold;width:160px}\n .ok{color:#2e7d32;font-weight:bold}\n .mono{font-family:monospace;font-size:12px}\n pre{background:#f8f8f8;border:1px solid #ddd;padding:10px;font-size:12px;white-space:pre-wrap}\n .badge-a{background:#2e7d32;color:#fff;padding:2px 8px;border-radius:4px}\n .badge-p{background:#e65100;color:#fff;padding:2px 8px;border-radius:4px}\n</style></head><body>\n<h2>Sentinel Fix Report</h2>\n<p>\n <span class="{{ \'badge-a\' if auto_publish else \'badge-p\' }}">\n {{ \'PUSHED TO \' + branch|upper if auto_publish else \'PENDING REVIEW\' }}\n </span>\n &nbsp;<strong>{{ repo_name }}</strong> &middot; {{ generated_at }}\n</p>\n<h3>Error Detected</h3>\n<table>\n <tr><td class="label">Service</td><td class="mono">{{ source }}</td></tr>\n <tr><td class="label">Severity</td><td class="mono">{{ severity }}</td></tr>\n <tr><td class="label">Fingerprint</td><td class="mono">{{ fingerprint }}</td></tr>\n <tr><td class="label">First seen</td><td>{{ first_seen }}</td></tr>\n <tr><td class="label">Message</td><td class="mono">{{ message }}</td></tr>\n</table>\n{% if stack_trace %}<h3>Stack Trace</h3><pre>{{ stack_trace }}</pre>{% endif %}\n<h3>Fix Applied</h3>\n<table>\n <tr><td class="label">Repository</td><td class="mono">{{ repo_name }}</td></tr>\n <tr><td class="label">Commit</td><td class="mono">{{ commit_hash }}</td></tr>\n <tr><td class="label">Branch</td><td class="mono">{{ branch }}</td></tr>\n {% if pr_url %}\n <tr><td class="label">Pull Request</td>\n <td><a href="{{ pr_url }}">{{ pr_url }}</a> 
&mdash; review and merge to apply</td></tr>\n {% else %}\n <tr><td class="label">Status</td>\n <td class="ok">Pushed directly to {{ branch }}</td></tr>\n {% endif %}\n {% if files_changed %}\n <tr><td class="label">Files changed</td>\n <td class="mono">{{ files_changed | join(\'<br>\') }}</td></tr>\n {% endif %}\n</table>\n<hr><small>Sentinel &mdash; Autonomous DevOps Agent</small>\n</body></html>\n')
25
+
26
+ _HEALTH_TEMPLATE = Template('<!DOCTYPE html><html><head><meta charset="utf-8">\n<style>\n body{font-family:Arial,sans-serif;font-size:14px;color:#222}\n h2{color:#1a73e8}\n h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}\n table{border-collapse:collapse;width:100%;margin-bottom:16px}\n th{background:#f1f3f4;text-align:left;padding:6px 10px}\n td{padding:5px 10px;border-bottom:1px solid #eee}\n .ok{color:#2e7d32}.fail{color:#c62828}.warn{color:#e65100}\n .mono{font-family:monospace;font-size:12px}\n</style></head><body>\n<h2>Sentinel Health Digest</h2>\n<p>Generated: <strong>{{ generated_at }}</strong></p>\n<h3>Summary (last {{ hours }}h)</h3>\n<table>\n <tr><th>Metric</th><th>Count</th></tr>\n <tr><td>Errors detected</td><td>{{ stats.errors }}</td></tr>\n <tr><td>Fixes applied</td><td class="ok">{{ stats.applied }}</td></tr>\n <tr><td>Fixes failed</td><td class="fail">{{ stats.failed }}</td></tr>\n <tr><td>Skipped</td><td class="warn">{{ stats.skipped }}</td></tr>\n</table>\n{% if open_prs %}\n<h3>Pending Review (AUTO_PUBLISH=false)</h3>\n<table>\n <tr><th>Repo</th><th>Branch</th><th>PR</th><th>Age</th></tr>\n {% for pr in open_prs %}\n <tr>\n <td>{{ pr.repo_name }}</td>\n <td class="mono">{{ pr.branch }}</td>\n <td><a href="{{ pr.pr_url }}">{{ pr.pr_url }}</a></td>\n <td>{{ pr.age }}</td>\n </tr>\n {% endfor %}\n</table>\n{% endif %}\n<hr><small>Sentinel &mdash; Autonomous DevOps Agent</small>\n</body></html>\n')
27
+
28
+
29
+ # ---- Per-fix notification ----------------------------------------------------
30
+
31
def send_fix_notification(cfg: SentinelConfig, fix: dict):
    """
    Send an immediate email after a fix is applied or a PR is opened.

    Nothing is sent (a warning is logged) when ``cfg.mails`` is empty.

    fix dict keys:
    source, severity, fingerprint, first_seen, message, stack_trace,
    repo_name, commit_hash, branch, pr_url, auto_publish, files_changed

    Missing or ``None`` values fall back to a placeholder. All text values
    are HTML-escaped before rendering, and the subject is reduced to a
    single line.
    """
    import html as _html  # stdlib; imported locally so the file's import block is untouched

    if not cfg.mails:
        logger.warning("No MAILS configured -- skipping fix notification")
        return

    def _raw(key, default=""):
        # Missing and None are treated alike so slicing/rendering never sees None.
        value = fix.get(key)
        return default if value is None else str(value)

    def _esc(key, default=""):
        # _FIX_TEMPLATE is built with a plain Template(...) call (no
        # autoescape), so log text such as "<module>" in a traceback would
        # otherwise be read as markup by the mail client.
        return _html.escape(_raw(key, default))

    auto_publish = fix.get("auto_publish", False)
    verb = "fix" if auto_publish else "PR"

    # Header values must stay on one line; collapse any whitespace/newlines
    # coming from multi-line log messages.
    subject_source = " ".join(_raw("source", "unknown").split())
    subject_message = " ".join(_raw("message").split())[:80]
    subject = f"[Sentinel] {verb}({subject_source}): {subject_message}"

    html = _FIX_TEMPLATE.render(
        generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
        auto_publish=auto_publish,
        repo_name=_esc("repo_name", "unknown"),
        source=_esc("source", "unknown"),
        severity=_esc("severity", "ERROR"),
        fingerprint=_esc("fingerprint"),
        first_seen=_esc("first_seen"),
        message=_esc("message"),
        stack_trace=_esc("stack_trace"),
        commit_hash=_esc("commit_hash"),
        branch=_esc("branch", "unknown"),
        pr_url=_esc("pr_url"),
        files_changed=[_html.escape(str(path)) for path in (fix.get("files_changed") or [])],
    )
    _send_email(cfg, subject, html)
    logger.info("Fix notification sent to %d recipient(s)", len(cfg.mails))
65
+
66
+
67
+ # ---- Health digest -----------------------------------------------------------
68
+
69
def build_and_send(cfg: SentinelConfig, store: StateStore):
    """Render and send the periodic health digest, then record that it was sent.

    Returns early with a warning when no recipients are configured. The
    digest covers the last ``cfg.report_interval_hours`` hours of errors and
    fixes plus all currently open PRs; each PR row is annotated in place with
    a human-readable ``age``.
    """
    if not cfg.mails:
        logger.warning("No MAILS configured -- skipping health digest")
        return

    window = cfg.report_interval_hours
    recent_errors = store.get_recent_errors(window)
    recent_fixes = store.get_recent_fixes(window)
    pending = store.get_open_prs()

    def _count(status):
        # Number of recent fixes recorded with the given status.
        return sum(1 for entry in recent_fixes if entry["status"] == status)

    stats = {"errors": len(recent_errors)}
    for status in ("applied", "failed", "skipped"):
        stats[status] = _count(status)

    for row in pending:
        row["age"] = _age(row.get("timestamp", ""))

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    html = _HEALTH_TEMPLATE.render(
        generated_at=stamp,
        hours=window,
        stats=stats,
        open_prs=pending,
    )
    subject = (
        "[Sentinel] Health Digest -- "
        f"{stats['applied']} fixed, {stats['failed']} failed, "
        f"{stats['errors']} detected"
    )
    _send_email(cfg, subject, html)

    recipient_total = len(cfg.mails)
    store.record_report(recipient_count=recipient_total, summary=stats)
    logger.info("Health digest sent to %d recipient(s)", recipient_total)
101
+
102
+
103
+ # ---- Shared helpers ----------------------------------------------------------
104
+
105
def _send_email(cfg: SentinelConfig, subject: str, html: str):
    """Wrap ``html`` in a MIME message addressed to ``cfg.mails`` and send it.

    The sender is ``cfg.smtp_user``. Delivery goes through ``_send_ses`` when
    ``cfg.smtp_host`` equals "ses" (case-insensitive), otherwise through
    ``_send_smtp``.
    """
    message = MIMEMultipart("alternative")
    headers = (
        ("Subject", subject),
        ("From", cfg.smtp_user),
        ("To", ", ".join(cfg.mails)),
    )
    for header_name, header_value in headers:
        message[header_name] = header_value
    message.attach(MIMEText(html, "html"))

    deliver = _send_ses if cfg.smtp_host.lower() == "ses" else _send_smtp
    deliver(cfg, message)
115
+
116
+
117
def _send_smtp(cfg: SentinelConfig, msg: MIMEMultipart):
    """Deliver ``msg`` to ``cfg.mails`` over SMTP with STARTTLS and login.

    Raises:
        smtplib.SMTPException, OSError: on connection, TLS, authentication
            or delivery failure (including a socket timeout).
    """
    # Without a timeout a stalled server blocks the caller indefinitely.
    with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port, timeout=30) as smtp:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(cfg.smtp_user, cfg.smtp_password)
        smtp.sendmail(cfg.smtp_user, cfg.mails, msg.as_string())
123
+
124
+
125
def _send_ses(cfg: SentinelConfig, msg: MIMEMultipart):
    """Deliver ``msg`` via AWS SES; currently a plain delegate to ``_send_smtp``.

    NOTE(review): this path is chosen when SMTP_HOST is literally "ses", and
    ``_send_smtp`` then connects to ``cfg.smtp_host`` unchanged -- presumably
    the SES SMTP endpoint hostname should be configured as SMTP_HOST instead;
    confirm the intended setup.
    """
    return _send_smtp(cfg, msg)
127
+
128
+
129
def _age(ts_str: str) -> str:
    """Return how long ago the ISO-8601 timestamp ``ts_str`` was, e.g. "42m" or "5h".

    Timestamps without timezone information are interpreted as UTC. Ages
    under one hour are given in whole minutes, everything else in whole
    hours. A timestamp in the future (clock skew) is reported as "0m".
    Returns "?" when ``ts_str`` is not a parseable ISO-8601 string.
    """
    try:
        ts = datetime.fromisoformat(ts_str)
    except (TypeError, ValueError):
        # TypeError: not a string (e.g. None); ValueError: not ISO-8601.
        return "?"
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    # Clamp so a future timestamp does not render as a negative age.
    elapsed = max((datetime.now(timezone.utc) - ts).total_seconds(), 0.0)
    minutes = int(elapsed // 60)
    hours = minutes // 60
    return f"{minutes}m" if hours < 1 else f"{hours}h"
@@ -1,31 +1,32 @@
1
- # Sentinel master config
2
-
3
- # Schedule
4
- POLL_INTERVAL_SECONDS=120
5
-
6
- # Email reporting
7
- SMTP_HOST=smtp.gmail.com
8
- SMTP_PORT=587
9
- SMTP_USER=sentinel@yourdomain.com
10
- SMTP_PASSWORD=<app-password>
11
- REPORT_RECIPIENTS=huy@yourdomain.com
12
- REPORT_INTERVAL_HOURS=1
13
-
14
- # State DB
15
- STATE_DB=./sentinel.db
16
-
17
- # Workspace
18
- WORKSPACE_DIR=./workspace
19
-
20
- # Claude Code binary path
21
- CLAUDE_CODE_BIN=claude
22
-
23
- # GitHub token (required for opening PRs when AUTO_PUBLISH=false)
24
- GITHUB_TOKEN=<redacted — leaked personal access token; revoke and rotate>
25
-
26
- # Fix confidence threshold (0.0 - 1.0); fixes below this are skipped
27
- FIX_CONFIDENCE_THRESHOLD=0.7
28
-
29
- # Rolling log retention window — fetched logs older than this are pruned
30
- # Claude Code reads the full window for context when generating fixes
31
- LOG_RETENTION_HOURS=48
1
+ # Sentinel master config
2
+
3
+ # Schedule
4
+ POLL_INTERVAL_SECONDS=120
5
+
6
+ # Email reporting
7
+ SMTP_HOST=smtp.gmail.com
8
+ SMTP_PORT=587
9
+ SMTP_USER=sentinel@yourdomain.com
10
+ SMTP_PASSWORD=<app-password>
11
+ MAILS=huy@yourdomain.com
12
+ SEND_HEALTH=disabled
13
+ REPORT_INTERVAL_HOURS=1
14
+
15
+ # State DB
16
+ STATE_DB=./sentinel.db
17
+
18
+ # Workspace
19
+ WORKSPACE_DIR=./workspace
20
+
21
+ # Claude Code binary path
22
+ CLAUDE_CODE_BIN=claude
23
+
24
+ # GitHub token (required for opening PRs when AUTO_PUBLISH=false)
25
+ GITHUB_TOKEN=<github-pat>
26
+
27
+ # Fix confidence threshold (0.0 - 1.0); fixes below this are skipped
28
+ FIX_CONFIDENCE_THRESHOLD=0.7
29
+
30
+ # Rolling log retention window — fetched logs older than this are pruned
31
+ # Claude Code reads the full window for context when generating fixes
32
+ LOG_RETENTION_HOURS=48