@misterhuydo/sentinel 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/python/sentinel/__pycache__/config_loader.cpython-313.pyc +0 -0
- package/python/sentinel/__pycache__/main.cpython-313.pyc +0 -0
- package/python/sentinel/__pycache__/reporter.cpython-313.pyc +0 -0
- package/python/sentinel/config_loader.py +4 -2
- package/python/sentinel/main.py +237 -223
- package/python/sentinel/reporter.py +138 -173
- package/templates/sentinel.properties +32 -31
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -41,8 +41,9 @@ class SentinelConfig:
|
|
|
41
41
|
smtp_port: int = 587
|
|
42
42
|
smtp_user: str = ""
|
|
43
43
|
smtp_password: str = ""
|
|
44
|
-
|
|
44
|
+
mails: list[str] = field(default_factory=list)
|
|
45
45
|
report_interval_hours: int = 6
|
|
46
|
+
send_health: bool = False
|
|
46
47
|
state_db: str = "./sentinel.db"
|
|
47
48
|
workspace_dir: str = "./workspace"
|
|
48
49
|
claude_code_bin: str = "claude"
|
|
@@ -114,8 +115,9 @@ class ConfigLoader:
|
|
|
114
115
|
c.smtp_port = int(d.get("SMTP_PORT", 587))
|
|
115
116
|
c.smtp_user = d.get("SMTP_USER", "")
|
|
116
117
|
c.smtp_password = d.get("SMTP_PASSWORD", "")
|
|
117
|
-
c.
|
|
118
|
+
c.mails = _csv(d.get("MAILS", ""))
|
|
118
119
|
c.report_interval_hours = int(d.get("REPORT_INTERVAL_HOURS", 6))
|
|
120
|
+
c.send_health = d.get("SEND_HEALTH", "disabled").lower() == "enabled"
|
|
119
121
|
c.state_db = d.get("STATE_DB", "./sentinel.db")
|
|
120
122
|
c.workspace_dir = d.get("WORKSPACE_DIR", "./workspace")
|
|
121
123
|
c.claude_code_bin = d.get("CLAUDE_CODE_BIN", "claude")
|
package/python/sentinel/main.py
CHANGED
|
@@ -1,223 +1,237 @@
|
|
|
1
|
-
"""
|
|
2
|
-
main.py — Sentinel entry point and watch loop.
|
|
3
|
-
|
|
4
|
-
Usage:
|
|
5
|
-
python -m sentinel.main # run watch loop
|
|
6
|
-
python -m sentinel.main --init # first-time setup
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import argparse
|
|
10
|
-
import asyncio
|
|
11
|
-
import logging
|
|
12
|
-
import signal
|
|
13
|
-
import subprocess
|
|
14
|
-
import sys
|
|
15
|
-
from datetime import datetime, timezone
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
|
|
18
|
-
from .cairn_client import ensure_installed as cairn_installed, index_repo
|
|
19
|
-
from .config_loader import ConfigLoader
|
|
20
|
-
from .fix_engine import generate_fix
|
|
21
|
-
from .git_manager import apply_and_commit, publish
|
|
22
|
-
from .cicd_trigger import trigger as cicd_trigger
|
|
23
|
-
from .log_fetcher import fetch_all
|
|
24
|
-
from .log_parser import parse_all, ErrorEvent
|
|
25
|
-
from .repo_router import route
|
|
26
|
-
from .reporter import build_and_send
|
|
27
|
-
from .state_store import StateStore
|
|
28
|
-
|
|
29
|
-
logging.basicConfig(
|
|
30
|
-
level=logging.INFO,
|
|
31
|
-
format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
|
|
32
|
-
handlers=[
|
|
33
|
-
logging.StreamHandler(sys.stdout),
|
|
34
|
-
logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
|
|
35
|
-
],
|
|
36
|
-
)
|
|
37
|
-
logger = logging.getLogger("sentinel")
|
|
38
|
-
|
|
39
|
-
_report_requested = False
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _on_sigusr1(*_):
|
|
43
|
-
global _report_requested
|
|
44
|
-
_report_requested = True
|
|
45
|
-
logger.info("SIGUSR1 received — health report queued")
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def _register_signals():
|
|
49
|
-
try:
|
|
50
|
-
signal.signal(signal.SIGUSR1, _on_sigusr1)
|
|
51
|
-
except (OSError, AttributeError):
|
|
52
|
-
pass
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
# ── Fix pipeline ──────────────────────────────────────────────────────────────
|
|
56
|
-
|
|
57
|
-
async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
|
|
58
|
-
sentinel = cfg_loader.sentinel
|
|
59
|
-
|
|
60
|
-
repo = route(event, cfg_loader.repos)
|
|
61
|
-
if not repo:
|
|
62
|
-
return
|
|
63
|
-
|
|
64
|
-
if Path("SENTINEL_PAUSE").exists():
|
|
65
|
-
logger.info("SENTINEL_PAUSE present — fix activity halted")
|
|
66
|
-
return
|
|
67
|
-
|
|
68
|
-
if event.is_infra_issue:
|
|
69
|
-
logger.info("Infra issue for %s — log only", event.fingerprint)
|
|
70
|
-
store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
|
|
71
|
-
return
|
|
72
|
-
|
|
73
|
-
if event.severity == "CRITICAL" and repo.auto_publish:
|
|
74
|
-
logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
|
|
75
|
-
store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
commit_status
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
1
|
+
"""
|
|
2
|
+
main.py — Sentinel entry point and watch loop.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python -m sentinel.main # run watch loop
|
|
6
|
+
python -m sentinel.main --init # first-time setup
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
import logging
|
|
12
|
+
import signal
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from .cairn_client import ensure_installed as cairn_installed, index_repo
|
|
19
|
+
from .config_loader import ConfigLoader
|
|
20
|
+
from .fix_engine import generate_fix
|
|
21
|
+
from .git_manager import apply_and_commit, publish
|
|
22
|
+
from .cicd_trigger import trigger as cicd_trigger
|
|
23
|
+
from .log_fetcher import fetch_all
|
|
24
|
+
from .log_parser import parse_all, ErrorEvent
|
|
25
|
+
from .repo_router import route
|
|
26
|
+
from .reporter import build_and_send, send_fix_notification
|
|
27
|
+
from .state_store import StateStore
|
|
28
|
+
|
|
29
|
+
# The FileHandler below opens logs/sentinel.log as soon as this module is
# imported -- before main() has had a chance to create the directory -- so
# the directory must exist first or the import itself fails.
Path("logs").mkdir(exist_ok=True)

# Log to both stdout and a UTF-8 log file.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("logs/sentinel.log", encoding="utf-8"),
    ],
)
logger = logging.getLogger("sentinel")
|
|
38
|
+
|
|
39
|
+
_report_requested = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _on_sigusr1(*_signal_args):
    """Signal handler: mark that a health report was requested.

    Only sets the module-level flag and logs; poll_cycle() reads the flag
    and clears it.
    """
    global _report_requested
    _report_requested = True
    logger.info("SIGUSR1 received — health report queued")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _register_signals():
    """Install the SIGUSR1 handler where the platform supports it."""
    try:
        sigusr1 = signal.SIGUSR1
        signal.signal(sigusr1, _on_sigusr1)
    except (AttributeError, OSError):
        # Best effort: the signal is missing or cannot be installed here,
        # so on-demand reports are simply unavailable.
        return
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ── Fix pipeline ──────────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
    """Run the fix pipeline for one parsed error event.

    Routes the event to a repo, then bails out early when: no repo matches,
    a SENTINEL_PAUSE file exists in the working directory, the event is an
    infra issue, the event is CRITICAL in an auto-publish repo, or a fix was
    already attempted in the last 24 hours. Otherwise it generates a patch,
    commits it, publishes it, records the outcome in the state store, sends
    a per-fix email and, for auto-publish repos, triggers CI/CD.

    Args:
        event: The parsed error event to handle.
        cfg_loader: Loaded configuration (sentinel settings and repos).
        store: State store used to record fix outcomes.
    """
    sentinel = cfg_loader.sentinel

    repo = route(event, cfg_loader.repos)
    if not repo:
        return

    if Path("SENTINEL_PAUSE").exists():
        logger.info("SENTINEL_PAUSE present — fix activity halted")
        return

    if event.is_infra_issue:
        logger.info("Infra issue for %s — log only", event.fingerprint)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if event.severity == "CRITICAL" and repo.auto_publish:
        logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
        store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
        return

    if store.fix_attempted_recently(event.fingerprint, hours=24):
        logger.debug("Fix already attempted recently for %s", event.fingerprint)
        return

    patches_dir = Path(sentinel.workspace_dir) / "patches"
    status, patch_path = generate_fix(event, repo, sentinel, patches_dir)

    if status != "patch" or patch_path is None:
        store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed", repo_name=repo.repo_name)
        return

    commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
    if commit_status != "committed":
        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
        return

    branch, pr_url = publish(event, repo, sentinel, commit_hash)
    store.record_fix(
        event.fingerprint,
        "applied" if repo.auto_publish else "pending",
        patch_path=str(patch_path),
        commit_hash=commit_hash,
        branch=branch,
        pr_url=pr_url,
        repo_name=repo.repo_name,
    )

    # The fix is already committed and published at this point. A mail
    # failure must not abort the pipeline, otherwise the CI/CD trigger below
    # would be skipped for a change that has already been pushed.
    try:
        send_fix_notification(sentinel, {
            "source": event.source,
            "severity": event.severity,
            "fingerprint": event.fingerprint,
            "first_seen": str(event.timestamp),
            "message": event.message,
            "stack_trace": getattr(event, "stack_trace", ""),
            "repo_name": repo.repo_name,
            "commit_hash": commit_hash,
            "branch": branch,
            "pr_url": pr_url,
            "auto_publish": repo.auto_publish,
            "files_changed": [],
        })
    except Exception:
        logger.exception("Fix notification failed for %s", event.fingerprint)

    if repo.auto_publish:
        cicd_trigger(repo, store, event.fingerprint)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ── Poll cycle ────────────────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
    """Run one fetch / parse / fix / report cycle.

    Fetches logs from every configured source, parses them into events,
    records each event, runs the fix pipeline for events without a recent
    fix attempt, and finally sends the health digest when SEND_HEALTH is
    enabled and a report is either due or was requested via SIGUSR1.

    Args:
        cfg_loader: Loaded configuration (sentinel settings, log sources).
        store: State store used for error/fix bookkeeping.
    """
    global _report_requested

    sources = list(cfg_loader.log_sources.values())
    if not sources:
        logger.warning("No log-configs found")
        return

    logger.info("Fetching logs from %d source(s)...", len(sources))
    fetched = await fetch_all(sources, cfg_loader.sentinel)

    events = parse_all(fetched, cfg_loader.log_sources)
    logger.info("Parsed %d error/warn events", len(events))

    new_events = []
    for event in events:
        store.record_error(event.fingerprint, event.source, event.message)
        if not store.fix_attempted_recently(event.fingerprint):
            new_events.append(event)

    logger.info("%d new event(s) to process", len(new_events))
    outcomes = await asyncio.gather(
        *[_handle_error(e, cfg_loader, store) for e in new_events],
        return_exceptions=True,
    )
    # return_exceptions=True hands failures back as values; surface them in
    # the log instead of dropping them on the floor.
    for event, outcome in zip(new_events, outcomes):
        if isinstance(outcome, BaseException):
            logger.error("Fix pipeline failed for %s", event.fingerprint, exc_info=outcome)

    if cfg_loader.sentinel.send_health and (_report_requested or _report_due(cfg_loader, store)):
        _report_requested = False
        logger.info("Sending health digest...")
        build_and_send(cfg_loader.sentinel, store)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
|
|
156
|
+
last = store.last_report_time()
|
|
157
|
+
if last is None:
|
|
158
|
+
return True
|
|
159
|
+
elapsed = (datetime.now(timezone.utc) - last).total_seconds()
|
|
160
|
+
return elapsed >= cfg_loader.sentinel.report_interval_hours * 3600
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ── Init ──────────────────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
def run_init(cfg_loader: ConfigLoader):
    """First-time setup: clone and index repos, probe SSH, send a test email.

    Every step is best effort: a failure is logged and the remaining steps
    still run.

    Args:
        cfg_loader: Loaded configuration (sentinel settings, repos, log sources).
    """
    sentinel = cfg_loader.sentinel
    logger.info("=== Sentinel --init ===")

    if not cairn_installed():
        logger.error("Cairn not installed. Run: npm install -g @misterhuydo/cairn-mcp")

    for name, repo in cfg_loader.repos.items():
        local = Path(repo.local_path)
        if not local.exists():
            logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
            try:
                r = subprocess.run(["git", "clone", repo.repo_url, str(local)], capture_output=True, text=True)
            except OSError as e:
                # e.g. git is not installed / not on PATH
                logger.error("Clone failed for %s: %s", name, e)
                continue
            if r.returncode != 0:
                logger.error("Clone failed for %s: %s", name, r.stderr)
                continue
        index_repo(repo)

    for src_name, src in cfg_loader.log_sources.items():
        if src.source_type == "ssh" and src.hosts:
            # Only the first host of each source is probed.
            host = src.hosts[0]
            logger.info("Testing SSH to %s (%s)...", src_name, host)
            try:
                r = subprocess.run(
                    ["ssh", "-i", src.key, "-o", "StrictHostKeyChecking=no",
                     "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok"],
                    capture_output=True, text=True, timeout=15,
                )
            except subprocess.TimeoutExpired:
                # An unresponsive host must not abort the rest of init.
                logger.error(" SSH %s: FAILED — timed out", host)
                continue
            except OSError as e:
                # e.g. ssh binary missing
                logger.error(" SSH %s: FAILED — %s", host, e)
                continue
            logger.info(" SSH %s: %s", host, "OK" if r.returncode == 0 else f"FAILED — {r.stderr.strip()}")

    logger.info("Sending test email...")
    try:
        build_and_send(sentinel, StateStore(sentinel.state_db))
    except Exception as e:
        logger.error("Test email failed: %s", e)

    logger.info("=== Init complete ===")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Run poll cycles forever, sleeping poll_interval_seconds between them.

    An exception raised by a cycle is logged with its traceback and the
    loop carries on with the next cycle.
    """
    pause_seconds = cfg_loader.sentinel.poll_interval_seconds
    repo_names = list(cfg_loader.repos.keys())
    logger.info("Sentinel starting — poll interval: %ds, repos: %s", pause_seconds, repo_names)
    while True:
        try:
            await poll_cycle(cfg_loader, store)
        except Exception as exc:
            logger.exception("Unhandled error in poll cycle: %s", exc)
        await asyncio.sleep(pause_seconds)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def main():
    """Command-line entry point.

    Creates the working directories, parses arguments, loads configuration
    and then either performs first-time setup (``--init``) or runs the
    watch loop until interrupted.
    """
    Path("logs").mkdir(exist_ok=True)
    Path("workspace/fetched").mkdir(parents=True, exist_ok=True)
    Path("workspace/patches").mkdir(parents=True, exist_ok=True)

    parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
    parser.add_argument("--init", action="store_true", help="First-time setup")
    parser.add_argument("--config", default="./config", help="Config directory path")
    args = parser.parse_args()

    cfg_loader = ConfigLoader(config_dir=args.config)

    # The fix pipeline writes patches under the *configured* workspace
    # directory, which may differ from the default ./workspace created above.
    Path(cfg_loader.sentinel.workspace_dir, "patches").mkdir(parents=True, exist_ok=True)

    store = StateStore(cfg_loader.sentinel.state_db)
    _register_signals()

    if args.init:
        run_init(cfg_loader)
        return

    try:
        asyncio.run(run_loop(cfg_loader, store))
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the agent; exit without a traceback.
        logger.info("Sentinel stopped")


if __name__ == "__main__":
    main()
|
|
@@ -1,173 +1,138 @@
|
|
|
1
|
-
"""
|
|
2
|
-
reporter.py
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
from
|
|
14
|
-
|
|
15
|
-
from
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
body
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _send_email(cfg: SentinelConfig, html: str, stats: dict):
|
|
143
|
-
subject = (
|
|
144
|
-
f"[Sentinel] Health Report — "
|
|
145
|
-
f"{stats['applied']} fixed, {stats['failed']} failed, "
|
|
146
|
-
f"{stats['errors']} errors detected"
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
msg = MIMEMultipart("alternative")
|
|
150
|
-
msg["Subject"] = subject
|
|
151
|
-
msg["From"] = cfg.smtp_user
|
|
152
|
-
msg["To"] = ", ".join(cfg.report_recipients)
|
|
153
|
-
msg.attach(MIMEText(html, "html"))
|
|
154
|
-
|
|
155
|
-
if cfg.smtp_host.lower() == "ses":
|
|
156
|
-
_send_ses(cfg, msg)
|
|
157
|
-
else:
|
|
158
|
-
_send_smtp(cfg, msg)
|
|
159
|
-
logger.info("Health report sent to %d recipient(s)", len(cfg.report_recipients))
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def _send_smtp(cfg: SentinelConfig, msg: MIMEMultipart):
|
|
163
|
-
with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port) as smtp:
|
|
164
|
-
smtp.ehlo()
|
|
165
|
-
smtp.starttls()
|
|
166
|
-
smtp.login(cfg.smtp_user, cfg.smtp_password)
|
|
167
|
-
smtp.sendmail(cfg.smtp_user, cfg.report_recipients, msg.as_string())
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def _send_ses(cfg: SentinelConfig, msg: MIMEMultipart):
|
|
171
|
-
# AWS SES via SMTP endpoint — same as SMTP with different host
|
|
172
|
-
# Set SMTP_HOST=email-smtp.us-east-1.amazonaws.com and use SES SMTP credentials
|
|
173
|
-
_send_smtp(cfg, msg)
|
|
1
|
+
"""
|
|
2
|
+
reporter.py -- Email notifications for Sentinel.
|
|
3
|
+
|
|
4
|
+
Two modes:
|
|
5
|
+
1. Per-fix notification -- sent immediately after every fix (always on).
|
|
6
|
+
2. Health digest -- periodic summary, only if SEND_HEALTH=enabled.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import smtplib
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from email.mime.multipart import MIMEMultipart
|
|
13
|
+
from email.mime.text import MIMEText
|
|
14
|
+
|
|
15
|
+
from jinja2 import Template
|
|
16
|
+
|
|
17
|
+
from .config_loader import SentinelConfig
|
|
18
|
+
from .state_store import StateStore
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# ---- Templates ---------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
_FIX_TEMPLATE = Template('<!DOCTYPE html><html><head><meta charset="utf-8">\n<style>\n body{font-family:Arial,sans-serif;font-size:14px;color:#222}\n h2{color:#1a73e8;margin-bottom:4px}\n h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}\n table{border-collapse:collapse;width:100%;margin-bottom:16px}\n th{background:#f1f3f4;text-align:left;padding:6px 10px}\n td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}\n .label{font-weight:bold;width:160px}\n .ok{color:#2e7d32;font-weight:bold}\n .mono{font-family:monospace;font-size:12px}\n pre{background:#f8f8f8;border:1px solid #ddd;padding:10px;font-size:12px;white-space:pre-wrap}\n .badge-a{background:#2e7d32;color:#fff;padding:2px 8px;border-radius:4px}\n .badge-p{background:#e65100;color:#fff;padding:2px 8px;border-radius:4px}\n</style></head><body>\n<h2>Sentinel Fix Report</h2>\n<p>\n <span class="{{ \'badge-a\' if auto_publish else \'badge-p\' }}">\n {{ \'PUSHED TO \' + branch|upper if auto_publish else \'PENDING REVIEW\' }}\n </span>\n <strong>{{ repo_name }}</strong> · {{ generated_at }}\n</p>\n<h3>Error Detected</h3>\n<table>\n <tr><td class="label">Service</td><td class="mono">{{ source }}</td></tr>\n <tr><td class="label">Severity</td><td class="mono">{{ severity }}</td></tr>\n <tr><td class="label">Fingerprint</td><td class="mono">{{ fingerprint }}</td></tr>\n <tr><td class="label">First seen</td><td>{{ first_seen }}</td></tr>\n <tr><td class="label">Message</td><td class="mono">{{ message }}</td></tr>\n</table>\n{% if stack_trace %}<h3>Stack Trace</h3><pre>{{ stack_trace }}</pre>{% endif %}\n<h3>Fix Applied</h3>\n<table>\n <tr><td class="label">Repository</td><td class="mono">{{ repo_name }}</td></tr>\n <tr><td class="label">Commit</td><td class="mono">{{ commit_hash }}</td></tr>\n <tr><td class="label">Branch</td><td class="mono">{{ branch }}</td></tr>\n {% if pr_url %}\n <tr><td class="label">Pull Request</td>\n <td><a href="{{ pr_url }}">{{ pr_url }}</a> — review and merge to 
apply</td></tr>\n {% else %}\n <tr><td class="label">Status</td>\n <td class="ok">Pushed directly to {{ branch }}</td></tr>\n {% endif %}\n {% if files_changed %}\n <tr><td class="label">Files changed</td>\n <td class="mono">{{ files_changed | join(\'<br>\') }}</td></tr>\n {% endif %}\n</table>\n<hr><small>Sentinel — Autonomous DevOps Agent</small>\n</body></html>\n')
|
|
25
|
+
|
|
26
|
+
_HEALTH_TEMPLATE = Template('<!DOCTYPE html><html><head><meta charset="utf-8">\n<style>\n body{font-family:Arial,sans-serif;font-size:14px;color:#222}\n h2{color:#1a73e8}\n h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}\n table{border-collapse:collapse;width:100%;margin-bottom:16px}\n th{background:#f1f3f4;text-align:left;padding:6px 10px}\n td{padding:5px 10px;border-bottom:1px solid #eee}\n .ok{color:#2e7d32}.fail{color:#c62828}.warn{color:#e65100}\n .mono{font-family:monospace;font-size:12px}\n</style></head><body>\n<h2>Sentinel Health Digest</h2>\n<p>Generated: <strong>{{ generated_at }}</strong></p>\n<h3>Summary (last {{ hours }}h)</h3>\n<table>\n <tr><th>Metric</th><th>Count</th></tr>\n <tr><td>Errors detected</td><td>{{ stats.errors }}</td></tr>\n <tr><td>Fixes applied</td><td class="ok">{{ stats.applied }}</td></tr>\n <tr><td>Fixes failed</td><td class="fail">{{ stats.failed }}</td></tr>\n <tr><td>Skipped</td><td class="warn">{{ stats.skipped }}</td></tr>\n</table>\n{% if open_prs %}\n<h3>Pending Review (AUTO_PUBLISH=false)</h3>\n<table>\n <tr><th>Repo</th><th>Branch</th><th>PR</th><th>Age</th></tr>\n {% for pr in open_prs %}\n <tr>\n <td>{{ pr.repo_name }}</td>\n <td class="mono">{{ pr.branch }}</td>\n <td><a href="{{ pr.pr_url }}">{{ pr.pr_url }}</a></td>\n <td>{{ pr.age }}</td>\n </tr>\n {% endfor %}\n</table>\n{% endif %}\n<hr><small>Sentinel — Autonomous DevOps Agent</small>\n</body></html>\n')
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ---- Per-fix notification ----------------------------------------------------
|
|
30
|
+
|
|
31
|
+
def send_fix_notification(cfg: SentinelConfig, fix: dict):
    """
    Send an immediate email after a fix is applied or a PR is opened.

    Does nothing (beyond a warning) when no recipients are configured.

    fix dict keys:
    source, severity, fingerprint, first_seen, message, stack_trace,
    repo_name, commit_hash, branch, pr_url, auto_publish, files_changed

    Values taken from logs (message, stack trace, ...) are HTML-escaped
    before rendering, and the subject line is collapsed to a single line.
    """
    # Local import keeps this change self-contained.
    from html import escape

    if not cfg.mails:
        logger.warning("No MAILS configured -- skipping fix notification")
        return

    def _html(key: str, default: str = "") -> str:
        """Return fix[key] as an HTML-escaped string (default when missing/None)."""
        value = fix.get(key)
        return escape(str(default if value is None else value))

    def _one_line(value) -> str:
        """Collapse all whitespace runs (including CR/LF) into single spaces."""
        return " ".join(str(value).split())

    auto_publish = fix.get("auto_publish", False)
    verb = "fix" if auto_publish else "PR"
    # Log messages can span several lines; a raw newline must never reach a
    # mail header. `or ""` also guards against an explicit None message.
    source_label = _one_line(fix.get("source") or "unknown")
    summary = _one_line(fix.get("message") or "")[:80]
    subject = f"[Sentinel] {verb}({source_label}): {summary}"

    # _FIX_TEMPLATE is built without autoescape, so escape here.
    # NOTE: if autoescape is ever enabled on the template, drop this escaping
    # to avoid double-escaping.
    html = _FIX_TEMPLATE.render(
        generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
        auto_publish=auto_publish,
        repo_name=_html("repo_name", "unknown"),
        source=_html("source", "unknown"),
        severity=_html("severity", "ERROR"),
        fingerprint=_html("fingerprint"),
        first_seen=_html("first_seen"),
        message=_html("message"),
        stack_trace=_html("stack_trace"),
        commit_hash=_html("commit_hash"),
        branch=_html("branch", "unknown"),
        pr_url=escape(str(fix.get("pr_url") or "")),
        files_changed=[escape(str(path)) for path in (fix.get("files_changed") or [])],
    )
    _send_email(cfg, subject, html)
    logger.info("Fix notification sent to %d recipient(s)", len(cfg.mails))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---- Health digest -----------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
def build_and_send(cfg: SentinelConfig, store: StateStore):
    """Build the periodic health digest, email it and record the report.

    Covers the last ``report_interval_hours`` hours. Skipped with a warning
    when no recipients are configured.
    """
    recipients = cfg.mails
    if not recipients:
        logger.warning("No MAILS configured -- skipping health digest")
        return

    window_hours = cfg.report_interval_hours
    recent_errors = store.get_recent_errors(window_hours)
    recent_fixes = store.get_recent_fixes(window_hours)
    pending_prs = store.get_open_prs()

    # Tally fix outcomes by status.
    applied = failed = skipped = 0
    for entry in recent_fixes:
        outcome = entry["status"]
        if outcome == "applied":
            applied += 1
        elif outcome == "failed":
            failed += 1
        elif outcome == "skipped":
            skipped += 1

    stats = {
        "errors": len(recent_errors),
        "applied": applied,
        "failed": failed,
        "skipped": skipped,
    }

    # Attach a human-readable age to every open PR for the template.
    for pr in pending_prs:
        pr["age"] = _age(pr.get("timestamp", ""))

    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    html = _HEALTH_TEMPLATE.render(
        generated_at=generated_at,
        hours=window_hours,
        stats=stats,
        open_prs=pending_prs,
    )
    subject = f"[Sentinel] Health Digest -- {stats['applied']} fixed, {stats['failed']} failed, {stats['errors']} detected"

    _send_email(cfg, subject, html)
    store.record_report(recipient_count=len(cfg.mails), summary=stats)
    logger.info("Health digest sent to %d recipient(s)", len(cfg.mails))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---- Shared helpers ----------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
def _send_email(cfg: SentinelConfig, subject: str, html: str):
    """Wrap *html* in a MIME message addressed to cfg.mails and dispatch it.

    The transport is chosen from cfg.smtp_host: the literal value "ses"
    (case-insensitive) selects _send_ses, anything else _send_smtp.
    """
    envelope = MIMEMultipart("alternative")
    headers = (
        ("Subject", subject),
        ("From", cfg.smtp_user),
        ("To", ", ".join(cfg.mails)),
    )
    for header_name, header_value in headers:
        envelope[header_name] = header_value
    envelope.attach(MIMEText(html, "html"))

    use_ses = cfg.smtp_host.lower() == "ses"
    deliver = _send_ses if use_ses else _send_smtp
    deliver(cfg, envelope)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _send_smtp(cfg: SentinelConfig, msg: MIMEMultipart):
    """Deliver *msg* to cfg.mails over SMTP with STARTTLS and login.

    Args:
        cfg: Supplies smtp_host, smtp_port, smtp_user, smtp_password, mails.
        msg: Fully built MIME message.

    Raises:
        smtplib.SMTPException / OSError: on connection, TLS, auth or send
        failure (including a timeout).
    """
    # Without a timeout a stalled server blocks this synchronous call -- and
    # with it the whole poll loop -- indefinitely.
    timeout_seconds = 30
    with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port, timeout=timeout_seconds) as smtp:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(cfg.smtp_user, cfg.smtp_password)
        smtp.sendmail(cfg.smtp_user, cfg.mails, msg.as_string())
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _send_ses(cfg: SentinelConfig, msg: MIMEMultipart):
    """Deliver *msg* for the "ses" transport; currently plain SMTP delivery.

    NOTE(review): this is reached when SMTP_HOST is the literal "ses", and
    _send_smtp then connects to cfg.smtp_host unchanged -- confirm that
    hostname is intended, or whether the SES SMTP endpoint should be
    configured as SMTP_HOST instead.
    """
    _send_smtp(cfg, msg)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _age(ts_str: str) -> str:
|
|
130
|
+
try:
|
|
131
|
+
ts = datetime.fromisoformat(ts_str)
|
|
132
|
+
if ts.tzinfo is None:
|
|
133
|
+
ts = ts.replace(tzinfo=timezone.utc)
|
|
134
|
+
delta = datetime.now(timezone.utc) - ts
|
|
135
|
+
hours = int(delta.total_seconds() // 3600)
|
|
136
|
+
return f"{int(delta.total_seconds() // 60)}m" if hours < 1 else f"{hours}h"
|
|
137
|
+
except Exception:
|
|
138
|
+
return "?"
|
|
@@ -1,31 +1,32 @@
|
|
|
1
|
-
# Sentinel master config
|
|
2
|
-
|
|
3
|
-
# Schedule
|
|
4
|
-
POLL_INTERVAL_SECONDS=120
|
|
5
|
-
|
|
6
|
-
# Email reporting
|
|
7
|
-
SMTP_HOST=smtp.gmail.com
|
|
8
|
-
SMTP_PORT=587
|
|
9
|
-
SMTP_USER=sentinel@yourdomain.com
|
|
10
|
-
SMTP_PASSWORD=<app-password>
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
#
|
|
31
|
-
|
|
1
|
+
# Sentinel master config
|
|
2
|
+
|
|
3
|
+
# Schedule
|
|
4
|
+
POLL_INTERVAL_SECONDS=120
|
|
5
|
+
|
|
6
|
+
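# Email reporting — MAILS: comma-separated recipients; SEND_HEALTH: enabled|disabled (periodic health digest); REPORT_INTERVAL_HOURS: digest period in hours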
|
|
7
|
+
SMTP_HOST=smtp.gmail.com
|
|
8
|
+
SMTP_PORT=587
|
|
9
|
+
SMTP_USER=sentinel@yourdomain.com
|
|
10
|
+
SMTP_PASSWORD=<app-password>
|
|
11
|
+
MAILS=huy@yourdomain.com
|
|
12
|
+
SEND_HEALTH=disabled
|
|
13
|
+
REPORT_INTERVAL_HOURS=1
|
|
14
|
+
|
|
15
|
+
# State DB
|
|
16
|
+
STATE_DB=./sentinel.db
|
|
17
|
+
|
|
18
|
+
# Workspace
|
|
19
|
+
WORKSPACE_DIR=./workspace
|
|
20
|
+
|
|
21
|
+
# Claude Code binary path
|
|
22
|
+
CLAUDE_CODE_BIN=claude
|
|
23
|
+
|
|
24
|
+
# GitHub token (required for opening PRs when AUTO_PUBLISH=false)
|
|
25
|
+
GITHUB_TOKEN=<github-pat>
|
|
26
|
+
|
|
27
|
+
# Fix confidence threshold (0.0 - 1.0); fixes below this are skipped
|
|
28
|
+
FIX_CONFIDENCE_THRESHOLD=0.7
|
|
29
|
+
|
|
30
|
+
# Rolling log retention window — fetched logs older than this are pruned
|
|
31
|
+
# Claude Code reads the full window for context when generating fixes
|
|
32
|
+
LOG_RETENTION_HOURS=48
|