@misterhuydo/sentinel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ """
2
+ reporter.py — Build and send HTML health-report emails.
3
+
4
+ Scheduled every REPORT_INTERVAL_HOURS or triggered by SIGUSR1.
5
+ """
6
+
7
+ import logging
8
+ import smtplib
9
+ from datetime import datetime, timezone
10
+ from email.mime.multipart import MIMEMultipart
11
+ from email.mime.text import MIMEText
12
+
13
+ from jinja2 import Template
14
+
15
+ from .config_loader import SentinelConfig
16
+ from .state_store import StateStore
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ _HTML_TEMPLATE = Template("""\
21
+ <!DOCTYPE html>
22
+ <html>
23
+ <head>
24
+ <meta charset="utf-8">
25
+ <style>
26
+ body { font-family: Arial, sans-serif; font-size: 14px; color: #222; }
27
+ h2 { color: #1a73e8; }
28
+ h3 { color: #444; border-bottom: 1px solid #ddd; padding-bottom: 4px; }
29
+ table { border-collapse: collapse; width: 100%; margin-bottom: 16px; }
30
+ th { background: #f1f3f4; text-align: left; padding: 6px 10px; }
31
+ td { padding: 5px 10px; border-bottom: 1px solid #eee; }
32
+ .ok { color: #2e7d32; }
33
+ .fail { color: #c62828; }
34
+ .warn { color: #e65100; }
35
+ .pr-link { font-weight: bold; }
36
+ .mono { font-family: monospace; font-size: 12px; }
37
+ </style>
38
+ </head>
39
+ <body>
40
+ <h2>🤖 Sentinel Health Report</h2>
41
+ <p>Generated: <strong>{{ generated_at }}</strong></p>
42
+
43
+ <h3>Summary (last {{ hours }}h)</h3>
44
+ <table>
45
+ <tr><th>Metric</th><th>Count</th></tr>
46
+ <tr><td>Errors detected</td><td>{{ stats.errors }}</td></tr>
47
+ <tr><td>Fixes applied</td><td class="ok">{{ stats.applied }}</td></tr>
48
+ <tr><td>Fixes failed</td><td class="fail">{{ stats.failed }}</td></tr>
49
+ <tr><td>Skipped</td><td class="warn">{{ stats.skipped }}</td></tr>
50
+ </table>
51
+
52
+ {% if open_prs %}
53
+ <h3>⏳ Pending Review (AUTO_PUBLISH=false)</h3>
54
+ <p>The following fixes are waiting for admin approval. Review and merge on GitHub:</p>
55
+ <table>
56
+ <tr><th>Fingerprint</th><th>Branch</th><th>PR</th><th>Age</th></tr>
57
+ {% for pr in open_prs %}
58
+ <tr>
59
+ <td class="mono">{{ pr.fingerprint[:8] }}</td>
60
+ <td class="mono">{{ pr.branch }}</td>
61
+ <td><a class="pr-link" href="{{ pr.pr_url }}">{{ pr.pr_url }}</a></td>
62
+ <td>{{ pr.age }}</td>
63
+ </tr>
64
+ {% endfor %}
65
+ </table>
66
+ {% endif %}
67
+
68
+ {% if recent_fixes %}
69
+ <h3>Recent Fix Activity</h3>
70
+ <table>
71
+ <tr><th>Time</th><th>Fingerprint</th><th>Status</th><th>Commit</th></tr>
72
+ {% for fix in recent_fixes %}
73
+ <tr>
74
+ <td>{{ fix.timestamp }}</td>
75
+ <td class="mono">{{ fix.fingerprint[:8] }}</td>
76
+ <td class="{{ 'ok' if fix.status == 'applied' else 'fail' if fix.status == 'failed' else 'warn' }}">
77
+ {{ fix.status }}
78
+ </td>
79
+ <td class="mono">{{ fix.commit_hash[:8] if fix.commit_hash else '-' }}</td>
80
+ </tr>
81
+ {% endfor %}
82
+ </table>
83
+ {% endif %}
84
+
85
+ <hr>
86
+ <small>Sentinel — Autonomous DevOps Agent</small>
87
+ </body>
88
+ </html>
89
+ """)
90
+
91
+
92
+ def _age(ts_str: str) -> str:
93
+ try:
94
+ ts = datetime.fromisoformat(ts_str)
95
+ if ts.tzinfo is None:
96
+ ts = ts.replace(tzinfo=timezone.utc)
97
+ delta = datetime.now(timezone.utc) - ts
98
+ hours = int(delta.total_seconds() // 3600)
99
+ if hours < 1:
100
+ return f"{int(delta.total_seconds() // 60)}m"
101
+ return f"{hours}h"
102
+ except Exception:
103
+ return "?"
104
+
105
+
106
+ def build_and_send(cfg: SentinelConfig, store: StateStore):
107
+ hours = cfg.report_interval_hours
108
+ errors = store.get_recent_errors(hours)
109
+ fixes = store.get_recent_fixes(hours)
110
+ open_prs = store.get_open_prs()
111
+
112
+ stats = {
113
+ "errors": len(errors),
114
+ "applied": sum(1 for f in fixes if f["status"] == "applied"),
115
+ "failed": sum(1 for f in fixes if f["status"] == "failed"),
116
+ "skipped": sum(1 for f in fixes if f["status"] == "skipped"),
117
+ }
118
+
119
+ # Annotate PRs with human-readable age
120
+ for pr in open_prs:
121
+ pr["age"] = _age(pr.get("timestamp", ""))
122
+
123
+ html = _HTML_TEMPLATE.render(
124
+ generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
125
+ hours=hours,
126
+ stats=stats,
127
+ open_prs=open_prs,
128
+ recent_fixes=fixes,
129
+ )
130
+
131
+ if not cfg.report_recipients:
132
+ logger.warning("No REPORT_RECIPIENTS configured — skipping email")
133
+ return
134
+
135
+ _send_email(cfg, html, stats)
136
+ store.record_report(
137
+ recipient_count=len(cfg.report_recipients),
138
+ summary=stats,
139
+ )
140
+
141
+
142
+ def _send_email(cfg: SentinelConfig, html: str, stats: dict):
143
+ subject = (
144
+ f"[Sentinel] Health Report — "
145
+ f"{stats['applied']} fixed, {stats['failed']} failed, "
146
+ f"{stats['errors']} errors detected"
147
+ )
148
+
149
+ msg = MIMEMultipart("alternative")
150
+ msg["Subject"] = subject
151
+ msg["From"] = cfg.smtp_user
152
+ msg["To"] = ", ".join(cfg.report_recipients)
153
+ msg.attach(MIMEText(html, "html"))
154
+
155
+ if cfg.smtp_host.lower() == "ses":
156
+ _send_ses(cfg, msg)
157
+ else:
158
+ _send_smtp(cfg, msg)
159
+ logger.info("Health report sent to %d recipient(s)", len(cfg.report_recipients))
160
+
161
+
162
+ def _send_smtp(cfg: SentinelConfig, msg: MIMEMultipart):
163
+ with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port) as smtp:
164
+ smtp.ehlo()
165
+ smtp.starttls()
166
+ smtp.login(cfg.smtp_user, cfg.smtp_password)
167
+ smtp.sendmail(cfg.smtp_user, cfg.report_recipients, msg.as_string())
168
+
169
+
170
+ def _send_ses(cfg: SentinelConfig, msg: MIMEMultipart):
171
+ # AWS SES via SMTP endpoint — same as SMTP with different host
172
+ # Set SMTP_HOST=email-smtp.us-east-1.amazonaws.com and use SES SMTP credentials
173
+ _send_smtp(cfg, msg)
@@ -0,0 +1,164 @@
1
+ """
2
+ state_store.py — SQLite-backed persistence for errors, fixes, and reports.
3
+ """
4
+
5
+ import json
6
+ import sqlite3
7
+ import logging
8
+ from contextlib import contextmanager
9
+ from datetime import datetime, timezone
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def _now() -> str:
15
+ return datetime.now(timezone.utc).isoformat()
16
+
17
+
18
+ class StateStore:
19
+ def __init__(self, db_path: str = "./sentinel.db"):
20
+ self.db_path = db_path
21
+ self._init_db()
22
+
23
+ @contextmanager
24
+ def _conn(self):
25
+ conn = sqlite3.connect(self.db_path)
26
+ conn.row_factory = sqlite3.Row
27
+ try:
28
+ yield conn
29
+ conn.commit()
30
+ finally:
31
+ conn.close()
32
+
33
+ def _init_db(self):
34
+ with self._conn() as conn:
35
+ conn.executescript("""
36
+ CREATE TABLE IF NOT EXISTS errors (
37
+ fingerprint TEXT PRIMARY KEY,
38
+ first_seen TEXT NOT NULL,
39
+ last_seen TEXT NOT NULL,
40
+ count INTEGER NOT NULL DEFAULT 1,
41
+ source TEXT,
42
+ message TEXT
43
+ );
44
+
45
+ CREATE TABLE IF NOT EXISTS fixes (
46
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
47
+ fingerprint TEXT NOT NULL,
48
+ status TEXT NOT NULL, -- pending|applied|failed|skipped
49
+ patch_path TEXT,
50
+ commit_hash TEXT,
51
+ branch TEXT,
52
+ pr_url TEXT,
53
+ repo_name TEXT,
54
+ timestamp TEXT NOT NULL
55
+ );
56
+
57
+ CREATE TABLE IF NOT EXISTS reports (
58
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
59
+ sent_at TEXT NOT NULL,
60
+ recipient_count INTEGER NOT NULL DEFAULT 0,
61
+ summary_json TEXT
62
+ );
63
+ """)
64
+ logger.debug("StateStore initialised at %s", self.db_path)
65
+
66
+ # ── Errors ────────────────────────────────────────────────────────────────
67
+
68
+ def seen(self, fingerprint: str) -> bool:
69
+ with self._conn() as conn:
70
+ row = conn.execute(
71
+ "SELECT fingerprint FROM errors WHERE fingerprint = ?", (fingerprint,)
72
+ ).fetchone()
73
+ return row is not None
74
+
75
+ def record_error(self, fingerprint: str, source: str, message: str):
76
+ now = _now()
77
+ with self._conn() as conn:
78
+ existing = conn.execute(
79
+ "SELECT count FROM errors WHERE fingerprint = ?", (fingerprint,)
80
+ ).fetchone()
81
+ if existing:
82
+ conn.execute(
83
+ "UPDATE errors SET last_seen=?, count=count+1 WHERE fingerprint=?",
84
+ (now, fingerprint),
85
+ )
86
+ else:
87
+ conn.execute(
88
+ "INSERT INTO errors (fingerprint, first_seen, last_seen, count, source, message) "
89
+ "VALUES (?, ?, ?, 1, ?, ?)",
90
+ (fingerprint, now, now, source, message),
91
+ )
92
+
93
+ def get_recent_errors(self, hours: int = 6) -> list[dict]:
94
+ with self._conn() as conn:
95
+ rows = conn.execute(
96
+ "SELECT * FROM errors WHERE last_seen >= datetime('now', ? || ' hours') "
97
+ "ORDER BY last_seen DESC",
98
+ (f"-{hours}",),
99
+ ).fetchall()
100
+ return [dict(r) for r in rows]
101
+
102
+ # ── Fixes ─────────────────────────────────────────────────────────────────
103
+
104
+ def record_fix(
105
+ self,
106
+ fingerprint: str,
107
+ status: str,
108
+ patch_path: str = "",
109
+ commit_hash: str = "",
110
+ branch: str = "",
111
+ pr_url: str = "",
112
+ repo_name: str = "",
113
+ ):
114
+ with self._conn() as conn:
115
+ conn.execute(
116
+ "INSERT INTO fixes (fingerprint, status, patch_path, commit_hash, branch, pr_url, repo_name, timestamp) "
117
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
118
+ (fingerprint, status, patch_path, commit_hash, branch, pr_url, repo_name, _now()),
119
+ )
120
+
121
+ def get_open_prs(self) -> list[dict]:
122
+ """Returns fixes pushed as PRs that have not yet been merged (no commit_hash on main)."""
123
+ with self._conn() as conn:
124
+ rows = conn.execute(
125
+ "SELECT * FROM fixes WHERE status='pending' AND pr_url != '' "
126
+ "ORDER BY timestamp DESC"
127
+ ).fetchall()
128
+ return [dict(r) for r in rows]
129
+
130
+ def get_recent_fixes(self, hours: int = 6) -> list[dict]:
131
+ with self._conn() as conn:
132
+ rows = conn.execute(
133
+ "SELECT * FROM fixes WHERE timestamp >= datetime('now', ? || ' hours') "
134
+ "ORDER BY timestamp DESC",
135
+ (f"-{hours}",),
136
+ ).fetchall()
137
+ return [dict(r) for r in rows]
138
+
139
+ def fix_attempted_recently(self, fingerprint: str, hours: int = 24) -> bool:
140
+ with self._conn() as conn:
141
+ row = conn.execute(
142
+ "SELECT id FROM fixes WHERE fingerprint=? "
143
+ "AND timestamp >= datetime('now', ? || ' hours')",
144
+ (fingerprint, f"-{hours}"),
145
+ ).fetchone()
146
+ return row is not None
147
+
148
+ # ── Reports ───────────────────────────────────────────────────────────────
149
+
150
+ def record_report(self, recipient_count: int, summary: dict):
151
+ with self._conn() as conn:
152
+ conn.execute(
153
+ "INSERT INTO reports (sent_at, recipient_count, summary_json) VALUES (?, ?, ?)",
154
+ (_now(), recipient_count, json.dumps(summary)),
155
+ )
156
+
157
+ def last_report_time(self) -> datetime | None:
158
+ with self._conn() as conn:
159
+ row = conn.execute(
160
+ "SELECT sent_at FROM reports ORDER BY sent_at DESC LIMIT 1"
161
+ ).fetchone()
162
+ if row:
163
+ return datetime.fromisoformat(row["sent_at"])
164
+ return None
@@ -0,0 +1,47 @@
1
+ # log-configs/_example.properties
2
+ #
3
+ # One file per log stream (SSH server or Cloudflare worker).
4
+ # The filename stem must match the corresponding repo-configs/<stem>.properties
5
+ # so Sentinel knows which repository to fix errors from this log source.
6
+ #
7
+ # Copy this file to e.g. "elprint-salescore.properties" and fill in the values.
8
+ #
9
+ # ── Source type ───────────────────────────────────────────────────────────────
10
+
11
+ # ssh | cloudflare
12
+ SOURCE_TYPE=ssh
13
+
14
+ # ── SSH source (SOURCE_TYPE=ssh) ──────────────────────────────────────────────
15
+
16
+ # Path to the SSH private key (.pem) used to connect to the remote hosts
17
+ KEY=/home/<user>/.ssh/<key>.pem
18
+
19
+ # Comma-separated list of hostnames or user@host entries.
20
+ # Hosts without a user@ prefix default to ec2-user@<host>
21
+ HOSTS=ec2-xx-xx-xx-xx.eu-north-1.compute.amazonaws.com, ec2-xx-xx-xx-xx.eu-north-1.compute.amazonaws.com
22
+
23
+ # Comma-separated list of log file paths relative to /home/<REMOTE_SERVICE_USER>/
24
+ LOGS=logs/AppService.log, logs/alarm.log, logs/warning.log
25
+
26
+ # The Linux user owning the log files on the remote host (used to build the path)
27
+ REMOTE_SERVICE_USER=MyServiceUser
28
+
29
+ # Lines to fetch (tail -n N). Takes precedence over HEAD if both set.
30
+ TAIL=500
31
+
32
+ # Lines to fetch from the top instead (head -n N). Only used if TAIL is not set.
33
+ # HEAD=100
34
+
35
+ # Keep only lines matching this regex (grep -E)
36
+ GREP_FILTER=WARN|ERROR
37
+
38
+ # Drop lines matching this regex (grep -iv)
39
+ GREP_EXCLUDE=SSLTool|CommandValidate|hystrix
40
+
41
+ # ── Cloudflare source (SOURCE_TYPE=cloudflare) ────────────────────────────────
42
+
43
+ # Full URL of the Cloudflare Worker log endpoint
44
+ # CF_URL=https://logs.<worker>.workers.dev/<service>
45
+
46
+ # Bearer token for the Cloudflare Worker
47
+ # CF_TOKEN=<bearer-token>
@@ -0,0 +1,37 @@
1
+ # repo-configs/_example.properties
2
+ #
3
+ # One file per GitHub repository that this Sentinel instance manages.
4
+ # The filename stem (e.g. "elprint-salescore") is used as the repo identifier.
5
+ # It must match the corresponding log-configs/<stem>.properties file for
6
+ # log-to-repo linking to work.
7
+ #
8
+ # Copy this file to e.g. "elprint-salescore.properties" and fill in the values.
9
+ #
10
+ # ── Required ──────────────────────────────────────────────────────────────────
11
+
12
+ # SSH clone URL of the GitHub repository
13
+ REPO_URL=git@github.com:<org>/<repo>.git
14
+
15
+ # Absolute path where Sentinel will clone/manage this repo on the local machine
16
+ LOCAL_PATH=/home/<user>/sentinel/repos/<repo-name>
17
+
18
+ # Branch to pull from and push fixes to
19
+ BRANCH=main
20
+
21
+ # false → Sentinel opens a GitHub PR for admin review (recommended)
22
+ # true → Sentinel pushes directly to BRANCH and triggers CI/CD
23
+ AUTO_PUBLISH=false
24
+
25
+ # ── CI/CD (optional) ──────────────────────────────────────────────────────────
26
+ # Leave blank if this repo has no deploy pipeline (e.g. shared libraries)
27
+
28
+ # jenkins | github_actions | (blank)
29
+ CICD_TYPE=jenkins
30
+
31
+ # Full URL of the Jenkins job or GitHub Actions workflow
32
+ # Jenkins: https://jenkins.example.com/job/<job-name>
33
+ # GitHub Actions: https://github.com/<org>/<repo> (dispatch event used)
34
+ CICD_JOB_URL=https://jenkins.example.com/job/<job-name>
35
+
36
+ # Jenkins API token or GitHub PAT with workflow scope
37
+ CICD_TOKEN=<token>
@@ -0,0 +1,31 @@
1
+ # Sentinel master config
2
+
3
+ # Schedule
4
+ POLL_INTERVAL_SECONDS=120
5
+
6
+ # Email reporting
7
+ SMTP_HOST=smtp.gmail.com
8
+ SMTP_PORT=587
9
+ SMTP_USER=sentinel@yourdomain.com
10
+ SMTP_PASSWORD=<app-password>
11
+ REPORT_RECIPIENTS=huy@yourdomain.com
12
+ REPORT_INTERVAL_HOURS=1
13
+
14
+ # State DB
15
+ STATE_DB=./sentinel.db
16
+
17
+ # Workspace
18
+ WORKSPACE_DIR=./workspace
19
+
20
+ # Claude Code binary path
21
+ CLAUDE_CODE_BIN=claude
22
+
23
+ # GitHub token (required for opening PRs when AUTO_PUBLISH=false)
24
+ GITHUB_TOKEN=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
25
+
26
+ # Fix confidence threshold (0.0 - 1.0); fixes below this are skipped
27
+ FIX_CONFIDENCE_THRESHOLD=0.7
28
+
29
+ # Rolling log retention window — fetched logs older than this are pruned
30
+ # Claude Code reads the full window for context when generating fixes
31
+ LOG_RETENTION_HOURS=48