@misterhuydo/sentinel 1.0.6 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/sentinel.js +42 -39
- package/lib/add.js +385 -25
- package/lib/generate.js +14 -5
- package/lib/init.js +19 -2
- package/package.json +1 -1
- package/python/sentinel/__pycache__/issue_watcher.cpython-313.pyc +0 -0
- package/python/sentinel/config_loader.py +15 -3
- package/python/sentinel/fix_engine.py +49 -14
- package/python/sentinel/issue_watcher.py +146 -131
- package/python/sentinel/log_parser.py +175 -149
- package/python/sentinel/main.py +38 -4
- package/python/sentinel/reporter.py +86 -0
- package/python/sentinel/state_store.py +275 -164
- package/templates/sentinel.properties +20 -32
- package/templates/workspace-sentinel.properties +20 -0
package/python/sentinel/log_parser.py
CHANGED
|
@@ -1,149 +1,175 @@
|
|
|
1
|
-
"""
|
|
2
|
-
log_parser.py — Parse fetched log files into ErrorEvent objects.
|
|
3
|
-
|
|
4
|
-
Handles Java-style logs (Spring Boot / Logback format):
|
|
5
|
-
2024-01-15 12:34:56.789 ERROR [thread] class.ClassName - Message
|
|
6
|
-
followed by optional stack trace lines (^\tat ...)
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import hashlib
|
|
10
|
-
import re
|
|
11
|
-
import logging
|
|
12
|
-
from dataclasses import dataclass, field
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger(__name__)
|
|
16
|
-
|
|
17
|
-
_LOG_HEADER = re.compile(
|
|
18
|
-
r"^(?P<ts>\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2}[.,\d]*)\s+"
|
|
19
|
-
r"(?P<level>CRITICAL|ERROR|WARN(?:ING)?|INFO|DEBUG)\s+"
|
|
20
|
-
r"(?:\[(?P<thread>[^\]]*)\]\s+)?"
|
|
21
|
-
r"(?P<logger>\S+)\s+-\s+"
|
|
22
|
-
r"(?P<message>.+)$"
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
_STACK_LINE = re.compile(r"^\s+at |\s+\.\.\. \d+ more|^Caused by:")
|
|
26
|
-
|
|
27
|
-
SEVERITY_MAP = {
|
|
28
|
-
"CRITICAL": "CRITICAL",
|
|
29
|
-
"ERROR": "ERROR",
|
|
30
|
-
"WARN": "WARN",
|
|
31
|
-
"WARNING": "WARN",
|
|
32
|
-
"INFO": "INFO",
|
|
33
|
-
"DEBUG": "DEBUG",
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
_CRITICAL_PATTERNS = re.compile(
|
|
37
|
-
r"OutOfMemoryError|StackOverflowError|OOMKilled", re.IGNORECASE
|
|
38
|
-
)
|
|
39
|
-
_INFRA_PATTERNS = re.compile(
|
|
40
|
-
r"ConnectException|TimeoutException|ConnectionRefused|SocketTimeout",
|
|
41
|
-
re.IGNORECASE,
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@dataclass
|
|
46
|
-
class ErrorEvent:
|
|
47
|
-
source: str # log-source name (e.g. "SSOLWA")
|
|
48
|
-
log_file: str
|
|
49
|
-
timestamp: str
|
|
50
|
-
level: str # CRITICAL / ERROR / WARN
|
|
51
|
-
thread: str
|
|
52
|
-
logger_name: str
|
|
53
|
-
message: str
|
|
54
|
-
stack_trace: list[str] = field(default_factory=list)
|
|
55
|
-
fingerprint: str = ""
|
|
56
|
-
|
|
57
|
-
def __post_init__(self):
|
|
58
|
-
if not self.fingerprint:
|
|
59
|
-
self.fingerprint = _fingerprint(self.message, self.stack_trace)
|
|
60
|
-
|
|
61
|
-
@property
|
|
62
|
-
def severity(self) -> str:
|
|
63
|
-
if _CRITICAL_PATTERNS.search(self.message) or _CRITICAL_PATTERNS.search(
|
|
64
|
-
"\n".join(self.stack_trace)
|
|
65
|
-
):
|
|
66
|
-
return "CRITICAL"
|
|
67
|
-
return self.level
|
|
68
|
-
|
|
69
|
-
@property
|
|
70
|
-
def is_infra_issue(self) -> bool:
|
|
71
|
-
return bool(_INFRA_PATTERNS.search(self.message))
|
|
72
|
-
|
|
73
|
-
def short_summary(self) -> str:
|
|
74
|
-
return self.message[:120]
|
|
75
|
-
|
|
76
|
-
def full_text(self) -> str:
|
|
77
|
-
lines = [f"{self.timestamp} {self.level} [{self.thread}] {self.logger_name} - {self.message}"]
|
|
78
|
-
lines.extend(self.stack_trace)
|
|
79
|
-
return "\n".join(lines)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _normalize_message(msg: str) -> str:
|
|
83
|
-
msg = re.sub(r"0x[0-9a-fA-F]+", "0xADDR", msg)
|
|
84
|
-
msg = re.sub(r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", "UUID", msg)
|
|
85
|
-
msg = re.sub(r"\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[.,\d]*\b", "TIMESTAMP", msg)
|
|
86
|
-
msg = re.sub(r"\b\d+\b", "N", msg)
|
|
87
|
-
return msg.strip()
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def _fingerprint(message: str, stack_trace: list[str]) -> str:
|
|
91
|
-
top_frames = [l for l in stack_trace if l.strip().startswith("at ")][:3]
|
|
92
|
-
raw = _normalize_message(message) + "\n" + "\n".join(top_frames)
|
|
93
|
-
return hashlib.sha1(raw.encode()).hexdigest()[:16]
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def parse_log_file(path: Path, source_name: str) -> list[ErrorEvent]:
|
|
97
|
-
"""Parse a single log file and return all ERROR/WARN events."""
|
|
98
|
-
events: list[ErrorEvent] = []
|
|
99
|
-
current_header: re.Match | None = None
|
|
100
|
-
current_stack: list[str] = []
|
|
101
|
-
|
|
102
|
-
def flush():
|
|
103
|
-
if current_header is None:
|
|
104
|
-
return
|
|
105
|
-
level = SEVERITY_MAP.get(current_header.group("level").upper(), "WARN")
|
|
106
|
-
if level not in ("ERROR", "WARN", "CRITICAL"):
|
|
107
|
-
return
|
|
108
|
-
event = ErrorEvent(
|
|
109
|
-
source=source_name,
|
|
110
|
-
log_file=str(path),
|
|
111
|
-
timestamp=current_header.group("ts"),
|
|
112
|
-
level=level,
|
|
113
|
-
thread=current_header.group("thread") or "",
|
|
114
|
-
logger_name=current_header.group("logger"),
|
|
115
|
-
message=current_header.group("message"),
|
|
116
|
-
stack_trace=list(current_stack),
|
|
117
|
-
)
|
|
118
|
-
events.append(event)
|
|
119
|
-
|
|
120
|
-
try:
|
|
121
|
-
text = path.read_text(encoding="utf-8", errors="replace")
|
|
122
|
-
except OSError as e:
|
|
123
|
-
logger.error("Cannot read %s: %s", path, e)
|
|
124
|
-
return []
|
|
125
|
-
|
|
126
|
-
for line in text.splitlines():
|
|
127
|
-
m = _LOG_HEADER.match(line)
|
|
128
|
-
if m:
|
|
129
|
-
flush()
|
|
130
|
-
current_header = m
|
|
131
|
-
current_stack = []
|
|
132
|
-
elif current_header and _STACK_LINE.match(line):
|
|
133
|
-
current_stack.append(line)
|
|
134
|
-
|
|
135
|
-
flush()
|
|
136
|
-
logger.debug("Parsed %s: %d error/warn events", path.name, len(events))
|
|
137
|
-
return events
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def parse_all(
|
|
141
|
-
fetched_files: dict[str, list[Path]],
|
|
142
|
-
log_sources, # dict[str, LogSourceConfig]
|
|
143
|
-
) -> list[ErrorEvent]:
|
|
144
|
-
"""Parse all fetched log files across all sources."""
|
|
145
|
-
all_events: list[ErrorEvent] = []
|
|
146
|
-
for source_name, files in fetched_files.items():
|
|
147
|
-
for f in files:
|
|
148
|
-
all_events.extend(parse_log_file(f, source_name))
|
|
149
|
-
return all_events
|
|
1
|
+
"""
|
|
2
|
+
log_parser.py — Parse fetched log files into ErrorEvent objects.
|
|
3
|
+
|
|
4
|
+
Handles Java-style logs (Spring Boot / Logback format):
|
|
5
|
+
2024-01-15 12:34:56.789 ERROR [thread] class.ClassName - Message
|
|
6
|
+
followed by optional stack trace lines (^\tat ...)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import re
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
_LOG_HEADER = re.compile(
|
|
18
|
+
r"^(?P<ts>\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2}[.,\d]*)\s+"
|
|
19
|
+
r"(?P<level>CRITICAL|ERROR|WARN(?:ING)?|INFO|DEBUG)\s+"
|
|
20
|
+
r"(?:\[(?P<thread>[^\]]*)\]\s+)?"
|
|
21
|
+
r"(?P<logger>\S+)\s+-\s+"
|
|
22
|
+
r"(?P<message>.+)$"
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
_STACK_LINE = re.compile(r"^\s+at |\s+\.\.\. \d+ more|^Caused by:")
|
|
26
|
+
|
|
27
|
+
SEVERITY_MAP = {
|
|
28
|
+
"CRITICAL": "CRITICAL",
|
|
29
|
+
"ERROR": "ERROR",
|
|
30
|
+
"WARN": "WARN",
|
|
31
|
+
"WARNING": "WARN",
|
|
32
|
+
"INFO": "INFO",
|
|
33
|
+
"DEBUG": "DEBUG",
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
_CRITICAL_PATTERNS = re.compile(
|
|
37
|
+
r"OutOfMemoryError|StackOverflowError|OOMKilled", re.IGNORECASE
|
|
38
|
+
)
|
|
39
|
+
_INFRA_PATTERNS = re.compile(
|
|
40
|
+
r"ConnectException|TimeoutException|ConnectionRefused|SocketTimeout",
|
|
41
|
+
re.IGNORECASE,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
|
|
46
|
+
class ErrorEvent:
|
|
47
|
+
source: str # log-source name (e.g. "SSOLWA")
|
|
48
|
+
log_file: str
|
|
49
|
+
timestamp: str
|
|
50
|
+
level: str # CRITICAL / ERROR / WARN
|
|
51
|
+
thread: str
|
|
52
|
+
logger_name: str
|
|
53
|
+
message: str
|
|
54
|
+
stack_trace: list[str] = field(default_factory=list)
|
|
55
|
+
fingerprint: str = ""
|
|
56
|
+
|
|
57
|
+
def __post_init__(self):
|
|
58
|
+
if not self.fingerprint:
|
|
59
|
+
self.fingerprint = _fingerprint(self.message, self.stack_trace)
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def severity(self) -> str:
|
|
63
|
+
if _CRITICAL_PATTERNS.search(self.message) or _CRITICAL_PATTERNS.search(
|
|
64
|
+
"\n".join(self.stack_trace)
|
|
65
|
+
):
|
|
66
|
+
return "CRITICAL"
|
|
67
|
+
return self.level
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def is_infra_issue(self) -> bool:
|
|
71
|
+
return bool(_INFRA_PATTERNS.search(self.message))
|
|
72
|
+
|
|
73
|
+
def short_summary(self) -> str:
|
|
74
|
+
return self.message[:120]
|
|
75
|
+
|
|
76
|
+
def full_text(self) -> str:
|
|
77
|
+
lines = [f"{self.timestamp} {self.level} [{self.thread}] {self.logger_name} - {self.message}"]
|
|
78
|
+
lines.extend(self.stack_trace)
|
|
79
|
+
return "\n".join(lines)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _normalize_message(msg: str) -> str:
|
|
83
|
+
msg = re.sub(r"0x[0-9a-fA-F]+", "0xADDR", msg)
|
|
84
|
+
msg = re.sub(r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", "UUID", msg)
|
|
85
|
+
msg = re.sub(r"\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[.,\d]*\b", "TIMESTAMP", msg)
|
|
86
|
+
msg = re.sub(r"\b\d+\b", "N", msg)
|
|
87
|
+
return msg.strip()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _fingerprint(message: str, stack_trace: list[str]) -> str:
|
|
91
|
+
top_frames = [l for l in stack_trace if l.strip().startswith("at ")][:3]
|
|
92
|
+
raw = _normalize_message(message) + "\n" + "\n".join(top_frames)
|
|
93
|
+
return hashlib.sha1(raw.encode()).hexdigest()[:16]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_log_file(path: Path, source_name: str) -> list[ErrorEvent]:
|
|
97
|
+
"""Parse a single log file and return all ERROR/WARN events."""
|
|
98
|
+
events: list[ErrorEvent] = []
|
|
99
|
+
current_header: re.Match | None = None
|
|
100
|
+
current_stack: list[str] = []
|
|
101
|
+
|
|
102
|
+
def flush():
|
|
103
|
+
if current_header is None:
|
|
104
|
+
return
|
|
105
|
+
level = SEVERITY_MAP.get(current_header.group("level").upper(), "WARN")
|
|
106
|
+
if level not in ("ERROR", "WARN", "CRITICAL"):
|
|
107
|
+
return
|
|
108
|
+
event = ErrorEvent(
|
|
109
|
+
source=source_name,
|
|
110
|
+
log_file=str(path),
|
|
111
|
+
timestamp=current_header.group("ts"),
|
|
112
|
+
level=level,
|
|
113
|
+
thread=current_header.group("thread") or "",
|
|
114
|
+
logger_name=current_header.group("logger"),
|
|
115
|
+
message=current_header.group("message"),
|
|
116
|
+
stack_trace=list(current_stack),
|
|
117
|
+
)
|
|
118
|
+
events.append(event)
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
122
|
+
except OSError as e:
|
|
123
|
+
logger.error("Cannot read %s: %s", path, e)
|
|
124
|
+
return []
|
|
125
|
+
|
|
126
|
+
for line in text.splitlines():
|
|
127
|
+
m = _LOG_HEADER.match(line)
|
|
128
|
+
if m:
|
|
129
|
+
flush()
|
|
130
|
+
current_header = m
|
|
131
|
+
current_stack = []
|
|
132
|
+
elif current_header and _STACK_LINE.match(line):
|
|
133
|
+
current_stack.append(line)
|
|
134
|
+
|
|
135
|
+
flush()
|
|
136
|
+
logger.debug("Parsed %s: %d error/warn events", path.name, len(events))
|
|
137
|
+
return events
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def parse_all(
|
|
141
|
+
fetched_files: dict[str, list[Path]],
|
|
142
|
+
log_sources, # dict[str, LogSourceConfig]
|
|
143
|
+
) -> list[ErrorEvent]:
|
|
144
|
+
"""Parse all fetched log files across all sources."""
|
|
145
|
+
all_events: list[ErrorEvent] = []
|
|
146
|
+
for source_name, files in fetched_files.items():
|
|
147
|
+
for f in files:
|
|
148
|
+
all_events.extend(parse_log_file(f, source_name))
|
|
149
|
+
return all_events
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# -- Sentinel marker detection -------------------------------------------------
|
|
153
|
+
|
|
154
|
+
_SENTINEL_MARKER_RE = re.compile(r'SENTINEL:#([0-9a-f]{16})')
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def scan_for_markers(path: Path) -> list[str]:
|
|
158
|
+
"""
|
|
159
|
+
Scan a single log file for SENTINEL:#<fingerprint> markers injected by fix_engine.
|
|
160
|
+
Returns a list of full marker strings (e.g. ['SENTINEL:#abc123de45678901']).
|
|
161
|
+
"""
|
|
162
|
+
try:
|
|
163
|
+
text = path.read_text(encoding='utf-8', errors='replace')
|
|
164
|
+
except OSError:
|
|
165
|
+
return []
|
|
166
|
+
return [f'SENTINEL:#{m}' for m in _SENTINEL_MARKER_RE.findall(text)]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def scan_all_for_markers(fetched_files: dict[str, list[Path]]) -> list[str]:
|
|
170
|
+
"""Scan all fetched log files and return every SENTINEL marker found."""
|
|
171
|
+
markers: list[str] = []
|
|
172
|
+
for files in fetched_files.values():
|
|
173
|
+
for f in files:
|
|
174
|
+
markers.extend(scan_for_markers(f))
|
|
175
|
+
return markers
|
package/python/sentinel/main.py
CHANGED
|
@@ -21,10 +21,10 @@ from .fix_engine import generate_fix
|
|
|
21
21
|
from .git_manager import apply_and_commit, publish
|
|
22
22
|
from .cicd_trigger import trigger as cicd_trigger
|
|
23
23
|
from .log_fetcher import fetch_all
|
|
24
|
-
from .log_parser import parse_all, ErrorEvent
|
|
24
|
+
from .log_parser import parse_all, scan_all_for_markers, ErrorEvent
|
|
25
25
|
from .issue_watcher import scan_issues, mark_done, IssueEvent
|
|
26
26
|
from .repo_router import route
|
|
27
|
-
from .reporter import build_and_send, send_fix_notification, send_failure_notification
|
|
27
|
+
from .reporter import build_and_send, send_fix_notification, send_failure_notification, send_confirmed_notification, send_regression_notification
|
|
28
28
|
from .state_store import StateStore
|
|
29
29
|
|
|
30
30
|
logging.basicConfig(
|
|
@@ -81,7 +81,7 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
|
|
|
81
81
|
return
|
|
82
82
|
|
|
83
83
|
patches_dir = Path(sentinel.workspace_dir) / "patches"
|
|
84
|
-
status, patch_path = generate_fix(event, repo, sentinel, patches_dir)
|
|
84
|
+
status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
|
|
85
85
|
|
|
86
86
|
if status != "patch" or patch_path is None:
|
|
87
87
|
outcome = "skipped" if status == "skip" else "failed"
|
|
@@ -116,6 +116,7 @@ async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: Stat
|
|
|
116
116
|
branch=branch,
|
|
117
117
|
pr_url=pr_url,
|
|
118
118
|
repo_name=repo.repo_name,
|
|
119
|
+
sentinel_marker=marker,
|
|
119
120
|
)
|
|
120
121
|
|
|
121
122
|
send_fix_notification(sentinel, {
|
|
@@ -172,7 +173,7 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
|
|
|
172
173
|
return # Leave the file so admin can add the header
|
|
173
174
|
|
|
174
175
|
patches_dir = Path(sentinel.workspace_dir) / "patches"
|
|
175
|
-
status, patch_path = generate_fix(event, repo, sentinel, patches_dir)
|
|
176
|
+
status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
|
|
176
177
|
|
|
177
178
|
if status != "patch" or patch_path is None:
|
|
178
179
|
store.record_fix(event.fingerprint, "skipped" if status == "skip" else "failed",
|
|
@@ -209,6 +210,7 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
|
|
|
209
210
|
branch=branch,
|
|
210
211
|
pr_url=pr_url,
|
|
211
212
|
repo_name=repo.repo_name,
|
|
213
|
+
sentinel_marker=marker,
|
|
212
214
|
)
|
|
213
215
|
send_fix_notification(sentinel, {
|
|
214
216
|
"source": event.source,
|
|
@@ -232,6 +234,8 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
|
|
|
232
234
|
|
|
233
235
|
async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
|
|
234
236
|
global _report_requested
|
|
237
|
+
events: list = []
|
|
238
|
+
fetched: dict = {}
|
|
235
239
|
|
|
236
240
|
# ── Log sources (optional) ────────────────────────────────────────────────
|
|
237
241
|
sources = list(cfg_loader.log_sources.values())
|
|
@@ -254,6 +258,36 @@ async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
|
|
|
254
258
|
return_exceptions=True,
|
|
255
259
|
)
|
|
256
260
|
|
|
261
|
+
# ── SENTINEL marker scanning (phase 1: record first seen in prod logs) ────
|
|
262
|
+
if sources and fetched:
|
|
263
|
+
for marker in set(scan_all_for_markers(fetched)):
|
|
264
|
+
fix = store.mark_marker_seen(marker)
|
|
265
|
+
if fix:
|
|
266
|
+
logger.info("Marker seen in production: %s repo=%s — quiet period started",
|
|
267
|
+
marker, fix.get("repo_name"))
|
|
268
|
+
|
|
269
|
+
# ── Regression detection (error recurred before quiet period elapsed) ──────
|
|
270
|
+
if sources:
|
|
271
|
+
for event in events:
|
|
272
|
+
pending = store.get_marker_seen_fix(event.fingerprint)
|
|
273
|
+
if pending:
|
|
274
|
+
logger.warning("Regression: %s recurred after marker seen", event.fingerprint)
|
|
275
|
+
store.mark_regressed(event.fingerprint)
|
|
276
|
+
send_regression_notification(cfg_loader.sentinel, pending, {
|
|
277
|
+
"source": event.source,
|
|
278
|
+
"message": event.message,
|
|
279
|
+
"body": event.full_text()[:500],
|
|
280
|
+
})
|
|
281
|
+
|
|
282
|
+
# ── Phase 2: confirm fixes whose quiet period has elapsed ────────────────
|
|
283
|
+
quiet_hours = cfg_loader.sentinel.marker_confirm_hours
|
|
284
|
+
for fix in store.get_fixes_pending_confirmation(quiet_hours):
|
|
285
|
+
confirmed = store.confirm_fix(fix["fingerprint"])
|
|
286
|
+
if confirmed:
|
|
287
|
+
logger.info("Fix confirmed after %dh quiet period: %s repo=%s",
|
|
288
|
+
quiet_hours, fix["fingerprint"], fix.get("repo_name"))
|
|
289
|
+
send_confirmed_notification(cfg_loader.sentinel, confirmed)
|
|
290
|
+
|
|
257
291
|
# ── Issues directory (always checked) ────────────────────────────────────
|
|
258
292
|
issues = scan_issues(Path("."))
|
|
259
293
|
if issues:
|
package/python/sentinel/reporter.py
CHANGED
|
@@ -186,3 +186,89 @@ def send_failure_notification(cfg: SentinelConfig, details: dict):
|
|
|
186
186
|
_send_email(cfg, subject, html)
|
|
187
187
|
logger.info('Failure notification sent for %s', source)
|
|
188
188
|
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ---- Confirmed fix notification ----------------------------------------------
|
|
192
|
+
|
|
193
|
+
def send_confirmed_notification(cfg: SentinelConfig, fix: dict):
|
|
194
|
+
"""Notify admins that a fix has been confirmed running in production."""
|
|
195
|
+
if not cfg.mails:
|
|
196
|
+
return
|
|
197
|
+
repo_name = fix.get('repo_name', 'unknown')
|
|
198
|
+
fingerprint = fix.get('fingerprint', '')
|
|
199
|
+
marker = fix.get('sentinel_marker', '')
|
|
200
|
+
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
|
|
201
|
+
subject = f'[Sentinel] ✅ Fix confirmed in production: {repo_name} ({fingerprint[:8]})'
|
|
202
|
+
html = (
|
|
203
|
+
'<!DOCTYPE html><html><head><meta charset="utf-8">'
|
|
204
|
+
'<style>'
|
|
205
|
+
'body{font-family:Arial,sans-serif;font-size:14px;color:#222}'
|
|
206
|
+
'h2{color:#2e7d32}'
|
|
207
|
+
'table{border-collapse:collapse;width:100%;margin-bottom:16px}'
|
|
208
|
+
'th{background:#f1f3f4;text-align:left;padding:6px 10px}'
|
|
209
|
+
'td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}'
|
|
210
|
+
'.label{font-weight:bold;width:160px}'
|
|
211
|
+
'.mono{font-family:monospace;font-size:12px}'
|
|
212
|
+
'</style></head><body>'
|
|
213
|
+
'<h2>✅ Fix confirmed running in production</h2>'
|
|
214
|
+
f'<p><strong>{repo_name}</strong> · {ts}</p>'
|
|
215
|
+
'<table>'
|
|
216
|
+
f'<tr><td class="label">Fingerprint</td><td class="mono">{fingerprint}</td></tr>'
|
|
217
|
+
f'<tr><td class="label">Sentinel marker</td><td class="mono">{marker}</td></tr>'
|
|
218
|
+
f'<tr><td class="label">Commit</td><td class="mono">{fix.get("commit_hash", "")}</td></tr>'
|
|
219
|
+
f'<tr><td class="label">Branch</td><td class="mono">{fix.get("branch", "")}</td></tr>'
|
|
220
|
+
f'<tr><td class="label">Confirmed at</td><td>{fix.get("confirmed_at", ts)}</td></tr>'
|
|
221
|
+
'</table>'
|
|
222
|
+
'<p>The marker log line was detected in production logs, confirming the fix is live and the fixed code path executed.</p>'
|
|
223
|
+
'<hr><small>Sentinel — Autonomous DevOps Agent</small>'
|
|
224
|
+
'</body></html>'
|
|
225
|
+
)
|
|
226
|
+
_send_email(cfg, subject, html)
|
|
227
|
+
logger.info('Confirmed notification sent for %s', fingerprint)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ---- Regression notification ------------------------------------------------
|
|
231
|
+
|
|
232
|
+
def send_regression_notification(cfg: SentinelConfig, fix: dict, event: dict):
|
|
233
|
+
"""Notify admins that a confirmed fix did not resolve the issue."""
|
|
234
|
+
if not cfg.mails:
|
|
235
|
+
return
|
|
236
|
+
repo_name = fix.get('repo_name', 'unknown')
|
|
237
|
+
fingerprint = fix.get('fingerprint', '')
|
|
238
|
+
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
|
|
239
|
+
subject = f'[Sentinel] ⚠ Regression: fix did not resolve issue in {repo_name}'
|
|
240
|
+
html = (
|
|
241
|
+
'<!DOCTYPE html><html><head><meta charset="utf-8">'
|
|
242
|
+
'<style>'
|
|
243
|
+
'body{font-family:Arial,sans-serif;font-size:14px;color:#222}'
|
|
244
|
+
'h2{color:#c62828}'
|
|
245
|
+
'h3{color:#444;border-bottom:1px solid #ddd;padding-bottom:4px}'
|
|
246
|
+
'table{border-collapse:collapse;width:100%;margin-bottom:16px}'
|
|
247
|
+
'th{background:#f1f3f4;text-align:left;padding:6px 10px}'
|
|
248
|
+
'td{padding:5px 10px;border-bottom:1px solid #eee;vertical-align:top}'
|
|
249
|
+
'.label{font-weight:bold;width:160px}'
|
|
250
|
+
'.mono{font-family:monospace;font-size:12px}'
|
|
251
|
+
'pre{background:#f8f8f8;border:1px solid #ddd;padding:10px;font-size:12px;white-space:pre-wrap}'
|
|
252
|
+
'</style></head><body>'
|
|
253
|
+
'<h2>⚠ Regression detected — fix did not resolve the issue</h2>'
|
|
254
|
+
f'<p><strong>{repo_name}</strong> · {ts}</p>'
|
|
255
|
+
'<p>The original error recurred in production logs after the Sentinel fix was confirmed deployed.</p>'
|
|
256
|
+
'<h3>Fix Details</h3>'
|
|
257
|
+
'<table>'
|
|
258
|
+
f'<tr><td class="label">Fingerprint</td><td class="mono">{fingerprint}</td></tr>'
|
|
259
|
+
f'<tr><td class="label">Commit</td><td class="mono">{fix.get("commit_hash", "")}</td></tr>'
|
|
260
|
+
f'<tr><td class="label">Branch</td><td class="mono">{fix.get("branch", "")}</td></tr>'
|
|
261
|
+
f'<tr><td class="label">Confirmed at</td><td>{fix.get("confirmed_at", "")}</td></tr>'
|
|
262
|
+
'</table>'
|
|
263
|
+
'<h3>Recurring Error</h3>'
|
|
264
|
+
'<table>'
|
|
265
|
+
f'<tr><td class="label">Source</td><td class="mono">{event.get("source", "")}</td></tr>'
|
|
266
|
+
f'<tr><td class="label">Message</td><td class="mono">{event.get("message", "")}</td></tr>'
|
|
267
|
+
'</table>'
|
|
268
|
+
f'<pre>{event.get("body", "")}</pre>'
|
|
269
|
+
'<p>Sentinel will not attempt another automatic fix. Please investigate manually.</p>'
|
|
270
|
+
'<hr><small>Sentinel — Autonomous DevOps Agent</small>'
|
|
271
|
+
'</body></html>'
|
|
272
|
+
)
|
|
273
|
+
_send_email(cfg, subject, html)
|
|
274
|
+
logger.info('Regression notification sent for %s', fingerprint)
|