@misterhuydo/sentinel 1.0.5 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,6 +51,7 @@ class SentinelConfig:
51
51
  fix_confidence_threshold: float = 0.7
52
52
  log_retention_hours: int = 48
53
53
  anthropic_api_key: str = ""
54
+ marker_confirm_hours: int = 24 # quiet period before confirming a fix
54
55
 
55
56
 
56
57
  @dataclass
@@ -104,11 +105,21 @@ class ConfigLoader:
104
105
  )
105
106
 
106
107
  def _load_sentinel(self):
108
+ # Load workspace-level config first (~/sentinel/sentinel.properties),
109
+ # then overlay per-project config so project values win.
110
+ d: dict[str, str] = {}
111
+ workspace_props = self.config_dir.parent.parent / "sentinel.properties"
112
+ if workspace_props.exists():
113
+ d.update(_parse_properties(str(workspace_props)))
114
+ logger.debug("Loaded workspace config from %s", workspace_props)
115
+
107
116
  path = self.config_dir / "sentinel.properties"
108
117
  if not path.exists():
109
- logger.warning("sentinel.properties not found at %s", path)
110
- return
111
- d = _parse_properties(str(path))
118
+ if not d:
119
+ logger.warning("sentinel.properties not found at %s", path)
120
+ else:
121
+ d.update(_parse_properties(str(path)))
122
+
112
123
  c = SentinelConfig()
113
124
  c.poll_interval_seconds = int(d.get("POLL_INTERVAL_SECONDS", 120))
114
125
  c.smtp_host = d.get("SMTP_HOST", "")
@@ -125,6 +136,7 @@ class ConfigLoader:
125
136
  c.fix_confidence_threshold = float(d.get("FIX_CONFIDENCE_THRESHOLD", 0.7))
126
137
  c.log_retention_hours = int(d.get("LOG_RETENTION_HOURS", 48))
127
138
  c.anthropic_api_key = d.get("ANTHROPIC_API_KEY", "")
139
+ c.marker_confirm_hours = int(d.get("MARKER_CONFIRM_HOURS", 24))
128
140
  self.sentinel = c
129
141
 
130
142
  def _load_log_sources(self):
@@ -26,36 +26,74 @@ _DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
26
26
  _DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
27
27
 
28
28
 
29
- def _build_prompt(event: ErrorEvent, repo: RepoConfig, log_file: Path) -> str:
30
- return textwrap.dedent(f"""\
31
- You are fixing a production bug in the repository at {repo.local_path}.
32
- Repository: {repo.repo_name}
33
-
34
- LOG FILE: {log_file}
35
- Read this file first. It contains the last 48h of logs from {event.source} —
36
- use it to understand the frequency, surrounding context, and any warnings
37
- that preceded this error.
38
-
39
- ERROR fingerprint to fix (from {event.source}):
40
- {event.full_text()}
41
-
42
- Task:
43
- 1. Read the log file above to understand what led up to this error.
44
- 2. Use your available tools to explore the codebase and identify the root cause.
45
- 3. Output ONLY a unified diff patch (git diff format) fixing the issue.
46
- 4. Do not explain. Output only the patch.
47
- 5. If you cannot determine a safe fix, output: SKIP: <reason>
48
- """)
49
-
29
def _build_prompt(
    event,
    repo: "RepoConfig",
    log_file,
    marker: str,
    stale_markers: list[str] | None = None,
) -> str:
    """
    Build the instruction prompt handed to Claude Code for one fix request.

    Args:
        event: object exposing ``source`` (str) and ``full_text()`` -> str.
        repo: repository config; ``local_path`` and ``repo_name`` are read.
        log_file: path-like object with ``exists()``, or None when no rolling
            log file is available for this event.
        marker: marker string embedded in the log line the model is asked to
            add to every modified method.
        stale_markers: optional markers whose log lines should be removed
            from the codebase before the fix is made.

    Returns:
        The full prompt as one newline-joined string.

    NOTE(review): the visible call site in generate_fix passes only
    (event, repo, log_file) and returns an otherwise undefined ``marker``;
    the caller needs to create the marker and pass it here -- confirm.
    """
    # Context section: point at the rolling log when there is one, otherwise
    # tell the model that the issue text is all the context it gets.
    if log_file and log_file.exists():
        ctx = (
            "LOG FILE: " + str(log_file) + "\n"
            "Read this file first -- it contains the last 48h of logs from "
            + event.source + ".\n"
            "Use it to understand frequency, context, and preceding warnings."
        )
        step1 = "Read the log file above to understand what led up to this error."
    else:
        ctx = (
            "SOURCE: " + event.source + "\n"
            "No rolling log file available. The full issue description is below."
        )
        step1 = "Use the issue description above as your primary context."

    marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
    # Single-quoted f-strings so the double quotes of the sample log calls
    # need no escaping.
    marker_instruction = "\n".join([
        "For EVERY method and constructor you modify, add this as the FIRST executable line:",
        f' Java/Kotlin : log.info("{marker_label}");',
        f' Python : logger.info("{marker_label}")',
        f' Node.js : logger.info("{marker_label}")',
        "Use the logger already present in the file. Do not add new imports.",
        "This applies to ALL modified methods and constructors without exception.",
    ])

    cleanup = ""
    if stale_markers:
        marker_list = "\n".join(f" - {m}" for m in stale_markers)
        cleanup = (
            "CLEANUP (do this first, before the fix):\n"
            "Remove any log lines containing these stale Sentinel markers from the codebase:\n"
            + marker_list + "\n"
            "Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
        )

    lines_out = [
        f"You are fixing a production bug in the repository at {repo.local_path}.",
        f"Repository: {repo.repo_name}",
        "",
    ]
    if cleanup:
        lines_out += [cleanup, ""]
    lines_out += [
        ctx,
        "",
        f"ISSUE TO FIX (from {event.source}):",
        event.full_text(),
        "",
        "Task:",
        f"1. {step1}",
        "2. Use your available tools to explore the codebase and identify the root cause.",
        f"3. {marker_instruction}",
        "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
        "5. Do not explain. Output only the patch.",
        "6. If you cannot determine a safe fix, output: SKIP: <reason>",
    ]
    return "\n".join(lines_out)


def _extract_patch(output: str) -> str | None:
    """
    Pull the patch text out of raw model output.

    Restored because generate_fix still calls it. Prefers the contents of a
    fenced diff/patch block; otherwise returns the whole output when it
    contains diff headers; otherwise returns None.
    """
    m = _DIFF_BLOCK.search(output)
    if m:
        return m.group(1).strip()
    if _DIFF_HEADER.search(output):
        return output.strip()
    return None
59
97
 
60
98
  def _validate_patch(patch: str) -> tuple[bool, str]:
61
99
  files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
@@ -83,7 +121,10 @@ def generate_fix(
83
121
  (status, patch_path)
84
122
  status: "patch" | "skip" | "error"
85
123
  """
124
+ # Issues have source like "issues/filename" — no rolling log file exists
86
125
  log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
126
+ if not log_file.exists():
127
+ log_file = None
87
128
  prompt = _build_prompt(event, repo, log_file)
88
129
 
89
130
  logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
@@ -98,30 +139,30 @@ def generate_fix(
98
139
  )
99
140
  except subprocess.TimeoutExpired:
100
141
  logger.error("Claude Code timed out for %s", event.fingerprint)
101
- return "error", None
142
+ return "error", None, ""
102
143
  except FileNotFoundError:
103
144
  logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
104
- return "error", None
145
+ return "error", None, ""
105
146
 
106
147
  output = (result.stdout or "") + (result.stderr or "")
107
148
 
108
149
  if output.strip().upper().startswith("SKIP:"):
109
150
  reason = output.strip()[5:].strip()
110
151
  logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
111
- return "skip", None
152
+ return "skip", None, ""
112
153
 
113
154
  patch = _extract_patch(output)
114
155
  if not patch:
115
156
  logger.warning("No patch found in Claude output for %s", event.fingerprint)
116
- return "error", None
157
+ return "error", None, ""
117
158
 
118
159
  ok, reason = _validate_patch(patch)
119
160
  if not ok:
120
161
  logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
121
- return "skip", None
162
+ return "skip", None, ""
122
163
 
123
164
  patches_dir.mkdir(parents=True, exist_ok=True)
124
165
  patch_path = patches_dir / f"{event.fingerprint}.diff"
125
166
  patch_path.write_text(patch, encoding="utf-8")
126
167
  logger.info("Patch written to %s", patch_path)
127
- return "patch", patch_path
168
+ return "patch", patch_path, marker
@@ -0,0 +1,146 @@
1
+ """
2
+ issue_watcher.py — Scan the issues/ directory for manually-submitted bug reports.
3
+
4
+ Admins drop plain-text or markdown files into <project>/issues/.
5
+ Each file is treated as a fix request. Processed files are archived to issues/.done/.
6
+
7
+ File format (TARGET_REPO header is optional):
8
+
9
+ TARGET_REPO: my-repo-name
10
+
11
+ Short summary of the problem (becomes the email subject line)
12
+
13
+ Any details: customer feedback, stack traces, screenshots text, etc.
14
+ If TARGET_REPO is omitted and only one repo is configured, it is used automatically.
15
+ """
16
+
17
+ import hashlib
18
+ import logging
19
+ import time
20
+ from dataclasses import dataclass, field
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ _TARGET_REPO_PREFIX = "TARGET_REPO:"
27
+
28
+
29
@dataclass
class IssueEvent:
    """
    One manually submitted fix request read from the issues/ directory.

    Exposes the attributes and methods the fix pipeline reads from an
    ErrorEvent (source, message, fingerprint, severity, stack_trace,
    short_summary(), full_text(), is_infra_issue).
    """
    source: str         # "issues/<filename>" -- shown in emails and logs
    issue_file: Path    # full path, used for archiving after processing
    message: str        # first non-blank body line -- used as subject summary
    body: str           # full file content (the issue description)
    target_repo: str    # explicit TARGET_REPO value, or "" for auto-select
    fingerprint: str = ""
    severity: str = "ERROR"
    timestamp: str = ""

    # Compatibility fields matching ErrorEvent interface
    level: str = "ERROR"
    thread: str = ""
    logger_name: str = ""
    stack_trace: list[str] = field(default_factory=list)
    log_file: str = ""

    def __post_init__(self):
        # Each derived field is filled in only when the caller left it empty.
        if not self.fingerprint:
            seed = f"issue:{self.source}:{self.message[:200]}"
            digest = hashlib.sha1(seed.encode()).hexdigest()
            self.fingerprint = digest[:16]
        self.timestamp = self.timestamp or datetime.now(timezone.utc).isoformat()
        self.stack_trace = self.stack_trace or self.body.splitlines()

    @property
    def is_infra_issue(self) -> bool:
        """Always False for an IssueEvent."""
        return False

    def short_summary(self) -> str:
        """Return at most the first 120 characters of the message."""
        summary = self.message[:120]
        return summary

    def full_text(self) -> str:
        """Return the issue body unchanged."""
        return self.body
70
+
71
+
72
+ # Binary extensions Sentinel will never try to process
73
_BINARY_EXTENSIONS = {
    # archives
    ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z",
    # JVM artefacts
    ".jar", ".war", ".ear", ".class",
    # native / compiled binaries
    ".exe", ".dll", ".so", ".bin", ".pyc",
    # office documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx",
    # audio / video
    ".mp3", ".mp4", ".avi", ".mov",
}
80
+
81
+
82
+ def scan_issues(project_dir: Path) -> list[IssueEvent]:
83
+ """
84
+ Return all pending issue files from <project_dir>/issues/.
85
+
86
+ Accepts text, markdown, logs, images, JSON — anything Claude can read.
87
+ Skips dotfiles, archives, and compiled binaries.
88
+ """
89
+ issues_dir = project_dir / "issues"
90
+ if not issues_dir.exists():
91
+ return []
92
+
93
+ events = []
94
+ for f in sorted(issues_dir.iterdir()):
95
+ if not f.is_file() or f.name.startswith("."):
96
+ continue
97
+ if f.suffix.lower() in _BINARY_EXTENSIONS:
98
+ logger.debug("Skipping binary issue file: %s", f.name)
99
+ continue
100
+
101
+ try:
102
+ content = f.read_text(encoding="utf-8", errors="replace").strip()
103
+ except OSError as e:
104
+ logger.error("Cannot read issue file %s: %s", f, e)
105
+ continue
106
+
107
+ if not content:
108
+ continue
109
+
110
+ lines = content.splitlines()
111
+ target_repo = ""
112
+ body_start = 0
113
+
114
+ # Parse optional TARGET_REPO: header (must be the first non-blank line)
115
+ for i, line in enumerate(lines):
116
+ stripped = line.strip()
117
+ if stripped.upper().startswith(_TARGET_REPO_PREFIX):
118
+ target_repo = stripped[len(_TARGET_REPO_PREFIX):].strip()
119
+ body_start = i + 1
120
+ elif stripped:
121
+ break
122
+
123
+ body = "\n".join(lines[body_start:]).strip() or content
124
+ message = next((l.strip() for l in lines[body_start:] if l.strip()), f.name)
125
+
126
+ events.append(IssueEvent(
127
+ source=f"issues/{f.name}",
128
+ issue_file=f,
129
+ message=message,
130
+ body=body,
131
+ target_repo=target_repo,
132
+ ))
133
+ logger.info("Found issue: %s (target_repo=%r)", f.name, target_repo or "auto")
134
+
135
+ return events
136
+
137
+
138
def mark_done(issue_file: Path) -> None:
    """
    Archive a processed issue to issues/.done/ regardless of outcome.

    The file keeps its name unless that name is already taken in .done/, in
    which case a Unix timestamp is appended to the stem. If that name is
    taken as well, a counter is added, so an earlier archive is never
    overwritten.

    Raises:
        OSError: if the directory cannot be created or the rename fails.
    """
    done_dir = issue_file.parent / ".done"
    done_dir.mkdir(exist_ok=True)
    dest = done_dir / issue_file.name
    if dest.exists():
        stamp = int(time.time())
        dest = done_dir / f"{issue_file.stem}-{stamp}{issue_file.suffix}"
        # Two same-named issues archived within one second would collide on
        # the timestamped name; rename() then overwrites on POSIX and raises
        # on Windows, so keep probing until the name is free.
        attempt = 1
        while dest.exists():
            dest = done_dir / f"{issue_file.stem}-{stamp}-{attempt}{issue_file.suffix}"
            attempt += 1
    issue_file.rename(dest)
    logger.info("Issue archived: %s -> .done/%s", issue_file.name, dest.name)
@@ -1,149 +1,175 @@
1
- """
2
- log_parser.py — Parse fetched log files into ErrorEvent objects.
3
-
4
- Handles Java-style logs (Spring Boot / Logback format):
5
- 2024-01-15 12:34:56.789 ERROR [thread] class.ClassName - Message
6
- followed by optional stack trace lines (^\tat ...)
7
- """
8
-
9
- import hashlib
10
- import re
11
- import logging
12
- from dataclasses import dataclass, field
13
- from pathlib import Path
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
- _LOG_HEADER = re.compile(
18
- r"^(?P<ts>\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2}[.,\d]*)\s+"
19
- r"(?P<level>CRITICAL|ERROR|WARN(?:ING)?|INFO|DEBUG)\s+"
20
- r"(?:\[(?P<thread>[^\]]*)\]\s+)?"
21
- r"(?P<logger>\S+)\s+-\s+"
22
- r"(?P<message>.+)$"
23
- )
24
-
25
- _STACK_LINE = re.compile(r"^\s+at |\s+\.\.\. \d+ more|^Caused by:")
26
-
27
- SEVERITY_MAP = {
28
- "CRITICAL": "CRITICAL",
29
- "ERROR": "ERROR",
30
- "WARN": "WARN",
31
- "WARNING": "WARN",
32
- "INFO": "INFO",
33
- "DEBUG": "DEBUG",
34
- }
35
-
36
- _CRITICAL_PATTERNS = re.compile(
37
- r"OutOfMemoryError|StackOverflowError|OOMKilled", re.IGNORECASE
38
- )
39
- _INFRA_PATTERNS = re.compile(
40
- r"ConnectException|TimeoutException|ConnectionRefused|SocketTimeout",
41
- re.IGNORECASE,
42
- )
43
-
44
-
45
- @dataclass
46
- class ErrorEvent:
47
- source: str # log-source name (e.g. "SSOLWA")
48
- log_file: str
49
- timestamp: str
50
- level: str # CRITICAL / ERROR / WARN
51
- thread: str
52
- logger_name: str
53
- message: str
54
- stack_trace: list[str] = field(default_factory=list)
55
- fingerprint: str = ""
56
-
57
- def __post_init__(self):
58
- if not self.fingerprint:
59
- self.fingerprint = _fingerprint(self.message, self.stack_trace)
60
-
61
- @property
62
- def severity(self) -> str:
63
- if _CRITICAL_PATTERNS.search(self.message) or _CRITICAL_PATTERNS.search(
64
- "\n".join(self.stack_trace)
65
- ):
66
- return "CRITICAL"
67
- return self.level
68
-
69
- @property
70
- def is_infra_issue(self) -> bool:
71
- return bool(_INFRA_PATTERNS.search(self.message))
72
-
73
- def short_summary(self) -> str:
74
- return self.message[:120]
75
-
76
- def full_text(self) -> str:
77
- lines = [f"{self.timestamp} {self.level} [{self.thread}] {self.logger_name} - {self.message}"]
78
- lines.extend(self.stack_trace)
79
- return "\n".join(lines)
80
-
81
-
82
- def _normalize_message(msg: str) -> str:
83
- msg = re.sub(r"0x[0-9a-fA-F]+", "0xADDR", msg)
84
- msg = re.sub(r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", "UUID", msg)
85
- msg = re.sub(r"\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[.,\d]*\b", "TIMESTAMP", msg)
86
- msg = re.sub(r"\b\d+\b", "N", msg)
87
- return msg.strip()
88
-
89
-
90
- def _fingerprint(message: str, stack_trace: list[str]) -> str:
91
- top_frames = [l for l in stack_trace if l.strip().startswith("at ")][:3]
92
- raw = _normalize_message(message) + "\n" + "\n".join(top_frames)
93
- return hashlib.sha1(raw.encode()).hexdigest()[:16]
94
-
95
-
96
- def parse_log_file(path: Path, source_name: str) -> list[ErrorEvent]:
97
- """Parse a single log file and return all ERROR/WARN events."""
98
- events: list[ErrorEvent] = []
99
- current_header: re.Match | None = None
100
- current_stack: list[str] = []
101
-
102
- def flush():
103
- if current_header is None:
104
- return
105
- level = SEVERITY_MAP.get(current_header.group("level").upper(), "WARN")
106
- if level not in ("ERROR", "WARN", "CRITICAL"):
107
- return
108
- event = ErrorEvent(
109
- source=source_name,
110
- log_file=str(path),
111
- timestamp=current_header.group("ts"),
112
- level=level,
113
- thread=current_header.group("thread") or "",
114
- logger_name=current_header.group("logger"),
115
- message=current_header.group("message"),
116
- stack_trace=list(current_stack),
117
- )
118
- events.append(event)
119
-
120
- try:
121
- text = path.read_text(encoding="utf-8", errors="replace")
122
- except OSError as e:
123
- logger.error("Cannot read %s: %s", path, e)
124
- return []
125
-
126
- for line in text.splitlines():
127
- m = _LOG_HEADER.match(line)
128
- if m:
129
- flush()
130
- current_header = m
131
- current_stack = []
132
- elif current_header and _STACK_LINE.match(line):
133
- current_stack.append(line)
134
-
135
- flush()
136
- logger.debug("Parsed %s: %d error/warn events", path.name, len(events))
137
- return events
138
-
139
-
140
- def parse_all(
141
- fetched_files: dict[str, list[Path]],
142
- log_sources, # dict[str, LogSourceConfig]
143
- ) -> list[ErrorEvent]:
144
- """Parse all fetched log files across all sources."""
145
- all_events: list[ErrorEvent] = []
146
- for source_name, files in fetched_files.items():
147
- for f in files:
148
- all_events.extend(parse_log_file(f, source_name))
149
- return all_events
1
+ """
2
+ log_parser.py — Parse fetched log files into ErrorEvent objects.
3
+
4
+ Handles Java-style logs (Spring Boot / Logback format):
5
+ 2024-01-15 12:34:56.789 ERROR [thread] class.ClassName - Message
6
+ followed by optional stack trace lines (^\tat ...)
7
+ """
8
+
9
+ import hashlib
10
+ import re
11
+ import logging
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# First line of a log record:
#   <timestamp> <LEVEL> [<thread>] <logger> - <message>
# The bracketed thread part is optional.
_LOG_HEADER = re.compile(
    r"^(?P<ts>\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2}[.,\d]*)\s+"
    r"(?P<level>CRITICAL|ERROR|WARN(?:ING)?|INFO|DEBUG)\s+"
    r"(?:\[(?P<thread>[^\]]*)\]\s+)?"
    r"(?P<logger>\S+)\s+-\s+"
    r"(?P<message>.+)$"
)

# Continuation lines that belong to a stack trace.
_STACK_LINE = re.compile(r"^\s+at |\s+\.\.\. \d+ more|^Caused by:")

# Level name as written in the log -> canonical severity name.
SEVERITY_MAP = dict(
    CRITICAL="CRITICAL",
    ERROR="ERROR",
    WARN="WARN",
    WARNING="WARN",
    INFO="INFO",
    DEBUG="DEBUG",
)

# Matches in the message or stack trace make an event report CRITICAL severity.
_CRITICAL_PATTERNS = re.compile(
    r"OutOfMemoryError|StackOverflowError|OOMKilled", re.IGNORECASE
)
# Matches in the message flag the event as an infrastructure issue.
_INFRA_PATTERNS = re.compile(
    r"ConnectException|TimeoutException|ConnectionRefused|SocketTimeout",
    re.IGNORECASE,
)
43
+
44
+
45
@dataclass
class ErrorEvent:
    """One parsed log record together with its stack-trace lines."""
    source: str        # log-source name (e.g. "SSOLWA")
    log_file: str
    timestamp: str
    level: str         # CRITICAL / ERROR / WARN
    thread: str
    logger_name: str
    message: str
    stack_trace: list[str] = field(default_factory=list)
    fingerprint: str = ""

    def __post_init__(self):
        # Keep a caller-supplied fingerprint; otherwise derive one.
        if self.fingerprint:
            return
        self.fingerprint = _fingerprint(self.message, self.stack_trace)

    @property
    def severity(self) -> str:
        """Return "CRITICAL" if the message or trace matches a critical pattern, else the level."""
        haystacks = (self.message, "\n".join(self.stack_trace))
        if any(_CRITICAL_PATTERNS.search(text) for text in haystacks):
            return "CRITICAL"
        return self.level

    @property
    def is_infra_issue(self) -> bool:
        """Return True when the message matches an infrastructure pattern."""
        return _INFRA_PATTERNS.search(self.message) is not None

    def short_summary(self) -> str:
        """Return at most the first 120 characters of the message."""
        summary = self.message[:120]
        return summary

    def full_text(self) -> str:
        """Return the header line followed by the stack-trace lines, newline-joined."""
        header = f"{self.timestamp} {self.level} [{self.thread}] {self.logger_name} - {self.message}"
        return "\n".join([header, *self.stack_trace])
80
+
81
+
82
def _normalize_message(msg: str) -> str:
    """
    Replace volatile parts of a log message with fixed placeholders.

    Applied in order: hex addresses -> 0xADDR, lowercase UUIDs -> UUID,
    date-time stamps -> TIMESTAMP, remaining standalone integers -> N.
    The result is stripped of surrounding whitespace.
    """
    substitutions = (
        (r"0x[0-9a-fA-F]+", "0xADDR"),
        (r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", "UUID"),
        (r"\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[.,\d]*\b", "TIMESTAMP"),
        (r"\b\d+\b", "N"),
    )
    normalized = msg
    # Order matters: the generic integer rule must run last.
    for pattern, placeholder in substitutions:
        normalized = re.sub(pattern, placeholder, normalized)
    return normalized.strip()
88
+
89
+
90
def _fingerprint(message: str, stack_trace: list[str]) -> str:
    """
    Return a 16-hex-character fingerprint for an error.

    The fingerprint is the truncated SHA-1 of the normalized message plus
    the first three stack lines that start with "at " once stripped.
    """
    frames = []
    for entry in stack_trace:
        if entry.strip().startswith("at "):
            frames.append(entry)
            if len(frames) == 3:
                break
    material = "\n".join([_normalize_message(message), "\n".join(frames)])
    return hashlib.sha1(material.encode()).hexdigest()[:16]
94
+
95
+
96
def parse_log_file(path: Path, source_name: str) -> list[ErrorEvent]:
    """
    Parse one log file into ErrorEvent objects.

    Only records whose level maps to ERROR, WARN or CRITICAL are returned.
    Stack-trace lines following a header line are attached to that record.
    An unreadable file is logged and yields an empty list.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        logger.error("Cannot read %s: %s", path, e)
        return []

    events: list[ErrorEvent] = []
    header: re.Match | None = None
    frames: list[str] = []

    def emit(match, stack):
        # Turn the pending record into an event when its level qualifies.
        if match is None:
            return
        level = SEVERITY_MAP.get(match.group("level").upper(), "WARN")
        if level in ("ERROR", "WARN", "CRITICAL"):
            events.append(ErrorEvent(
                source=source_name,
                log_file=str(path),
                timestamp=match.group("ts"),
                level=level,
                thread=match.group("thread") or "",
                logger_name=match.group("logger"),
                message=match.group("message"),
                stack_trace=list(stack),
            ))

    for line in text.splitlines():
        found = _LOG_HEADER.match(line)
        if found is not None:
            # A new header closes the record collected so far.
            emit(header, frames)
            header, frames = found, []
        elif header is not None and _STACK_LINE.match(line):
            frames.append(line)

    # Flush the last record of the file.
    emit(header, frames)
    logger.debug("Parsed %s: %d error/warn events", path.name, len(events))
    return events
138
+
139
+
140
def parse_all(
    fetched_files: dict[str, list[Path]],
    log_sources,  # dict[str, LogSourceConfig]
) -> list[ErrorEvent]:
    """
    Parse every fetched log file of every source into one flat event list.

    ``log_sources`` is accepted but not used by this function.
    """
    return [
        event
        for source_name, paths in fetched_files.items()
        for log_path in paths
        for event in parse_log_file(log_path, source_name)
    ]
150
+
151
+
152
+ # -- Sentinel marker detection -------------------------------------------------
153
+
154
_SENTINEL_MARKER_RE = re.compile(r'SENTINEL:#([0-9a-f]{16})')


def scan_for_markers(path: Path) -> list[str]:
    """
    Collect every SENTINEL:#<16 hex chars> marker that occurs in one log file.

    Markers are returned as full strings (e.g. 'SENTINEL:#abc123de45678901'),
    in file order, once per occurrence. An unreadable file yields an empty list.
    """
    try:
        content = path.read_text(encoding='utf-8', errors='replace')
    except OSError:
        return []
    # group(0) is the whole match, i.e. the prefix plus the hex fingerprint.
    found: list[str] = []
    for hit in _SENTINEL_MARKER_RE.finditer(content):
        found.append(hit.group(0))
    return found
167
+
168
+
169
def scan_all_for_markers(fetched_files: dict[str, list[Path]]) -> list[str]:
    """
    Return every SENTINEL marker found across all fetched log files.

    Results are concatenated in source order, then file order; duplicates
    are kept.
    """
    return [
        marker
        for paths in fetched_files.values()
        for log_path in paths
        for marker in scan_for_markers(log_path)
    ]