@misterhuydo/sentinel 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.cairn/.hint-lock CHANGED
@@ -1 +1 @@
1
- 2026-03-30T14:37:27.048Z
1
+ 2026-04-08T10:44:56.970Z
@@ -11,12 +11,6 @@
11
11
  "minifiedAt": 1774252437350.0059,
12
12
  "readCount": 1
13
13
  },
14
- "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\main.py": {
15
- "tempPath": "J:\\Projects\\Sentinel\\cli\\.cairn\\views\\5f5141_main.py",
16
- "state": "compressed",
17
- "minifiedAt": 1774873506610.8174,
18
- "readCount": 1
19
- },
20
14
  "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\cicd_trigger.py": {
21
15
  "tempPath": "J:\\Projects\\Sentinel\\cli\\.cairn\\views\\7802b9_cicd_trigger.py",
22
16
  "state": "compressed",
@@ -1,7 +1,19 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-03-30T14:46:10.664Z",
3
- "checkpoint_at": "2026-03-30T14:46:10.665Z",
4
- "active_files": [],
5
- "notes": [],
6
- "mtime_snapshot": {}
2
+ "message": "Auto-checkpoint at 2026-04-08T10:32:33.099Z",
3
+ "checkpoint_at": "2026-04-08T10:32:33.174Z",
4
+ "active_files": [
5
+ "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
6
+ "J:\\Projects\\Sentinel\\cli\\lib\\test.js",
7
+ "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\main.py",
8
+ "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\cicd_trigger.py"
9
+ ],
10
+ "notes": [
11
+ "[2026-04-08] git-snapshot: .cairn/session.json | 28 ++++-\n .claude/settings.local.json | 47 ++++++-\n cli/.cairn/.hint-lock | 2 +-\n cli/.cairn/minify-map.json | 14 ++-\n cli/.cairn/session.json | 4 +-\n cli/.cairn/views/62a614_bundle.js | 5 +-\n cli/lib/.cairn/minify-map.json | 6 +\n cli/lib/.cairn/views/fb78ac_upgrade.js | 37 +++++-\n cli/lib/.cairn/views/fc4a1a_add.js | 215 +++++++++++++++++++++++++--------\n 9 files changed, 295 insertions(+), 63 deletions(-) | status: M ../.cairn/session.json\n M ../.claude/settings.local.json\n M .cairn/.hint-lock\n M .cairn/minify-map.json\n M .cairn/session.json\n M .cairn/views/62a614_bundle.js\n M lib/.cairn/minify-map.json\n M lib/.cairn/views/fb78ac_upgrade.js\n M lib/.cairn/views/fc4a1a_add.js\n?? ../.cairn/.cairn-project\n?? ../.cairn/memory/\n?? ../.cairn/minify-map.json\n?? ../.cairn/views/\n?? .cairn/views/23edf4_sentinel_boss.py\n?? .cairn/views/5f5141_main.py\n?? .cairn/views/7802b9_cicd_trigger.py\n?? .cairn/views/ac3df4_repo_task_engine.py\n?? lib/.cairn/views/2a85cc_init.js\n?? lib/.cairn/views/e26996_slack-setup.js\n?? ../scripts/fix_ask_codebase_context.py\n?? ../scripts/fix_ask_codebase_stdin.py\n?? ../scripts/fix_chain_slack.py\n?? ../scripts/fix_fstring.py\n?? ../scripts/fix_knowledge_cache.py\n?? ../scripts/fix_knowledge_cache_staleness.py\n?? ../scripts/fix_merge_confirm.py\n?? ../scripts/fix_permission_messages.py\n?? ../scripts/fix_pr_check_head_detect.py\n?? ../scripts/fix_pr_msg_newlines.py\n?? ../scripts/fix_pr_tracking_boss.py\n?? ../scripts/fix_pr_tracking_db.py\n?? ../scripts/fix_pr_tracking_main.py\n?? ../scripts/fix_project_isolation.py\n?? ../scripts/fix_system_prompt.py\n?? ../scripts/fix_two_bugs.py\n?? ../scripts/patch_chain_release.py"
12
+ ],
13
+ "mtime_snapshot": {
14
+ "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js": 1774252515044.4768,
15
+ "J:\\Projects\\Sentinel\\cli\\lib\\test.js": 1774252437350.0059,
16
+ "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\main.py": 1775575790779.8606,
17
+ "J:\\Projects\\Sentinel\\cli\\python\\sentinel\\cicd_trigger.py": 1774523631399.9514
18
+ }
7
19
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.5.6",
3
+ "version": "1.5.7",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -516,6 +516,8 @@ When to act vs. when to ask:
516
516
  - Write/action tools (create_issue, trigger_poll, pull_repo, pause_sentinel, install_tool, merge_pr,
517
517
  retry_issue, cancel_issue, post_file) → act immediately for clear commands; confirm only when intent is ambiguous
518
518
  (e.g. unclear which project or repo to target).
519
+ - trigger_poll → trigger it, then IMMEDIATELY call get_status and report what is happening.
520
+ Never stop at "poll triggered" — the human wants to know the outcome, not that the trigger was sent.
519
521
  - Explaining a tool ("what does X do?") → explain naturally, then offer to run it if relevant.
520
522
  - NEVER gate investigation on user approval. If diagnosing a problem, run all relevant read tools
521
523
  first, then present findings. Asking "Want me to look?" wastes a round trip.
@@ -578,7 +580,9 @@ Sentinel is a 24/7 autonomous agent. Act on clear signals without asking permiss
578
580
  3. Populate `findings` with curated evidence — only when relevant and concise:
579
581
  - Summarise: which services, how often, key pattern, 1-3 example lines. Max 500 words.
580
582
  - Do NOT paste raw tool output.
581
- 4. After creating, tell them the issue was queued and Sentinel will pick it up on the next poll.
583
+ 4. After creating, immediately call get_status and report: how many items are now in the queue,
584
+ what is already in progress, and when the next poll will pick things up.
585
+ Tell them they will get an @mention in this channel when the fix is applied or blocked.
582
586
 
583
587
  Autonomous action policy — Sentinel acts, humans review outcomes:
584
588
  - Read/investigate tools → always act immediately, no confirmation needed.
@@ -588,6 +592,35 @@ Autonomous action policy — Sentinel acts, humans review outcomes:
588
592
  - pause_sentinel → confirm once (halts all monitoring).
589
593
  - Everything else → use judgment. When in doubt, act and report what you did.
590
594
 
595
+ PROACTIVE COMMUNICATION — CRITICAL RULES:
596
+ You are a push-first agent. The engineer should NEVER have to ask for a status update
597
+ on something you already know about or can find out.
598
+
599
+ BANNED phrases (never say these):
600
+ - "Want me to check again in a few minutes?"
601
+ - "Should I check on that?"
602
+ - "I'll keep an eye on it."
603
+ - "I'll monitor that for you."
604
+ - "Let me know if you'd like an update."
605
+ - Anything that puts the follow-up burden back on the human.
606
+
607
+ Instead:
608
+ - If checking is warranted → check now, report the result in the same message.
609
+ - If something is running async (fix, task, poll) → tell the human EXACTLY what automated
610
+ notification they will receive and when. Example: "The main loop will post here when the
611
+ fix is applied or fails — usually within 2–3 minutes."
612
+ - After trigger_poll → wait for it, then call get_status immediately and report findings.
613
+ Do not stop at "poll triggered" — that is not an answer, it is a non-answer.
614
+
615
+ The main poll loop sends proactive Slack messages automatically when:
616
+ - A fix is applied (commit + PR opened)
617
+ - A fix is blocked (test failure, patch too large, etc.)
618
+ - A fix is confirmed (no recurrence for MARKER_CONFIRM_HOURS)
619
+ - A repo task or dev task completes or fails
620
+ - A health check recovers
621
+ You do NOT need to promise to monitor — the system already does it. Tell users this explicitly
622
+ when it is relevant, then end with [DONE].
623
+
591
624
  When the engineer's request is fully handled, end your LAST message with the token: [DONE]
592
625
  IMPORTANT: Always write your actual reply text FIRST, then append [DONE] at the end. Example: "Hello! I'm Sentinel. [DONE]". Never output [DONE] as your only content.
593
626
  For greetings like "hello" or empty messages, introduce yourself briefly and offer help, then end with [DONE].
@@ -657,9 +690,11 @@ _TOOLS = [
657
690
  {
658
691
  "name": "get_status",
659
692
  "description": (
660
- "Get recent errors, fixes applied, fixes pending review, and open PRs. "
693
+ "Get recent errors, fixes applied, fixes pending review, open PRs, "
694
+ "and the live task queue (issues and repo tasks waiting to be picked up). "
661
695
  "Use for: 'what happened today?', 'any issues?', 'how are things?', "
662
- "'what are the open PRs?', 'did sentinel fix anything?'"
696
+ "'what are the open PRs?', 'did sentinel fix anything?', "
697
+ "'what is queued?', 'what is pending?', 'progress on X?'"
663
698
  ),
664
699
  "input_schema": {
665
700
  "type": "object",
@@ -1829,6 +1864,61 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
1829
1864
  }
1830
1865
  for e in errors[:8]
1831
1866
  ]
1867
+
1868
+ # ── Live queue scan ────────────────────────────────────────────────────
1869
+ # Show what is physically in the queue dirs, not just what is in the DB.
1870
+ from datetime import datetime as _dt, timezone as _tz
1871
+ _now = _dt.now(_tz.utc)
1872
+
1873
+ def _queue_entries(queue_dir: Path) -> list[dict]:
1874
+ """Return pending task/issue files with age and first-line preview."""
1875
+ entries = []
1876
+ if not queue_dir.exists():
1877
+ return entries
1878
+ for f in sorted(queue_dir.iterdir()):
1879
+ if not f.is_file() or f.name.startswith("."):
1880
+ continue
1881
+ try:
1882
+ lines = f.read_text(encoding="utf-8", errors="replace").splitlines()
1883
+ except OSError:
1884
+ continue
1885
+ # Parse RUN_AT if present (scheduled tasks held for the future)
1886
+ run_at_str = next(
1887
+ (l.split(":", 1)[1].strip() for l in lines
1888
+ if l.strip().upper().startswith("RUN_AT:")), None
1889
+ )
1890
+ run_at_dt = None
1891
+ if run_at_str:
1892
+ try:
1893
+ run_at_dt = _dt.fromisoformat(run_at_str).astimezone(_tz.utc)
1894
+ except ValueError:
1895
+ pass
1896
+ age_s = int((_now - _dt.fromtimestamp(f.stat().st_mtime, _tz.utc)).total_seconds())
1897
+ preview = next((l.strip() for l in lines if l.strip() and not l.strip().upper().startswith(
1898
+ ("REPO:", "TYPE:", "SUBMITTED_BY:", "SUBMITTED_AT:", "RUN_AT:", "NOTIFY:",
1899
+ "SOURCE:", "TARGET_REPO:", "FINGERPRINT:", "SUPPORT_URL:")
1900
+ )), f.name)
1901
+ entry = {
1902
+ "file": f.name,
1903
+ "age_seconds": age_s,
1904
+ "preview": preview[:120],
1905
+ }
1906
+ if run_at_dt:
1907
+ remaining = int((run_at_dt - _now).total_seconds())
1908
+ if remaining > 0:
1909
+ entry["scheduled_in_seconds"] = remaining
1910
+ entry["scheduled_at"] = run_at_dt.isoformat()
1911
+ entries.append(entry)
1912
+ return entries
1913
+
1914
+ _project_dirs = _find_project_dirs()
1915
+ _pd = _project_dirs[0] if _project_dirs else Path(".")
1916
+ queued_issues = _queue_entries(_pd / "issues")
1917
+ queued_repo_tasks = _queue_entries(_pd / "repo-tasks")
1918
+ # Scheduled tasks are repo-tasks with a future run_at
1919
+ scheduled_tasks = [t for t in queued_repo_tasks if "scheduled_in_seconds" in t]
1920
+ pending_repo_tasks = [t for t in queued_repo_tasks if "scheduled_in_seconds" not in t]
1921
+
1832
1922
  return json.dumps({
1833
1923
  "window_hours": hours,
1834
1924
  "errors_detected": len(errors),
@@ -1846,6 +1936,9 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
1846
1936
  for p in prs
1847
1937
  ],
1848
1938
  "sentinel_paused": Path("SENTINEL_PAUSE").exists(),
1939
+ "queued_issues": queued_issues,
1940
+ "queued_repo_tasks": pending_repo_tasks,
1941
+ "scheduled_tasks": scheduled_tasks,
1849
1942
  })
1850
1943
 
1851
1944
  if name == "check_auth_status":
@@ -1,1067 +0,0 @@
1
- from __future__ import annotations
2
- import argparse
3
- import asyncio
4
- import json
5
- import logging
6
- import os
7
- import re
8
- import shutil
9
- import signal
10
- import subprocess
11
- import sys
12
- from datetime import datetime, timezone
13
- from pathlib import Path
14
- from .cairn_client import ensure_installed as cairn_installed, index_repo
15
- from .config_loader import ConfigLoader, SentinelConfig
16
- from .fix_engine import generate_fix
17
- from .git_manager import apply_and_commit, publish, _git_env, MissingToolError, poll_open_prs
18
- from .cicd_trigger import trigger as cicd_trigger
19
- from .log_fetcher import fetch_all
20
- from .log_parser import parse_all, scan_all_for_markers, ErrorEvent
21
- from .issue_watcher import scan_issues, mark_done, purge_old_issues, IssueEvent
22
- from .repo_router import route
23
- from .reporter import build_and_send, send_fix_notification, send_failure_notification, send_confirmed_notification, send_regression_notification, send_startup_notification, send_upgrade_notification
24
- from .notify import notify_fix_blocked, notify_fix_applied, notify_missing_tool, notify_tool_installing, notify_cascade_started, notify_cascade_result
25
- from .health_checker import evaluate_repos
26
- from .state_store import StateStore
27
- logging.basicConfig(
28
- level=logging.INFO,
29
- format="%(asctime)s %(levelname)-7s %(name)s — %(message)s",
30
- handlers=[
31
- logging.StreamHandler(sys.stdout),
32
- ],
33
- )
34
- logger = logging.getLogger("sentinel")
35
- _report_requested = False
36
- def _on_sigusr1(*_):
37
- global _report_requested
38
- _report_requested = True
39
- logger.info("SIGUSR1 received — health report queued")
40
- def _register_signals():
41
- try:
42
- signal.signal(signal.SIGUSR1, _on_sigusr1)
43
- except (OSError, AttributeError):
44
- pass
45
- _SAFE_TOOLS: dict[str, dict] = {
46
- "mvn": {"pkg": "maven", "build_files": ["pom.xml"]},
47
- "npm": {"pkg": "npm", "build_files": ["package.json"]},
48
- "gradle": {"pkg": "gradle", "build_files": ["build.gradle", "build.gradle.kts"]},
49
- "pip3": {"pkg": "python3-pip", "build_files": ["requirements.txt", "pyproject.toml", "setup.py"]},
50
- "pip": {"pkg": "python3-pip", "build_files": ["requirements.txt", "pyproject.toml", "setup.py"]},
51
- "yarn": {"pkg": "yarn", "build_files": ["yarn.lock"]},
52
- "make": {"pkg": "make", "build_files": ["Makefile"]},
53
- }
54
- def _auto_install_if_safe(
55
- tool: str,
56
- repo_path: str,
57
- sentinel: SentinelConfig,
58
- repo_name: str,
59
- source: str,
60
- ) -> bool:
61
- from .notify import slack_alert
62
- spec = _SAFE_TOOLS.get(tool)
63
- if not spec:
64
- logger.info("Tool '%s' not in safe whitelist — skipping auto-install", tool)
65
- return False
66
- repo_p = Path(repo_path)
67
- def _build_file_exists(bf: str) -> bool:
68
- if (repo_p / bf).exists():
69
- return True
70
- return any(sub.is_dir() and (sub / bf).exists() for sub in repo_p.iterdir())
71
- if not any(_build_file_exists(bf) for bf in spec["build_files"]):
72
- logger.info(
73
- "Tool '%s' is whitelisted but no matching build file found in %s — skipping",
74
- tool, repo_path,
75
- )
76
- return False
77
- pkg = spec["pkg"]
78
- logger.info("Auto-installing '%s' (pkg: %s) required by %s", tool, pkg, repo_name)
79
- notify_tool_installing(sentinel, tool, repo_name, source)
80
- env = {**os.environ}
81
- api_key = sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
82
- if api_key:
83
- env["ANTHROPIC_API_KEY"] = api_key
84
- prompt = (
85
- f"Install '{pkg}' on this server so it can be used as a build tool. "
86
- f"Detect the OS and package manager (yum/dnf/apt), then run the appropriate install command. "
87
- f"After installing, verify by running '{tool} --version' or the equivalent. "
88
- f"Report the installed version. No explanations — just install and report."
89
- )
90
- try:
91
- result = subprocess.run(
92
- [sentinel.claude_code_bin, "--dangerously-skip-permissions", "--bare", "--print", prompt],
93
- capture_output=True, text=True, timeout=300, env=env,
94
- )
95
- output = ((result.stdout or "") + (result.stderr or "")).strip()
96
- success = result.returncode == 0
97
- logger.info("Auto-install '%s': %s\n%s", tool, "OK" if success else "FAILED", output[-500:])
98
- if success:
99
- slack_alert(
100
- sentinel.slack_bot_token,
101
- sentinel.slack_channel,
102
- f":white_check_mark: *`{tool}` installed* for *{repo_name}*\n"
103
- f"```{output[-300:]}```\nRetrying fix now...",
104
- )
105
- else:
106
- slack_alert(
107
- sentinel.slack_bot_token,
108
- sentinel.slack_channel,
109
- f":x: *Auto-install of `{tool}` failed* for *{repo_name}*\n"
110
- f"```{output[-300:]}```\nAdmin intervention required.",
111
- )
112
- return success
113
- except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
114
- logger.error("Auto-install of '%s' failed: %s", tool, exc)
115
- return False
116
- def _run_cascade(repo, sentinel, cfg_loader):
117
- from .dependency_manager import get_artifact_id, get_release_version, plan_cascade, execute_cascade
118
- artifact_id = get_artifact_id(repo.local_path)
119
- new_version = get_release_version(repo.local_path)
120
- if not artifact_id or not new_version:
121
- logger.warning("Cascade skipped for %s — could not read artifact/version from pom.xml", repo.repo_name)
122
- return
123
- cascade_plan = plan_cascade(repo.repo_name, cfg_loader.repos)
124
- target_repo_names = [d["repo"] for d in cascade_plan.get("dependents", [])]
125
- if not target_repo_names:
126
- logger.info("Cascade: no dependents found for %s", artifact_id)
127
- return
128
- logger.info("Cascade: updating %s in %d repo(s): %s", artifact_id, len(target_repo_names), target_repo_names)
129
- notify_cascade_started(sentinel, artifact_id, new_version, target_repo_names)
130
- results = execute_cascade(repo.repo_name, new_version, artifact_id, cfg_loader.repos, sentinel)
131
- notify_cascade_result(sentinel, artifact_id, new_version, results)
132
- ok = sum(1 for r in results if r.success)
133
- logger.info("Cascade complete: %d/%d repos updated for %s=%s", ok, len(results), artifact_id, new_version)
134
- async def _handle_error(event: ErrorEvent, cfg_loader: ConfigLoader, store: StateStore):
135
- sentinel = cfg_loader.sentinel
136
- repo = route(event, cfg_loader.repos)
137
- if not repo:
138
- return
139
- if Path("SENTINEL_PAUSE").exists():
140
- logger.info("SENTINEL_PAUSE present — fix activity halted")
141
- return
142
- if event.is_infra_issue:
143
- logger.info("Infra issue for %s — log only", event.fingerprint)
144
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
145
- return
146
- if event.severity == "CRITICAL" and repo.auto_publish:
147
- logger.warning("CRITICAL in auto-publish repo '%s' — flagging for human review", repo.repo_name)
148
- store.record_fix(event.fingerprint, "skipped", repo_name=repo.repo_name)
149
- return
150
- if store.fix_attempted_recently(event.fingerprint, hours=24):
151
- logger.debug("Fix already attempted recently for %s", event.fingerprint)
152
- return
153
- patches_dir = Path(sentinel.workspace_dir).resolve() / "patches"
154
- status, patch_path, marker = generate_fix(event, repo, sentinel, patches_dir, store)
155
- if status != "patch" or patch_path is None:
156
- outcome = "skipped" if status in ("skip", "needs_human") else "failed"
157
- store.record_fix(event.fingerprint, outcome, repo_name=repo.repo_name)
158
- if status == "needs_human":
159
- notify_fix_blocked(sentinel, event.source, event.message,
160
- reason=marker, repo_name=repo.repo_name,
161
- submitter_user_id="")
162
- else:
163
- send_failure_notification(sentinel, {
164
- "source": event.source,
165
- "message": event.message,
166
- "repo_name": repo.repo_name,
167
- "reason": f"Claude Code returned {status.upper()}",
168
- "body": event.full_text()[:500],
169
- })
170
- return
171
- try:
172
- commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
173
- except MissingToolError as e:
174
- logger.warning("Missing tool for %s: %s", event.source, e)
175
- if _auto_install_if_safe(e.tool, repo.local_path, sentinel, repo.repo_name, event.source):
176
- try:
177
- commit_status, commit_hash = apply_and_commit(event, patch_path, repo, sentinel)
178
- except MissingToolError as e2:
179
- logger.error("Still missing tool after auto-install: %s", e2)
180
- notify_missing_tool(sentinel, e2.tool, repo.repo_name, event.source, "")
181
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
182
- return
183
- else:
184
- notify_missing_tool(sentinel, e.tool, repo.repo_name, event.source, "")
185
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
186
- return
187
- if commit_status != "committed":
188
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
189
- send_failure_notification(sentinel, {
190
- "source": event.source,
191
- "message": event.message,
192
- "repo_name": repo.repo_name,
193
- "reason": "Patch was generated but commit/tests failed",
194
- "body": event.full_text()[:500],
195
- })
196
- return
197
- branch, pr_url = publish(event, repo, sentinel, commit_hash)
198
- store.record_fix(
199
- event.fingerprint,
200
- "applied" if repo.auto_publish else "pending",
201
- patch_path=str(patch_path),
202
- commit_hash=commit_hash,
203
- branch=branch,
204
- pr_url=pr_url,
205
- repo_name=repo.repo_name,
206
- sentinel_marker=marker,
207
- )
208
- send_fix_notification(sentinel, {
209
- "source": event.source,
210
- "severity": event.severity,
211
- "fingerprint": event.fingerprint,
212
- "first_seen": str(event.timestamp),
213
- "message": event.message,
214
- "stack_trace": getattr(event, "stack_trace", ""),
215
- "repo_name": repo.repo_name,
216
- "commit_hash": commit_hash,
217
- "branch": branch,
218
- "pr_url": pr_url,
219
- "auto_publish": repo.auto_publish,
220
- "files_changed": [],
221
- })
222
- if repo.auto_publish:
223
- ok = cicd_trigger(repo, store, event.fingerprint)
224
- if ok and repo.cicd_type.lower() in ("jenkins_release", "jenkins-release"):
225
- _run_cascade(repo, sentinel, cfg_loader)
226
- async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: StateStore) -> dict | None:
227
- sentinel = cfg_loader.sentinel
228
- if Path("SENTINEL_PAUSE").exists():
229
- logger.info("SENTINEL_PAUSE present -- fix activity halted")
230
- return
231
- if store.fix_attempted_recently(event.fingerprint, hours=24):
232
- logger.debug("Issue already processed recently: %s", event.source)
233
- mark_done(event.issue_file)
234
- return
235
- if event.target_repo:
236
- repo = cfg_loader.repos.get(event.target_repo)
237
- if not repo:
238
- logger.warning("TARGET_REPO %r not found in config -- leaving %s for admin",
239
- event.target_repo, event.source)
240
- return
241
- elif len(cfg_loader.repos) == 1:
242
- repo = next(iter(cfg_loader.repos.values()))
243
- else:
244
- logger.warning(
245
- "Cannot auto-route %s -- add 'TARGET_REPO: <repo>' as first line in the file",
246
- event.source,
247
- )
248
- return
249
- from .notify import slack_alert as _slack_alert, slack_thread_reply as _slack_reply
250
- _submitter = getattr(event, "submitter_user_id", "")
251
- _started_msg = (
252
- f":hammer: Working on *<@{_submitter}>*'s request — *{repo.repo_name}*\n"
253
- f"_{event.message[:120]}_"
254
- ) if _submitter else (
255
- f":hammer: Working on *{repo.repo_name}*\n_{event.message[:120]}_"
256
- )
257
- _thread_ts = _slack_alert(sentinel.slack_bot_token, sentinel.slack_channel, _started_msg)
258
- def _progress(msg: str) -> None:
259
- _slack_reply(sentinel.slack_bot_token, sentinel.slack_channel, _thread_ts, msg)
260
- try:
261
- patches_dir = Path(sentinel.workspace_dir).resolve() / "patches"
262
- _loop = asyncio.get_event_loop()
263
- _progress(":brain: Analyzing with Claude Code...")
264
- status, patch_path, marker = await _loop.run_in_executor(
265
- None, generate_fix, event, repo, sentinel, patches_dir, store, _progress
266
- )
267
- submitter_uid = getattr(event, "submitter_user_id", "")
268
- if status != "patch" or patch_path is None:
269
- store.record_fix(event.fingerprint, "skipped" if status in ("skip", "needs_human") else "failed",
270
- repo_name=repo.repo_name)
271
- raw_reason = marker if status == "needs_human" else f"Claude Code returned {status.upper()}"
272
- reason_text = _boss_qualify_dev_reason(raw_reason, sentinel) if status == "needs_human" else raw_reason
273
- _progress(f":x: Could not generate a safe fix — {reason_text[:120]}")
274
- notify_fix_blocked(sentinel, event.source, event.message,
275
- reason=reason_text, repo_name=repo.repo_name,
276
- submitter_user_id=submitter_uid)
277
- mark_done(event.issue_file)
278
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
279
- "status": "blocked", "summary": reason_text[:120], "pr_url": ""}
280
- _progress(f":mag: Patch generated — running tests (`{repo.repo_name}`)...")
281
- commit_status, commit_hash = await _loop.run_in_executor(
282
- None, apply_and_commit, event, patch_path, repo, sentinel
283
- )
284
- if commit_status != "committed":
285
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
286
- _progress(":x: Tests failed or patch couldn't apply — needs human review")
287
- notify_fix_blocked(sentinel, event.source, event.message,
288
- reason="Patch was generated but commit/tests failed",
289
- repo_name=repo.repo_name,
290
- submitter_user_id=submitter_uid)
291
- mark_done(event.issue_file)
292
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
293
- "status": "blocked", "summary": "Commit/tests failed", "pr_url": ""}
294
- _progress(f":white_check_mark: Tests passed — committed `{commit_hash[:8]}`, pushing to `{repo.branch}`...")
295
- branch, pr_url = publish(event, repo, sentinel, commit_hash)
296
- store.record_fix(
297
- event.fingerprint,
298
- "applied" if repo.auto_publish else "pending",
299
- patch_path=str(patch_path),
300
- commit_hash=commit_hash,
301
- branch=branch,
302
- pr_url=pr_url,
303
- repo_name=repo.repo_name,
304
- sentinel_marker=marker,
305
- )
306
- send_fix_notification(sentinel, {
307
- "source": event.source,
308
- "severity": "ERROR",
309
- "fingerprint": event.fingerprint,
310
- "first_seen": event.timestamp,
311
- "message": event.message,
312
- "stack_trace": event.body,
313
- "repo_name": repo.repo_name,
314
- "commit_hash": commit_hash,
315
- "branch": branch,
316
- "pr_url": pr_url,
317
- "auto_publish": repo.auto_publish,
318
- "files_changed": [],
319
- })
320
- if pr_url:
321
- _progress(f":arrow_right: <{pr_url}|PR opened> — awaiting review")
322
- notify_fix_applied(sentinel, event.source, event.message,
323
- repo_name=repo.repo_name, branch=branch, pr_url=pr_url,
324
- submitter_user_id=submitter_uid)
325
- mark_done(event.issue_file)
326
- if repo.auto_publish:
327
- ok = cicd_trigger(repo, store, event.fingerprint)
328
- if ok:
329
- _progress(f":rocket: Release triggered via {repo.cicd_type}")
330
- if ok and repo.cicd_type.lower() in ("jenkins_release", "jenkins-release"):
331
- _run_cascade(repo, sentinel, cfg_loader)
332
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
333
- "status": "done", "summary": event.message[:120], "pr_url": pr_url}
334
- except MissingToolError as e:
335
- logger.warning("Missing tool for %s: %s", event.source, e)
336
- submitter_uid = getattr(event, "submitter_user_id", "")
337
- _progress(f":wrench: `{e.tool}` not found — auto-installing...")
338
- installed = await _loop.run_in_executor(
339
- None, _auto_install_if_safe, e.tool, repo.local_path, sentinel, repo.repo_name, event.source
340
- )
341
- if not installed:
342
- _progress(f":x: `{e.tool}` is not a known safe tool — manual install required")
343
- notify_missing_tool(sentinel, e.tool, repo.repo_name, event.source, submitter_uid)
344
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
345
- mark_done(event.issue_file)
346
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
347
- "status": "blocked", "summary": f"Missing tool: {e.tool}", "pr_url": ""}
348
- _progress(f":white_check_mark: `{e.tool}` installed — retrying tests...")
349
- try:
350
- commit_status, commit_hash = await _loop.run_in_executor(
351
- None, apply_and_commit, event, patch_path, repo, sentinel
352
- )
353
- except MissingToolError as e2:
354
- logger.error("Still missing tool after auto-install: %s", e2)
355
- _progress(f":x: Still missing `{e2.tool}` after install — manual fix needed")
356
- notify_missing_tool(sentinel, e2.tool, repo.repo_name, event.source, submitter_uid)
357
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
358
- mark_done(event.issue_file)
359
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
360
- "status": "blocked", "summary": f"Missing tool: {e2.tool}", "pr_url": ""}
361
- if commit_status != "committed":
362
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
363
- _progress(":x: Tests still failing after tool install — needs human review")
364
- notify_fix_blocked(sentinel, event.source, event.message,
365
- reason="Patch was generated but commit/tests failed after tool install",
366
- repo_name=repo.repo_name, submitter_user_id=submitter_uid)
367
- mark_done(event.issue_file)
368
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
369
- "status": "blocked", "summary": "Commit/tests failed after tool install", "pr_url": ""}
370
- _progress(f":white_check_mark: Tests passed — committed `{commit_hash[:8]}`, pushing to `{repo.branch}`...")
371
- branch, pr_url = publish(event, repo, sentinel, commit_hash)
372
- store.record_fix(
373
- event.fingerprint,
374
- "applied" if repo.auto_publish else "pending",
375
- patch_path=str(patch_path), commit_hash=commit_hash,
376
- branch=branch, pr_url=pr_url, repo_name=repo.repo_name, sentinel_marker=marker,
377
- )
378
- send_fix_notification(sentinel, {
379
- "source": event.source, "severity": "ERROR",
380
- "fingerprint": event.fingerprint, "first_seen": event.timestamp,
381
- "message": event.message, "stack_trace": event.body,
382
- "repo_name": repo.repo_name, "commit_hash": commit_hash,
383
- "branch": branch, "pr_url": pr_url,
384
- "auto_publish": repo.auto_publish, "files_changed": [],
385
- })
386
- if pr_url:
387
- _progress(f":arrow_right: <{pr_url}|PR opened> — awaiting review")
388
- notify_fix_applied(sentinel, event.source, event.message,
389
- repo_name=repo.repo_name, branch=branch, pr_url=pr_url,
390
- submitter_user_id=submitter_uid)
391
- mark_done(event.issue_file)
392
- if repo.auto_publish:
393
- ok = cicd_trigger(repo, store, event.fingerprint)
394
- if ok:
395
- _progress(f":rocket: Release triggered via {repo.cicd_type}")
396
- if ok and repo.cicd_type.lower() in ("jenkins_release", "jenkins-release"):
397
- _run_cascade(repo, sentinel, cfg_loader)
398
- return {"submitter": submitter_uid, "repo_name": repo.repo_name,
399
- "status": "done", "summary": event.message[:120], "pr_url": pr_url}
400
- except Exception:
401
- logger.exception("Unexpected error processing issue %s — archiving to prevent retry loop", event.source)
402
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
403
- mark_done(event.issue_file)
404
- return {"submitter": getattr(event, "submitter_user_id", ""),
405
- "repo_name": repo.repo_name if repo else event.target_repo,
406
- "status": "blocked", "summary": "Unexpected error — check logs", "pr_url": ""}
407
async def poll_cycle(cfg_loader: ConfigLoader, store: StateStore):
    """Run one complete monitoring pass.

    The pass performs, in order:

    1. Fetch and parse logs from every configured source, record each
       event and hand events with no recent fix attempt to
       ``_handle_error`` (concurrently).
    2. Scan the fetched logs for markers and flag a regression for every
       event whose fingerprint has a marker-seen fix.
    3. Confirm fixes returned by ``get_fixes_pending_confirmation``.
    4. Poll open PRs and log merged / skipped ones.
    5. Purge old issue files, process pending ones via ``_handle_issue``
       and DM each submitter a single summary.
    6. Evaluate repo health in the default executor and react to the
       ``fix`` / ``alert_once`` / ``recovered`` actions.
    7. Send the health digest when it was requested or is due.

    Args:
        cfg_loader: Provides ``sentinel`` settings, ``log_sources`` and
            ``repos``.
        store: State store used for errors, fixes and markers.
    """
    global _report_requested

    # ── 1. Logs → events → fix attempts ─────────────────────────────
    events: list = []
    fetched: dict = {}
    sources = list(cfg_loader.log_sources.values())
    if sources:
        logger.info("Fetching logs from %d source(s)...", len(sources))
        fetched = await fetch_all(sources, cfg_loader.sentinel)
        events = parse_all(fetched, cfg_loader.log_sources)
        logger.info("Parsed %d error/warn events", len(events))

    new_events = []
    for event in events:
        store.record_error(event.fingerprint, event.source, event.message)
        if not store.fix_attempted_recently(event.fingerprint):
            new_events.append(event)

    if new_events:
        logger.info("%d new log event(s) to process", len(new_events))
        outcomes = await asyncio.gather(
            *[_handle_error(e, cfg_loader, store) for e in new_events],
            return_exceptions=True,
        )
        # return_exceptions=True hands failures back as values; log them
        # so a crashing handler does not vanish without a trace.
        for event, outcome in zip(new_events, outcomes):
            if isinstance(outcome, BaseException):
                logger.error("Error handler failed for %s: %r",
                             event.fingerprint, outcome)

    # ── 2. Markers and regressions ──────────────────────────────────
    if sources and fetched:
        for marker in set(scan_all_for_markers(fetched)):
            fix = store.mark_marker_seen(marker)
            if fix:
                logger.info("Marker seen in production: %s repo=%s — quiet period started",
                            marker, fix.get("repo_name"))

    if sources:
        for event in events:
            pending = store.get_marker_seen_fix(event.fingerprint)
            if pending:
                logger.warning("Regression: %s recurred after marker seen", event.fingerprint)
                store.mark_regressed(event.fingerprint)
                send_regression_notification(cfg_loader.sentinel, pending, {
                    "source": event.source,
                    "message": event.message,
                    "body": event.full_text()[:500],
                })

    # ── 3. Confirm fixes after the quiet period ─────────────────────
    quiet_hours = cfg_loader.sentinel.marker_confirm_hours
    for fix in store.get_fixes_pending_confirmation(quiet_hours):
        confirmed = store.confirm_fix(fix["fingerprint"])
        if confirmed:
            logger.info("Fix confirmed after %dh quiet period: %s repo=%s",
                        quiet_hours, fix["fingerprint"], fix.get("repo_name"))
            send_confirmed_notification(cfg_loader.sentinel, confirmed)

    # ── 4. Open PR state changes ────────────────────────────────────
    pr_changes = poll_open_prs(store, cfg_loader.sentinel.github_token, cfg=cfg_loader.sentinel)
    for ch in pr_changes:
        if ch["new_status"] == "merged":
            logger.info("PR merged externally: %s (fp=%s)", ch["pr_url"], ch["fingerprint"][:8])
        elif ch["new_status"] == "skipped":
            logger.info("PR closed/rejected: %s (fp=%s) — will not retry for 24h",
                        ch["pr_url"], ch["fingerprint"][:8])

    # ── 5. Issue files ──────────────────────────────────────────────
    purge_old_issues(Path(".") / "issues")
    issues = scan_issues(Path("."))
    if issues:
        logger.info("%d issue file(s) found in issues/", len(issues))
        issue_results = await asyncio.gather(
            *[_handle_issue(e, cfg_loader, store) for e in issues],
            return_exceptions=True,
        )
        from collections import defaultdict
        from .notify import slack_dm as _slack_dm

        by_submitter: dict[str, list] = defaultdict(list)
        for issue, r in zip(issues, issue_results):
            if isinstance(r, BaseException):
                # Same reasoning as above: do not drop handler crashes.
                logger.error("Issue handler failed for %s: %r",
                             getattr(issue, "source", "?"), r)
            elif isinstance(r, dict) and r.get("submitter"):
                by_submitter[r["submitter"]].append(r)

        # One consolidated DM per submitter instead of one per issue.
        for uid, results in by_submitter.items():
            done = [r for r in results if r["status"] == "done"]
            blocked = [r for r in results if r["status"] == "blocked"]
            lines = []
            for r in done:
                pr = f" — <{r['pr_url']}|PR>" if r.get("pr_url") else ""
                lines.append(f":white_check_mark: *{r['repo_name']}*{pr}")
            for r in blocked:
                lines.append(f":x: *{r['repo_name']}* — {r['summary']}")
            if done and blocked:
                header = f":sentinel: *All done — {len(done)} succeeded, {len(blocked)} need attention:*"
            elif blocked:
                header = f":sentinel: *All done — {len(blocked)} need attention:*"
            else:
                header = f":sentinel: *All done — {len(done)} completed successfully:*"
            _slack_dm(
                cfg_loader.sentinel.slack_bot_token,
                uid,
                f"{header}\n" + "\n".join(lines),
            )

    # ── 6. Repo health ──────────────────────────────────────────────
    if cfg_loader.repos:
        # evaluate_repos is synchronous; run it in the default executor
        # so the event loop stays responsive.
        _loop = asyncio.get_running_loop()
        health_results = await _loop.run_in_executor(
            None,
            lambda: evaluate_repos(
                cfg_loader.repos, cfg_loader.log_sources,
                cfg_loader.sentinel.workspace_dir, store=store,
            )
        )
        for hr in health_results:
            if hr["action"] == "fix":
                fp = f"health-{hr['repo_name']}"
                store.record_error(fp, f"health_checker/{hr['repo_name']}", hr["message"])
                if not store.fix_attempted_recently(fp, hours=6):
                    from .log_parser import ErrorEvent as _EE
                    from datetime import datetime, timezone as _tz
                    # Synthesize an error event so the regular fix
                    # pipeline can handle the startup failure.
                    synth = _EE(
                        source=f"health_checker/{hr['repo_name']}",
                        log_file="",
                        timestamp=datetime.now(_tz.utc).isoformat(),
                        level="ERROR",
                        thread="health_checker",
                        logger_name="health_checker",
                        message=f"App startup failure detected: {hr['message']}",
                        stack_trace=[hr["startup_failure_line"]] if hr["startup_failure_line"] else [],
                    )
                    synth.fingerprint = fp
                    await _handle_error(synth, cfg_loader, store)
            elif hr["action"] == "alert_once":
                from .notify import slack_alert
                slack_alert(
                    cfg_loader.sentinel.slack_bot_token,
                    cfg_loader.sentinel.slack_channel,
                    (
                        f":question: *{hr['repo_name']}* health returned {hr['message']}"
                        " with no startup errors in logs.\n"
                        "If this is deliberate maintenance, tell Boss: "
                        f"`maintenance {hr['repo_name']}` \n"
                        "I'll silently monitor until it's back online."
                    ),
                )
            elif hr["action"] == "recovered":
                from .notify import slack_alert
                slack_alert(
                    cfg_loader.sentinel.slack_bot_token,
                    cfg_loader.sentinel.slack_channel,
                    f":white_check_mark: *{hr['repo_name']}* is back online.",
                )

    # ── 7. Health digest ────────────────────────────────────────────
    if _report_requested or (cfg_loader.sentinel.send_health and _report_due(cfg_loader, store)):
        _report_requested = False
        logger.info("Sending health digest...")
        build_and_send(cfg_loader.sentinel, store)
546
def _report_due(cfg_loader: ConfigLoader, store: StateStore) -> bool:
    """Tell whether the periodic health report should be sent now.

    Returns True when the store has no previous report time, or when at
    least ``report_interval_hours`` have passed since it.
    """
    previous = store.last_report_time()
    if previous is None:
        return True
    age = datetime.now(timezone.utc) - previous
    threshold_seconds = cfg_loader.sentinel.report_interval_hours * 3600
    return age.total_seconds() >= threshold_seconds
552
async def _startup_checks(cfg_loader: ConfigLoader) -> dict:
    """Run the pre-flight checks and collect their outcomes.

    For every configured repo the local checkout is cloned when missing
    and then indexed via ``index_repo``; for every SSH log source the
    first host is probed with a short ``ssh ... echo ok``.

    Returns:
        A dict with the list-valued keys ``repos``, ``cairn``, ``ssh``
        (entries carry ``name``, ``status`` and ``message``; ssh entries
        also ``host``) and ``warnings`` (plain strings).
    """
    results = {
        "repos": [],
        "cairn": [],
        "ssh": [],
        "warnings": [],
    }
    if not cairn_installed():
        results["warnings"].append("Cairn not found — run: npm install -g @misterhuydo/cairn-mcp")

    for name, repo in cfg_loader.repos.items():
        local = Path(repo.local_path)
        if not local.exists():
            logger.info("Cloning %s → %s", repo.repo_url, repo.local_path)
            r = subprocess.run(
                ["git", "clone", repo.repo_url, str(local)],
                capture_output=True, text=True,
                env=_git_env(repo),
            )
            if r.returncode != 0:
                msg = r.stderr.strip()
                logger.error("Clone failed for %s: %s", name, msg)
                results["repos"].append({"name": name, "status": "error", "message": msg})
                # Nothing to index when the clone failed.
                continue
            results["repos"].append({"name": name, "status": "cloned", "message": repo.repo_url})
        else:
            results["repos"].append({"name": name, "status": "exists", "message": str(local)})
        ok = index_repo(repo)
        results["cairn"].append({
            "name": name,
            "status": "ok" if ok else "error",
            "message": "indexed" if ok else "cairn index failed — check logs",
        })

    for src_name, src in cfg_loader.log_sources.items():
        if src.source_type == "ssh" and src.hosts:
            # Only the first host is probed.
            host = src.hosts[0]
            try:
                r = subprocess.run(
                    ["ssh", "-i", src.key, "-o", "StrictHostKeyChecking=no",
                     # BatchMode keeps ssh from waiting on a password or
                     # passphrase prompt that nobody can answer here.
                     "-o", "BatchMode=yes",
                     "-o", "ConnectTimeout=5", f"ec2-user@{host}", "echo ok"],
                    capture_output=True, text=True, timeout=15,
                )
                ok = r.returncode == 0
                results["ssh"].append({
                    "name": src_name, "host": host,
                    "status": "ok" if ok else "error",
                    "message": "" if ok else r.stderr.strip(),
                })
            except Exception as e:
                # Timeouts and a missing ssh binary are reported as a
                # failed check rather than aborting startup.
                results["ssh"].append({"name": src_name, "host": host,
                                       "status": "error", "message": str(e)})
    return results
603
async def _send_startup_email_delayed(cfg, results: dict, delay: int = 300):
    """Wait ``delay`` seconds, then send the startup notification.

    A failure while sending is logged and not propagated.
    """
    await asyncio.sleep(delay)
    try:
        send_startup_notification(cfg, results)
    except Exception as exc:
        logger.error("Failed to send startup notification: %s", exc)
609
def _config_repo_git_env(project_dir: Path | None = None) -> dict:
    """Build the environment for git commands run in the config repo.

    Starts from a copy of ``os.environ``. When ``~/.ssh/<slug>.key``
    exists — ``<slug>`` being the lower-cased last path component of the
    ``origin`` remote without a trailing ``.git`` — ``GIT_SSH_COMMAND``
    is set so git uses that key non-interactively.

    Args:
        project_dir: Directory of the config repo; defaults to the
            current directory.

    Returns:
        The environment mapping to pass to ``subprocess``.
    """
    import shlex

    env = os.environ.copy()
    try:
        cwd = str((project_dir or Path(".")).resolve())
        r = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            cwd=cwd, capture_output=True, text=True, timeout=5,
        )
        remote = r.stdout.strip()
        # Handles both "host/org/repo.git" and "git@host:repo.git".
        slug = re.sub(r"\.git$", "", remote).rsplit("/", 1)[-1].rsplit(":", 1)[-1].rsplit("/", 1)[-1].lower()
        if slug:  # no remote -> no key name to look for
            key = Path.home() / ".ssh" / f"{slug}.key"
            if key.exists():
                # Quote the path: the value is parsed by a shell, so a
                # home directory containing spaces would otherwise split.
                env["GIT_SSH_COMMAND"] = (
                    f"ssh -i {shlex.quote(str(key))} -o StrictHostKeyChecking=no -o BatchMode=yes"
                )
    except Exception:
        # Best effort: without a usable key git falls back to its
        # default credentials, so any failure here is non-fatal.
        pass
    return env
625
def _poll_config_repo(cfg_loader: ConfigLoader) -> bool:
    """Pull the config repo in the current directory and reload on change.

    Runs ``git pull --rebase --autostash``. A change is detected by
    comparing ``HEAD`` before and after the pull; when ``HEAD`` cannot be
    read the pull output is checked for "Already up to date." instead.

    Returns:
        True when the repo changed and ``cfg_loader.load()`` was called,
        False otherwise (including when the directory is not a git repo,
        the pull fails, or any error occurs).
    """
    project_dir = Path(".")
    git_dir = project_dir / ".git"
    if not git_dir.exists():
        return False

    def _head_commit() -> str:
        """Return the current HEAD hash, or "" when it cannot be read."""
        try:
            probe = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                cwd=str(project_dir),
                capture_output=True, text=True, timeout=5,
            )
        except Exception:
            return ""
        return probe.stdout.strip() if probe.returncode == 0 else ""

    try:
        before = _head_commit()
        result = subprocess.run(
            ["git", "pull", "--rebase", "--autostash"],
            cwd=str(project_dir),
            capture_output=True, text=True, timeout=30,
            env=_config_repo_git_env(project_dir),
        )
        if result.returncode != 0:
            logger.warning("Config repo git pull failed: %s", result.stderr.strip())
            return False
        after = _head_commit()
        if before and after:
            # The pull message varies with git version and locale, so
            # the commit hash is the reliable signal.
            changed = before != after
        else:
            changed = "Already up to date." not in result.stdout
        if changed:
            logger.info("Config repo updated — reloading config\n%s", result.stdout.strip())
            cfg_loader.load()
        return changed
    except Exception as e:
        logger.warning("Config repo poll error: %s", e)
        return False
648
async def _config_poll_loop(cfg_loader: ConfigLoader):
    """Poll the config repo forever.

    The sleep interval is re-read from the config before every wait; any
    error raised by a poll is logged and the loop continues.
    """
    while True:
        await asyncio.sleep(cfg_loader.sentinel.config_poll_interval)
        try:
            _poll_config_repo(cfg_loader)
        except Exception as exc:
            logger.warning("Config poll loop error: %s", exc)
656
def _find_sentinel_code_dir() -> Path | None:
    """Locate the directory that holds the ``sentinel`` Python package.

    Checks ``../code``, ``../../code`` and ``~/sentinel/code`` in that
    order and returns the resolved path of the first one that exists and
    contains a ``sentinel`` entry, or None when none qualifies.
    """
    search_roots = (
        Path("../code"),
        Path("../../code"),
        Path.home() / "sentinel" / "code",
    )
    for root in search_roots:
        if not root.exists():
            continue
        if (root / "sentinel").exists():
            return root.resolve()
    return None
665
def _parse_version(v: str) -> tuple:
    """Parse a dotted version string into a three-int tuple.

    Surrounding whitespace and a leading ``v`` are ignored; only the
    first three dot-separated components are used. Versions with fewer
    than three components are padded with zeros so that ``"1.5"`` and
    ``"1.5.0"`` compare equal. Anything that cannot be parsed (including
    non-numeric components or a non-string argument) yields ``(0, 0, 0)``.
    """
    try:
        parts = [int(x) for x in v.strip().lstrip("v").split(".")[:3]]
    except Exception:
        return (0, 0, 0)
    # Pad so tuple comparison is not skewed by differing lengths.
    parts += [0] * (3 - len(parts))
    return tuple(parts)
670
def _check_and_upgrade(cfg: SentinelConfig) -> bool:
    """Upgrade the globally installed Sentinel npm package when possible.

    Compares the registry version (``npm show``) with the globally
    installed one (``npm list -g``). When a newer version exists and is
    not beyond ``cfg.version_pin``, it is installed, the bundled Python
    source is copied into the local code directory (if one is found),
    an upgrade notification is attempted and the process is replaced via
    ``os.execv``.

    Returns:
        False when no upgrade was performed. On a successful upgrade
        ``os.execv`` replaces the process, so the final ``return True``
        is only reached if ``execv`` itself returns.
    """
    # Latest published version.
    try:
        result = subprocess.run(
            ["npm", "show", "@misterhuydo/sentinel", "version"],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            logger.warning("npm show failed: %s", result.stderr.strip())
            return False
        latest = result.stdout.strip()
    except Exception as e:
        logger.warning("Upgrade check failed: %s", e)
        return False

    # Currently installed version; unknown is treated as 0.0.0.
    try:
        cur_result = subprocess.run(
            ["npm", "list", "-g", "--depth=0", "--json"],
            capture_output=True, text=True, timeout=30,
        )
        pkg_data = json.loads(cur_result.stdout or "{}")
        deps = pkg_data.get("dependencies", {})
        current = deps.get("@misterhuydo/sentinel", {}).get("version", "0.0.0")
    except Exception:
        current = "0.0.0"

    current_t = _parse_version(current)
    latest_t = _parse_version(latest)

    if cfg.version_pin:
        pin_t = _parse_version(cfg.version_pin)
        if latest_t > pin_t:
            logger.info("Upgrade available (%s → %s) but pinned to %s — skipping",
                        current, latest, cfg.version_pin)
            return False

    if latest_t <= current_t:
        logger.debug("No upgrade available (current=%s, latest=%s)", current, latest)
        return False

    logger.info("Upgrading @misterhuydo/sentinel %s → %s", current, latest)
    install = subprocess.run(
        ["npm", "install", "-g", f"@misterhuydo/sentinel@{latest}"],
        capture_output=True, text=True, timeout=120,
    )
    if install.returncode != 0:
        logger.error("npm install failed: %s", install.stderr.strip())
        return False

    # Refresh the local copy of the Python package from the npm install.
    code_dir = _find_sentinel_code_dir()
    if code_dir:
        try:
            npm_prefix_result = subprocess.run(
                ["npm", "root", "-g"], capture_output=True, text=True, timeout=10,
            )
            npm_root = npm_prefix_result.stdout.strip()
            new_src = Path(npm_root) / "@misterhuydo" / "sentinel" / "sentinel"
            if new_src.exists():
                dst = code_dir / "sentinel"
                shutil.copytree(str(new_src), str(dst), dirs_exist_ok=True)
                logger.info("Python source updated at %s", dst)
        except Exception as e:
            logger.warning("Failed to copy updated Python source: %s", e)

    logger.info("Upgrade complete — restarting Sentinel")
    try:
        from .reporter import send_upgrade_notification
        send_upgrade_notification(cfg, current, latest)
    except Exception as e:
        # The notification is best effort and must not block the
        # restart, but the failure should still be visible in the log.
        logger.warning("Upgrade notification failed: %s", e)

    # Replace the running process with a fresh interpreter.
    os.execv(sys.executable, [sys.executable] + sys.argv)
    return True
734
async def _upgrade_check_loop(cfg_loader: ConfigLoader):
    """Check for upgrades forever, once per ``upgrade_check_hours``.

    Each iteration waits first and checks afterwards, so the first check
    happens one full interval after start. Errors raised by a check are
    logged and the loop continues.
    """
    while True:
        await asyncio.sleep(cfg_loader.sentinel.upgrade_check_hours * 3600)
        try:
            _check_and_upgrade(cfg_loader.sentinel)
        except Exception as exc:
            logger.warning("Upgrade check loop error: %s", exc)
742
async def _sync_loop(cfg_loader: ConfigLoader):
    """Synchronise logs forever.

    Waits 30 seconds, then repeatedly calls ``sync_all`` and sleeps for
    ``sync_interval_seconds``. Errors raised by a sync are logged and the
    loop continues.
    """
    from .log_syncer import sync_all

    await asyncio.sleep(30)
    while True:
        try:
            sync_all(cfg_loader.log_sources, cfg_loader.sentinel, cfg_loader.config_dir)
        except Exception as exc:
            logger.warning("Log sync loop error: %s", exc)
        await asyncio.sleep(cfg_loader.sentinel.sync_interval_seconds)
755
def _boss_qualify_dev_reason(raw: str, sentinel) -> str:
    """Turn a raw agent explanation into a short Slack-ready message.

    Args:
        raw: Explanation text produced by the dev agent; may be empty or
            None.
        sentinel: Settings object; ``anthropic_api_key`` is read from it.

    Returns:
        ``"(no reason given)"`` for a missing or blank ``raw``; the
        model's rewrite (at most 400 characters) when an API key is
        configured and the call succeeds; otherwise the first 280
        characters of ``raw``, stripped.
    """
    # Callers pass an optional detail, so tolerate None as well as "".
    if not raw or not raw.strip():
        return "(no reason given)"
    if not sentinel.anthropic_api_key:
        return raw[:280].strip()
    try:
        import anthropic as _anthropic
        _client = _anthropic.Anthropic(api_key=sentinel.anthropic_api_key)
        _resp = _client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=200,
            system=(
                "You are Sentinel Boss, a DevOps agent assistant. "
                "Patch (an autonomous dev agent) produced the following explanation for why it "
                "could not complete a task. Rewrite it as a clear, concise (1-3 sentences), "
                "user-friendly message suitable for a Slack channel. "
                "Be direct and specific. Do not pad with pleasantries. "
                "Do not start with 'I' or mention 'Patch' by name. "
                "Output only the qualified message, nothing else."
            ),
            messages=[{"role": "user", "content": f"Patch said:\n{raw[:1000]}"}],
        )
        qualified = _resp.content[0].text.strip() if _resp.content else ""
        # An empty reply would post a blank reason; use the raw text.
        if not qualified:
            return raw[:280].strip()
        return qualified[:400]
    except Exception as _e:
        logger.warning("Boss: could not qualify dev reason via API: %s", _e)
        return raw[:280].strip()
782
async def _handle_dev_task(task, cfg_loader: ConfigLoader, store: StateStore):
    """Run one dev task and report its progress and outcome to Slack.

    A "working on" message is posted first and its return value is used
    as the thread for progress replies. ``run_dev_task`` runs in the
    default executor; the task is marked done whether it succeeds or
    raises. The final message depends on the returned status: ``done``,
    ``needs_human``, ``skip`` or anything else (reported as an error).

    Args:
        task: Dev task; ``submitter_user_id``, ``notify_user_ids``,
            ``message``, ``fingerprint`` and ``task_file`` are read.
        cfg_loader: Provides the ``sentinel`` settings.
        store: State store forwarded to ``run_dev_task``.
    """
    from .sentinel_dev import run_dev_task
    from .dev_watcher import mark_dev_done
    from .notify import slack_alert as _slack_alert, slack_thread_reply as _slack_reply

    sentinel = cfg_loader.sentinel
    _submitter = task.submitter_user_id
    _started_msg = (
        f":wrench: Patch working on *<@{_submitter}>*'s request\n_{task.message[:120]}_"
    ) if _submitter else (
        f":wrench: Patch working on dev task\n_{task.message[:120]}_"
    )
    _thread_ts = _slack_alert(sentinel.slack_bot_token, sentinel.slack_channel, _started_msg)

    def _progress(msg: str) -> None:
        """Post a progress update into the task's Slack thread."""
        _slack_reply(sentinel.slack_bot_token, sentinel.slack_channel, _thread_ts, msg)

    # We are inside a coroutine, so the running loop is the right one.
    _loop = asyncio.get_running_loop()
    try:
        status, detail = await _loop.run_in_executor(
            None, run_dev_task, task, sentinel, store, _progress
        )
    except Exception:
        logger.exception("Patch: unexpected error on task %s", task.fingerprint[:8])
        _progress(":x: Patch hit an unexpected error — check logs")
        # Archive the task even on failure so it is not picked up again.
        mark_dev_done(task.task_file)
        return
    mark_dev_done(task.task_file)

    # Mention the submitter first, then everyone else exactly once.
    _notify_ids = list(task.notify_user_ids or [])
    if task.submitter_user_id:
        mentions = f"<@{task.submitter_user_id}> " + " ".join(
            f"<@{u}>" for u in _notify_ids if u != task.submitter_user_id
        )
        mentions = mentions.strip() + " "
    else:
        mentions = " ".join(f"<@{u}>" for u in _notify_ids)
        mentions = (mentions + " ") if mentions else ""

    # detail may be empty/None; normalise before slicing or qualifying.
    detail_text = detail or ""

    if status == "done":
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:white_check_mark: *Patch finished* — changes committed to Sentinel source.",
        )
    elif status == "needs_human":
        qualified = _boss_qualify_dev_reason(detail_text, sentinel)
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:warning: *Dev task needs human input*\n{qualified}",
        )
    elif status == "skip":
        qualified = _boss_qualify_dev_reason(detail_text, sentinel)
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:fast_forward: *Dev task skipped* — {qualified}",
        )
    else:
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:x: *Patch error* on task `{task.fingerprint[:8]}` — {detail_text[:200]}",
        )
838
async def _dev_poll_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Poll for dev tasks forever, queuing self-repair tasks on the way.

    Fingerprints of self-repair tasks already archived under
    ``dev-tasks/.done`` and ``dev-tasks/.cancelled`` are collected first.
    After a 15 second delay the loop runs once a minute: purge old
    tasks, scan ``logs/sentinel.log`` for new errors and drop a
    self-repair task for each, then handle every pending dev task in
    sequence. Errors are logged and the loop continues.
    """
    from .dev_watcher import (
        scan_dev_tasks, purge_old_dev_tasks,
        scan_sentinel_errors, drop_self_repair_task,
    )

    project_dir = Path(".")
    tasks_root = project_dir / "dev-tasks"

    # File stems look like "self-<fingerprint>-..."; remember the
    # fingerprint part of every archived self-repair task.
    _seen_self_fps: set = set()
    for archive in (tasks_root / ".done", tasks_root / ".cancelled"):
        if not archive.exists():
            continue
        for entry in archive.iterdir():
            if not entry.stem.startswith("self-"):
                continue
            pieces = entry.stem.split("-")
            if len(pieces) >= 2:
                _seen_self_fps.add(pieces[1])

    await asyncio.sleep(15)
    while True:
        try:
            if cfg_loader.sentinel.sentinel_dev_repo_path:
                purge_old_dev_tasks(tasks_root)

                sentinel_log = project_dir / "logs" / "sentinel.log"
                for fp, task_body in scan_sentinel_errors(sentinel_log, seen_fps=_seen_self_fps):
                    logger.info("Dev agent: self-repair task queued for error %s", fp[:8])
                    drop_self_repair_task(project_dir, fp, task_body)

                pending = scan_dev_tasks(project_dir)
                if pending:
                    logger.info("Dev agent: %d task(s) found", len(pending))
                for task in pending:
                    await _handle_dev_task(task, cfg_loader, store)
        except Exception as exc:
            logger.warning("Dev poll loop error: %s", exc)
        await asyncio.sleep(60)
873
async def _handle_repo_task(task, repo_cfg, cfg_loader: ConfigLoader, store: StateStore):
    """Run one repo task and report its progress and outcome to Slack.

    A "working on" message is posted first and its return value is used
    as the thread for progress replies. ``run_repo_task`` runs in the
    default executor; the task is marked done whether it succeeds or
    raises. For status ``done`` the detail selects the message: a
    ``__cicd__<name>`` detail means a release was triggered, any other
    non-empty detail is reported as the opened PR, and an empty detail
    means a plain push.

    Args:
        task: Repo task; ``submitter_user_id``, ``notify_user_ids``,
            ``repo_name``, ``message``, ``fingerprint`` and ``task_file``
            are read.
        repo_cfg: Config of the target repo; ``branch`` is read.
        cfg_loader: Provides the ``sentinel`` settings.
        store: State store forwarded to ``run_repo_task``.
    """
    from .repo_task_engine import run_repo_task, mark_repo_task_done
    from .notify import slack_alert as _slack_alert, slack_thread_reply as _slack_reply

    sentinel = cfg_loader.sentinel
    _submitter = task.submitter_user_id
    _started_msg = (
        f":hammer: Working on *<@{_submitter}>*'s request for `{task.repo_name}`\n_{task.message[:120]}_"
    ) if _submitter else (
        f":hammer: Working on repo task for `{task.repo_name}`\n_{task.message[:120]}_"
    )
    _thread_ts = _slack_alert(sentinel.slack_bot_token, sentinel.slack_channel, _started_msg)

    def _progress(msg: str) -> None:
        """Post a progress update into the task's Slack thread."""
        _slack_reply(sentinel.slack_bot_token, sentinel.slack_channel, _thread_ts, msg)

    _loop = asyncio.get_event_loop()
    try:
        status, detail = await _loop.run_in_executor(
            None, run_repo_task, task, repo_cfg, sentinel, store, _progress,
        )
    except Exception:
        logger.exception("Repo task: unexpected error on task %s", task.fingerprint[:8])
        _progress(":x: Unexpected error — check logs")
        # Archive the task even on failure so it is not picked up again.
        mark_repo_task_done(task.task_file)
        return
    mark_repo_task_done(task.task_file)

    # Mention the submitter first, then everyone else exactly once.
    _notify_ids = list(task.notify_user_ids or [])
    if task.submitter_user_id:
        mentions = f"<@{task.submitter_user_id}> " + " ".join(
            f"<@{u}>" for u in _notify_ids if u != task.submitter_user_id
        )
        mentions = mentions.strip() + " "
    else:
        mentions = " ".join(f"<@{u}>" for u in _notify_ids)
        mentions = (mentions + " ") if mentions else ""

    if status == "done":
        if detail and detail.startswith("__cicd__"):
            cicd_name = detail[len("__cicd__"):]
            _slack_alert(
                sentinel.slack_bot_token, sentinel.slack_channel,
                f"{mentions}:white_check_mark: Done — pushed to `{task.repo_name}/{repo_cfg.branch}` and triggered `{cicd_name}` release.",
            )
        elif detail:
            _slack_alert(
                sentinel.slack_bot_token, sentinel.slack_channel,
                f"{mentions}:white_check_mark: Done — PR opened for `{task.repo_name}`: {detail}",
            )
        else:
            _slack_alert(
                sentinel.slack_bot_token, sentinel.slack_channel,
                f"{mentions}:white_check_mark: Done — changes pushed to `{task.repo_name}/{repo_cfg.branch}`.",
            )
    elif status == "needs_human":
        # detail can be empty/None (the error branch already guards for
        # that); normalise it here too so qualifying cannot crash.
        qualified = _boss_qualify_dev_reason(detail or "", sentinel)
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:warning: *Task needs human input* (`{task.repo_name}`)\n{qualified}",
        )
    elif status == "skip":
        qualified = _boss_qualify_dev_reason(detail or "", sentinel)
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:fast_forward: Task skipped for `{task.repo_name}` — {qualified}",
        )
    else:
        _slack_alert(
            sentinel.slack_bot_token, sentinel.slack_channel,
            f"{mentions}:x: Task error for `{task.repo_name}` — {(detail or '')[:200]}",
        )
940
async def _repo_task_poll_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Poll for repo tasks forever and run each against its repo config.

    After a 20 second delay the loop runs once a minute. A task's repo
    is looked up by exact name first, then by case-insensitive substring
    match against the configured repo names (first match wins). Tasks
    with no matching repo are logged and marked done. Errors are logged
    and the loop continues.
    """
    from .repo_task_engine import scan_repo_tasks, mark_repo_task_done

    await asyncio.sleep(20)
    while True:
        try:
            project_dir = Path(".")
            tasks = scan_repo_tasks(project_dir)
            if tasks:
                logger.info("Repo task: %d task(s) found", len(tasks))
            for task in tasks:
                repo_cfg = cfg_loader.repos.get(task.repo_name)
                # An empty name is a substring of every repo name, so
                # only fall back to fuzzy matching for a real name.
                wanted = (task.repo_name or "").lower()
                if not repo_cfg and wanted:
                    for rname, rcfg in cfg_loader.repos.items():
                        if wanted in rname.lower():
                            repo_cfg = rcfg
                            break
                if not repo_cfg:
                    logger.warning("Repo task: no config for repo '%s' — skipping", task.repo_name)
                    mark_repo_task_done(task.task_file)
                    continue
                await _handle_repo_task(task, repo_cfg, cfg_loader, store)
        except Exception as e:
            logger.warning("Repo task poll loop error: %s", e)
        await asyncio.sleep(60)
964
def _log_auth_status(cfg: SentinelConfig) -> None:
    """Log which Claude authentication methods are available.

    Considers ``cfg.anthropic_api_key``, whether ``cfg.claude_code_bin``
    is found on PATH and ``cfg.claude_pro_for_tasks``. When neither an
    API key nor the CLI binary is available an error is logged and a
    warning is posted to the configured Slack channel.
    """
    has_api_key = bool(cfg.anthropic_api_key)
    has_claude_bin = bool(shutil.which(cfg.claude_code_bin))
    pro_for_tasks = cfg.claude_pro_for_tasks

    if has_api_key:
        if pro_for_tasks:
            logger.info(
                "Claude auth: API key ✓ + Claude Pro (OAuth) ✓ — "
                "Fix Engine will try Claude Pro first, falls back to API key on auth error. "
                "Run `claude login` if not already authenticated."
            )
        else:
            logger.info(
                "Claude auth: API key ✓ — Boss + Fix Engine use API key. "
                "CLAUDE_PRO_FOR_TASKS=false; falls back to Claude Pro (OAuth) if key auth fails."
            )
        return

    if has_claude_bin:
        logger.warning(
            "Claude auth: no ANTHROPIC_API_KEY — Boss will use CLI fallback (limited tools). "
            "Fix Engine uses Claude Pro via `claude` CLI."
        )
        return

    # Neither an API key nor the CLI: tell the operators via Slack too.
    msg = (
        ":warning: *Sentinel — no Claude authentication configured*\n"
        "Sentinel needs at least one of:\n"
        "• `ANTHROPIC_API_KEY` in `sentinel.properties` — full Boss tools, API billing\n"
        "• Claude Pro OAuth: run `claude login` on the server — required for Fix Engine\n"
        "See the auth section in your workspace `sentinel.properties` for guidance."
    )
    logger.error("Claude auth: NOTHING configured — Boss and Fix Engine will fail!")
    from .notify import slack_alert
    slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
995
async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
    """Start Sentinel: run startup checks, spawn background loops, poll.

    After logging the auth status and the startup check results, the
    enabled background loops (config polling, upgrade check, log sync,
    Slack bot, dev tasks, repo tasks) are scheduled on the event loop.
    The function then runs ``poll_cycle`` forever, waiting the poll
    interval between cycles in 5 second steps; the wait ends early when
    a ``SENTINEL_POLL_NOW`` file appears in the working directory.
    """
    interval = cfg_loader.sentinel.poll_interval_seconds
    logger.info("Sentinel starting — poll interval: %ds, repos: %s",
                interval, list(cfg_loader.repos.keys()))
    _log_auth_status(cfg_loader.sentinel)

    results = await _startup_checks(cfg_loader)
    has_errors = any(
        item["status"] == "error"
        for key in ("repos", "cairn", "ssh")
        for item in results[key]
    )
    for key in ("repos", "cairn", "ssh"):
        for item in results[key]:
            if item["status"] == "error":
                logger.warning("Startup check failed [%s] %s: %s",
                               key, item.get("name", ""), item.get("message", ""))
    for w in results.get("warnings", []):
        logger.warning("Startup warning: %s", w)

    # The event loop keeps only weak references to tasks, so hold strong
    # ones here for as long as run_loop is alive; otherwise a background
    # loop may be garbage-collected mid-execution.
    background_tasks = []

    if has_errors:
        logger.warning("Startup completed with errors — check config and logs")
    else:
        logger.info("Startup checks passed — startup email in 5 minutes")
        background_tasks.append(
            asyncio.ensure_future(_send_startup_email_delayed(cfg_loader.sentinel, results)))

    background_tasks.append(asyncio.ensure_future(_config_poll_loop(cfg_loader)))
    if cfg_loader.sentinel.auto_upgrade:
        background_tasks.append(asyncio.ensure_future(_upgrade_check_loop(cfg_loader)))
    if cfg_loader.sentinel.sync_enabled:
        background_tasks.append(asyncio.ensure_future(_sync_loop(cfg_loader)))
    if cfg_loader.sentinel.slack_bot_token:
        from .slack_bot import run_slack_bot
        background_tasks.append(asyncio.ensure_future(run_slack_bot(cfg_loader, store)))
    if cfg_loader.sentinel.sentinel_dev_repo_path:
        background_tasks.append(asyncio.ensure_future(_dev_poll_loop(cfg_loader, store)))
    background_tasks.append(asyncio.ensure_future(_repo_task_poll_loop(cfg_loader, store)))

    while True:
        try:
            await poll_cycle(cfg_loader, store)
        except Exception as e:
            logger.exception("Unhandled error in poll cycle: %s", e)

        # Re-read the interval each cycle so a reloaded config takes
        # effect without a restart (as the other loops already do).
        interval = cfg_loader.sentinel.poll_interval_seconds
        elapsed = 0
        while elapsed < interval:
            await asyncio.sleep(5)
            elapsed += 5
            # A marker file lets Sentinel Boss request an immediate poll.
            poll_now = Path("SENTINEL_POLL_NOW")
            if poll_now.exists():
                poll_now.unlink(missing_ok=True)
                logger.info("Immediate poll triggered by Sentinel Boss")
                break
1043
def _setup_workspace_log() -> None:
    """Attach a file handler for ``../logs/sentinel.log`` to the "sentinel" logger.

    The log directory is created when missing and the file is opened in
    write mode, so it is truncated on every call. A failure is logged as
    a warning and not propagated.
    """
    workspace_log = Path("..") / "logs" / "sentinel.log"
    try:
        workspace_log.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(workspace_log, mode="w", encoding="utf-8")
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)-7s %(name)s — %(message)s")
        )
        logging.getLogger("sentinel").addHandler(file_handler)
    except Exception as exc:
        logger.warning("Could not open workspace log %s: %s", workspace_log, exc)
1053
def main():
    """Command-line entry point.

    Creates the working directories, sets up the workspace log, parses
    ``--config`` (default ``./config``), builds the config loader and
    state store, registers signal handlers and runs the main loop.
    """
    # (relative path, create missing parents)
    working_dirs = (
        ("logs", False),
        ("workspace/fetched", True),
        ("workspace/patches", True),
        ("issues", False),
    )
    for rel_path, make_parents in working_dirs:
        Path(rel_path).mkdir(parents=make_parents, exist_ok=True)

    _setup_workspace_log()

    parser = argparse.ArgumentParser(description="Sentinel — Autonomous DevOps Agent")
    parser.add_argument("--config", default="./config", help="Config directory path")
    args = parser.parse_args()

    cfg_loader = ConfigLoader(config_dir=args.config)
    store = StateStore(cfg_loader.sentinel.state_db)
    _register_signals()
    asyncio.run(run_loop(cfg_loader, store))
1066
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()