@misterhuydo/sentinel 1.0.76 → 1.0.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1367 +1,1573 @@
1
- """
2
- sentinel_boss.py — Claude-backed Sentinel Boss.
3
-
4
- Claude acts as the boss: reads project state, decides on actions,
5
- executes them via tool use, and responds naturally. One agentic loop
6
- per turn — Claude may call multiple tools before replying.
7
- """
8
-
9
- import json
10
- import logging
11
- import os
12
- import re
13
- import subprocess
14
- import uuid
15
- from datetime import datetime, timezone
16
- from pathlib import Path
17
- from typing import Optional
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
- # ── System prompt ────────────────────────────────────────────────────────────
22
-
23
# System prompt for the Claude-backed Boss.
#
# Every tool named in this prompt must be spelled exactly as it is registered in
# `_TOOLS`: a name that is advertised here but not registered there (or spelled
# differently) cannot be called successfully.  The numbered list and the
# grouped summary further down must also agree with each other.
_SYSTEM = """\
You are Sentinel Boss — the AI interface for Sentinel, a 24/7 autonomous DevOps agent.

Sentinel watches production logs, detects errors, generates code fixes via Claude Code,
and opens GitHub PRs for admin review (or pushes directly if AUTO_PUBLISH=true).

Your job:
- Understand what the DevOps engineer needs in natural language
- Query Sentinel's live state (errors, fixes, open PRs) on their behalf
- Deliver tasks/issues to the right project — you know all projects in this workspace
- Control Sentinel (pause/resume) when asked
- Give honest, concise answers — you know this system inside out
- If a project name is unclear or ambiguous, ask the engineer to clarify — never guess

What you can do (tools available):

1. get_status — Show recent errors detected, fixes applied/pending, open PRs.
   e.g. "what happened today?", "any issues?", "show open PRs"

2. create_issue — Deliver a fix/task to any project in this workspace by short name.
   You know all project names — use list_projects if you're unsure.
   If the project name is ambiguous or not found, ask to clarify.
   e.g. "tell 1881 to fix X", "look into Y in elprint", "investigate Z"

3. pause_sentinel — Create SENTINEL_PAUSE file to halt all auto-fix activity.
   e.g. "pause sentinel", "stop auto-fixing"

4. resume_sentinel — Remove SENTINEL_PAUSE file to resume normal operation.
   e.g. "resume sentinel", "unpause"

5. list_projects — List all configured repos and log sources in this Sentinel instance.
   e.g. "what projects are you watching?", "list all repos"

6. search_logs — SSH live to servers and grep logs in real time (uses fetch_log.sh with
   the query as GREP_FILTER). Falls back to cached files if unavailable.
   e.g. "search logs for illegal PIN in 1881", "find X in SSOLWA", "grep logs for Z"

7. trigger_poll — Trigger an immediate poll cycle without waiting for the schedule.
   e.g. "check now", "poll immediately", "don't wait, run now"

8. get_repo_status — Per-repository breakdown of errors detected and fixes applied.
   e.g. "how is elprint doing?", "which repo has the most issues?"

9. list_recent_commits — List recent commits made by Sentinel across all managed repos.
   e.g. "what did sentinel commit?", "show recent auto-fixes"

10. get_fix_details — Get full details of a specific fix: error, patch path, PR URL, status.
    e.g. "show fix abc123", "details on that fix"

11. list_pending_prs — List all open Sentinel PRs awaiting admin review.
    e.g. "list open PRs", "what is waiting for review?"

12. pull_repo — Run git pull on one or all managed application repos.
    e.g. "pull changes", "git pull all repos", "update the code"

13. pull_config — Run git pull on one or all Sentinel project config dirs.
    e.g. "pull config for 1881", "update sentinel config", "pull all configs"

14. fetch_logs — Run fetch_log.sh on demand to pull fresh logs from remote servers right now.
    Supports --debug mode and parameter overrides (tail count, grep filter).
    e.g. "fetch logs", "try fetch_log.sh for SSOLWA", "fetch logs with debug",
    "grab latest logs from STS", "fetch logs without filter"

15. watch_bot — Register a Slack bot for passive monitoring. Every message it posts is
    auto-queued as an issue in the bot's registered project.
    ALWAYS requires a project — infer from context or ask the user first.
    e.g. "listen to @alertbot", "watch @bot1 @bot2 for project 1881", "monitor @errorbot"

16. unwatch_bot — Remove a Slack bot from the passive watch list.
    e.g. "stop watching @alertbot", "unwatch @errorbot"

17. list_watched_bots — Show all Slack bots currently being passively monitored and which projects
    they are delivering to.
    e.g. "which bots are you watching?", "list monitored bots"

18. upgrade_sentinel — Pull the latest Sentinel agent code, update Python deps, and restart the
    process. Safe to run at any time — no restart if already up to date.
    e.g. "upgrade sentinel", "update sentinel", "upgrade yourself"

19. ask_codebase — Ask any natural-language question about a managed repo's codebase.
    Claude Code answers using its full knowledge of the code.
    e.g. "what does the 1881 backend do?", "find PIN validation in elprint",
    "any TODOs in cairn?", "are there security issues in elprint-sales?"

20. restart_project — Stop and restart a specific project instance (stop.sh + start.sh).
    e.g. "restart 1881", "reboot elprint", "restart the cairn project"

21. tail_log — Fetch the last N lines of a log source live, without a grep filter.
    e.g. "show recent SSOLWA logs", "tail STS", "last 200 lines from 1881 logs"

When someone asks what you can do, what you support, what your capabilities are, or how you can help,
reply with a short summary grouped by category:

*Monitoring & status*
• `get_status` — errors detected, fixes applied/pending/failed, open PRs — "what happened today?"
• `get_repo_status` — per-repo breakdown of errors and fixes — "how is elprint doing?"
• `list_recent_commits` — recent Sentinel auto-fix commits — "what did Sentinel commit?"

*Log management*
• `fetch_logs` — pull fresh logs from servers right now — "fetch logs for SSOLWA"
• `search_logs` — live SSH grep on production servers — "search logs for illegal PIN in 1881"
• `tail_log` — last N lines of a log source, no filter — "show recent SSOLWA logs"

*Codebase questions*
• `ask_codebase` — any question about a repo's code — "what does 1881 do?", "find PIN validation", "any TODOs?", "security issues?"

*Fix management*
• `get_fix_details` — full details of a specific fix — "show fix abc123"
• `list_pending_prs` — all open Sentinel PRs awaiting review — "list open PRs"

*Project & task delivery*
• `list_projects` — all projects and repos Sentinel manages — "what projects do you manage?"
• `create_issue` — deliver a task to any project by name — "tell 1881 to fix X"
• `trigger_poll` — run a log-fetch + fix cycle right now — "check now"
• `pause_sentinel` / `resume_sentinel` — halt or resume all auto-fix activity — "pause Sentinel"

*Repo & config sync*
• `pull_repo` — git pull on managed application repos — "pull latest code"
• `pull_config` — git pull on Sentinel config dirs — "pull config for elprint"

*Slack bot watching*
• `watch_bot` — register a Slack bot for passive monitoring; its messages are auto-queued as issues — "listen to @alertbot"
• `unwatch_bot` — stop monitoring a bot — "stop watching @errorbot"
• `list_watched_bots` — show all bots currently being monitored — "which bots are you watching?"

*Project control*
• `restart_project` — stop + restart a specific project — "restart 1881"

*Self-management*
• `upgrade_sentinel` — git pull + pip install + restart — "upgrade sentinel", "update yourself"

Tone: direct, professional, like a senior engineer who owns the system.
Don't pad responses. Don't say "Great question!" or "Certainly!".
If you don't know something, use a tool to find out before saying you don't know.

When the engineer's request is fully handled, end your LAST message with the token: [DONE]
IMPORTANT: Always write your actual reply text FIRST, then append [DONE] at the end. Example: "Hello! I'm Sentinel. [DONE]". Never output [DONE] as your only content.
For greetings like "hello" or empty messages, introduce yourself briefly and offer help, then end with [DONE].
If you need a follow-up from them, do NOT include [DONE] — wait for their next message.
"""
164
-
165
- # ── Tool definitions ─────────────────────────────────────────────────────────
166
-
167
# Tool definitions offered to Claude.  Each entry has a `name`, a natural-language
# `description`, and a JSON-Schema `input_schema` for the tool's arguments.
# `_run_tool` dispatches on `name`, so names here must stay in sync with it.
_TOOLS = [
    {
        "name": "get_status",
        "description": (
            "Get recent errors, fixes applied, fixes pending review, and open PRs. "
            "Use for: 'what happened today?', 'any issues?', 'how are things?', "
            "'what are the open PRs?', 'did sentinel fix anything?'"
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "hours": {
                    "type": "integer",
                    "description": "Look-back window in hours (default 24)",
                    "default": 24,
                },
            },
        },
    },
    {
        "name": "create_issue",
        "description": (
            "Deliver a fix/task request to a Sentinel project instance. "
            "Use when the engineer says 'tell 1881 to do X', 'look into Y in project elprint', "
            "'implement this in 1881: ...'. Can target any project by short name. "
            "Defaults to the current project if no project is specified."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "description": {
                    "type": "string",
                    "description": "Full task/problem description — everything the engineer told you",
                },
                "project": {
                    "type": "string",
                    "description": "Project short name to deliver to (e.g. '1881', 'elprint'). Omit for current project.",
                },
                "target_repo": {
                    "type": "string",
                    "description": "Specific repo within the project (omit to let Sentinel auto-route)",
                },
            },
            "required": ["description"],
        },
    },
    {
        "name": "get_fix_details",
        "description": "Get full details of a specific fix by fingerprint (8+ hex chars).",
        "input_schema": {
            "type": "object",
            "properties": {
                "fingerprint": {"type": "string"},
            },
            "required": ["fingerprint"],
        },
    },
    {
        "name": "list_pending_prs",
        "description": "List all open Sentinel PRs awaiting admin review.",
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "pause_sentinel",
        "description": (
            "Pause ALL Sentinel fix activity immediately. "
            "Use when the engineer says 'pause', 'stop', 'freeze', or 'hold off'."
        ),
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "resume_sentinel",
        "description": "Resume Sentinel fix activity after a pause.",
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "list_projects",
        "description": (
            "List all projects (Sentinel instances) in this workspace and the repos "
            "each one manages. Use for: 'what projects do you manage?', 'list projects', "
            "'what repos are configured?', 'show me all projects'."
        ),
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "search_logs",
        "description": (
            "Search production logs for a keyword or pattern. "
            "When a project or source is specified (or can be inferred), performs a LIVE fetch "
            "via fetch_log.sh with the query as the grep filter — SSHes directly to the server. "
            "Falls back to searching locally-cached log files when no source can be determined. "
            "Use for: 'search logs for illegal PIN in 1881', 'find X in SSOLWA logs', "
            "'what did user Y do?', 'show entries for appid=Z', 'grep logs for X'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Keyword or regex to grep for",
                },
                "source": {
                    "type": "string",
                    "description": "Log source name to search (partial match against log-config filenames, e.g. 'SSOLWA', '1881'). Leave empty to search all sources.",
                },
                "max_matches": {
                    "type": "integer",
                    "description": "Max matching lines to return per source (default 30)",
                    "default": 30,
                },
            },
            "required": ["query"],
        },
    },
    {
        "name": "trigger_poll",
        "description": (
            "Trigger an immediate log-fetch and error-detection cycle without waiting "
            "for the next scheduled interval. Use when: 'check now', 'run now', "
            "'poll immediately', 'don't wait'."
        ),
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "get_repo_status",
        "description": (
            "Per-repository breakdown of errors detected and fixes applied. "
            "Use for: 'how is repo X doing?', 'which repo has the most issues?', "
            "'break down by repo'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "hours": {
                    "type": "integer",
                    "description": "Look-back window in hours (default 24)",
                    "default": 24,
                },
            },
        },
    },
    {
        "name": "list_recent_commits",
        "description": (
            "List recent commits made by Sentinel across all managed repos. "
            "Use for: 'what did Sentinel commit?', 'show recent auto-fixes', 'what was changed?'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "limit": {
                    "type": "integer",
                    "description": "Max commits per repo (default 5)",
                    "default": 5,
                },
            },
        },
    },
    {
        "name": "pull_repo",
        "description": (
            "Run git pull on one or all managed repos to fetch latest changes from GitHub. "
            "Use for: 'pull changes', 'git pull', 'update repo X', 'fetch latest code'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "repo": {
                    "type": "string",
                    "description": "Repo name to pull (omit to pull all configured repos)",
                },
            },
        },
    },
    {
        "name": "pull_config",
        "description": (
            "Run git pull on one or all Sentinel project config directories. "
            "Projects are matched by short name ('1881', 'elprint') or full dir name ('sentinel-1881'). "
            "Use for: 'pull config for 1881', 'update sentinel config', 'pull all configs'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "project": {
                    "type": "string",
                    "description": "Project short name or dir name to pull (omit for all projects)",
                },
            },
        },
    },
    {
        "name": "fetch_logs",
        "description": (
            "Run fetch_log.sh for one or all configured log sources to pull the latest logs "
            "from remote servers right now. Use for: 'fetch logs', 'run fetch_log.sh', "
            "'grab latest logs from SSOLWA', 'try fetch_log.sh for STS', "
            "'pull logs from server', 'get fresh logs'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "source": {
                    "type": "string",
                    "description": "Log source name to fetch (partial match, e.g. 'SSOLWA'). Omit to fetch all.",
                },
                "debug": {
                    "type": "boolean",
                    "description": "Run fetch_log.sh with --debug flag to show SSH/grep details",
                    "default": False,
                },
                "tail": {
                    "type": "integer",
                    "description": "Override TAIL lines (how many log lines to fetch)",
                },
                "grep_filter": {
                    "type": "string",
                    "description": "Override GREP_FILTER (regex). Pass 'none' to disable filtering.",
                },
            },
        },
    },
    {
        "name": "watch_bot",
        "description": (
            "Tell Sentinel to passively monitor a Slack bot — queuing its messages as issues. "
            "Extract all <@UXXXXXX> user IDs from the message and pass them here. "
            "Sentinel verifies each is actually a bot (not a human) before adding to the watch list. "
            "IMPORTANT: a bot watcher is only useful if its issues can be delivered to a project. "
            "Try to infer the project from context (bot name, prior messages, available projects). "
            "If it cannot be determined, do NOT call this tool — instead ask the user which project "
            "the bot's alerts belong to, then call this tool with the project filled in. "
            "Use for: 'listen to @alertbot', 'watch @bot1 @bot2', 'monitor @errorbot'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "user_ids": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Slack user IDs to watch — extract from <@UXXXXXX> patterns in the message",
                },
                "project": {
                    "type": "string",
                    "description": "Project short name this bot's issues should be routed to (e.g. '1881', 'elprint'). Infer from context or ask user before calling.",
                },
            },
            "required": ["user_ids"],
        },
    },
    {
        "name": "unwatch_bot",
        "description": (
            "Stop Sentinel from monitoring a Slack bot. "
            "Use for: 'stop watching @alertbot', 'unwatch @bot', 'remove @errorbot from watchers'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "user_ids": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Slack user IDs to remove from the watch list",
                },
            },
            "required": ["user_ids"],
        },
    },
    {
        "name": "list_watched_bots",
        "description": (
            "List all Slack bots Sentinel is currently monitoring passively. "
            "Use for: 'who are you watching?', 'which bots are you monitoring?', 'list watched bots'."
        ),
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "upgrade_sentinel",
        "description": (
            "Upgrade the Sentinel agent itself: git pull the latest code, update Python deps, "
            "then restart the process. Safe to call at any time — if already up to date, "
            "no restart is triggered. "
            "Use for: 'upgrade sentinel', 'update sentinel', 'upgrade yourself', "
            "'pull latest sentinel code', 'restart sentinel after upgrade'."
        ),
        "input_schema": {"type": "object", "properties": {}},
    },
    {
        "name": "ask_codebase",
        "description": (
            "Ask any natural-language question about a managed codebase. "
            "Accepts a repo name (e.g. 'STS', 'elprint-sales') OR a project name (e.g. '1881', 'elprint') "
            "— if a project name is given and it has multiple repos, all are queried. "
            "Claude Code answers using its full codebase knowledge — no need to specify how. "
            "Use for: 'what does 1881 do?', 'TODOs in 1881', 'find PIN validation in STS', "
            "'security issues in elprint-sales?', 'summarize the cairn repo'."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "repo": {
                    "type": "string",
                    "description": "Repo name (e.g. 'STS', 'elprint-sales') OR project name (e.g. '1881', 'elprint') — project name queries all its repos",
                },
                "question": {
                    "type": "string",
                    "description": "Natural language question about the codebase",
                },
            },
            "required": ["repo", "question"],
        },
    },
    {
        "name": "restart_project",
        "description": (
            "Stop and restart a specific Sentinel project instance (runs stop.sh then start.sh). "
            "Use when: 'restart 1881', 'restart elprint', 'reboot the cairn project'. "
            "Safer than restarting all projects at once."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "project": {
                    "type": "string",
                    "description": "Project short name or dir name (e.g. '1881', 'elprint')",
                },
            },
            "required": ["project"],
        },
    },
    {
        "name": "tail_log",
        "description": (
            "Fetch the last N lines of a log source's live production logs without any grep filter. "
            "Use when: 'show me recent SSOLWA logs', 'tail STS', 'what's happening in 1881 logs right now', "
            "'show last 100 lines from SSOLWA'. Different from search_logs — no pattern required."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "source": {
                    "type": "string",
                    "description": "Log source name (partial match against log-config filenames, e.g. 'SSOLWA', 'STS')",
                },
                "lines": {
                    "type": "integer",
                    "description": "Number of recent lines to fetch (default 100)",
                    "default": 100,
                },
            },
            "required": ["source"],
        },
    },
]
521
-
522
-
523
- # ── Workspace helpers ─────────────────────────────────────────────────────────
524
-
525
def _workspace_dir() -> Path:
    """Return the directory one level above the resolved current working directory."""
    cwd = Path(".").resolve()
    return cwd.parent
527
-
528
def _short_name(dir_name: str) -> str:
    """Drop a leading 'sentinel-' from a directory name.

    'sentinel-1881' → '1881', 'sentinel-elprint' → 'elprint'; a name that does
    not start with that prefix is returned unchanged.
    """
    return dir_name.removeprefix("sentinel-")
533
-
534
def _read_project_name(project_dir: Path) -> str:
    """Return the display name of the project rooted at *project_dir*.

    Looks for ``config/sentinel.properties`` under *project_dir* and returns the
    value of the first ``PROJECT_NAME=...`` entry whose value is non-empty once
    any trailing ``# comment`` has been removed.  When the file is missing,
    cannot be read, or contains no such entry, the result is
    ``_short_name(project_dir.name)``.
    """
    props = project_dir / "config" / "sentinel.properties"
    if props.exists():
        try:
            text = props.read_text(encoding="utf-8", errors="ignore")
        except OSError as exc:
            # Best effort: an unreadable properties file must not break
            # project discovery, so fall through to the directory-based name.
            logger.debug("Could not read %s: %s", props, exc)
            text = ""
        for raw in text.splitlines():
            key, sep, val = raw.partition("=")
            # Compare the whole key: a prefix test would also accept
            # PROJECT_NAMESPACE or PROJECT_NAME_ALIAS and return their value.
            if not sep or key.strip() != "PROJECT_NAME":
                continue
            val = val.partition("#")[0].strip()
            if val:
                return val
    return _short_name(project_dir.name)
549
-
550
def _find_project_dirs(target: str = "") -> list[Path]:
    """Return the project directories in the workspace that match *target*.

    A project directory is any sub-directory of ``_workspace_dir()`` that
    contains a ``config`` entry, excluding the directories named ``code`` and
    ``.git``.  Results are in sorted path order.

    Matching is case-insensitive and checks the directory name, its short name
    (``_short_name``) and its ``PROJECT_NAME`` (``_read_project_name``):

    * empty *target* — every project directory is returned;
    * otherwise, if one or more projects match *target* exactly on any of those
      names, only those are returned, so a project whose name is a prefix of
      another (``elprint`` vs ``elprint-sales``) can still be addressed
      unambiguously;
    * otherwise every project with *target* as a substring of any of those
      names is returned.

    If the workspace cannot be listed, whatever was collected before the
    failure is returned (possibly an empty list).
    """
    workspace = _workspace_dir()
    needle = target.lower()  # hoisted: invariant across the loop
    exact: list[Path] = []
    partial: list[Path] = []
    try:
        for d in sorted(workspace.iterdir()):
            if not d.is_dir() or d.name in ("code", ".git"):
                continue
            if not (d / "config").exists():
                continue
            if not needle:
                partial.append(d)
                continue
            names = {
                d.name.lower(),
                _short_name(d.name).lower(),
                _read_project_name(d).lower(),
            }
            if needle in names:
                exact.append(d)
            elif any(needle in candidate for candidate in names):
                partial.append(d)
    except OSError as exc:
        # Best effort: callers treat "no projects" as a normal outcome, so a
        # listing failure is reported in the log instead of being raised.
        logger.warning("Could not scan workspace %s: %s", workspace, exc)
    return exact or partial
570
-
571
def _git_pull(path: Path) -> dict:
    """Run ``git pull --rebase origin`` in *path* and summarise the outcome.

    Returns a dict with two keys:

    * ``status`` — ``"ok"`` when git exits with code 0, otherwise ``"error"``.
    * ``detail`` — on success, the last line git printed to stdout (or
      ``"already up to date"`` when it printed nothing); on failure, git's
      stderr, falling back to the last stdout line and finally to the exit
      code so the detail is never empty.

    Never raises: a timeout (60 seconds), a missing ``git`` executable or any
    other exception is reported as ``{"status": "error", "detail": str(exc)}``.
    """
    try:
        proc = subprocess.run(
            ["git", "pull", "--rebase", "origin"],
            cwd=str(path), capture_output=True, text=True, timeout=60,
        )
    except Exception as exc:
        # Deliberately broad: this helper reports failures to the caller in
        # the returned dict rather than propagating them.
        return {"status": "error", "detail": str(exc)}

    out_lines = proc.stdout.strip().splitlines()
    last_line = out_lines[-1] if out_lines else ""
    if proc.returncode == 0:
        return {"status": "ok", "detail": last_line or "already up to date"}

    # A failing git does not always write to stderr; without these fallbacks
    # the caller would receive an empty, useless error detail.
    detail = (
        proc.stderr.strip()
        or last_line
        or f"git pull exited with code {proc.returncode}"
    )
    return {"status": "error", "detail": detail}
582
-
583
-
584
- # ── Tool execution ────────────────────────────────────────────────────────────
585
-
586
- async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None) -> str:
587
- if name == "get_status":
588
- hours = int(inputs.get("hours", 24))
589
- errors = store.get_recent_errors(hours)
590
- fixes = store.get_recent_fixes(hours)
591
- prs = store.get_open_prs()
592
- top_errors = [
593
- {
594
- "message": e["message"][:120],
595
- "count": e["count"],
596
- "source": e["source"],
597
- "last_seen": e["last_seen"],
598
- }
599
- for e in errors[:8]
600
- ]
601
- return json.dumps({
602
- "window_hours": hours,
603
- "errors_detected": len(errors),
604
- "top_errors": top_errors,
605
- "fixes_applied": sum(1 for f in fixes if f["status"] == "applied"),
606
- "fixes_pending": sum(1 for f in fixes if f["status"] == "pending"),
607
- "fixes_failed": sum(1 for f in fixes if f["status"] == "failed"),
608
- "open_prs": [
609
- {
610
- "repo": p["repo_name"],
611
- "branch": p["branch"],
612
- "pr_url": p["pr_url"],
613
- "age": p.get("timestamp", ""),
614
- }
615
- for p in prs
616
- ],
617
- "sentinel_paused": Path("SENTINEL_PAUSE").exists(),
618
- })
619
-
620
- if name == "create_issue":
621
- description = inputs["description"]
622
- target_repo = inputs.get("target_repo", "")
623
- project_arg = inputs.get("project", "")
624
-
625
- if project_arg:
626
- project_dirs = _find_project_dirs(project_arg)
627
- if not project_dirs:
628
- all_names = [_read_project_name(d) for d in _find_project_dirs()]
629
- return json.dumps({
630
- "error": f"No project found matching '{project_arg}'",
631
- "available_projects": all_names,
632
- "action_needed": "Ask the user which project they meant.",
633
- })
634
- if len(project_dirs) > 1:
635
- matches = [_read_project_name(d) for d in project_dirs]
636
- return json.dumps({
637
- "error": f"Ambiguous project name '{project_arg}' matches: {matches}",
638
- "action_needed": "Ask the user to clarify which project they mean.",
639
- })
640
- project_dir = project_dirs[0]
641
- else:
642
- project_dir = Path(".")
643
-
644
- issues_dir = project_dir / "issues"
645
- issues_dir.mkdir(exist_ok=True)
646
- fname = f"slack-{uuid.uuid4().hex[:8]}.txt"
647
- content = (f"TARGET_REPO: {target_repo}\n\n" if target_repo else "") + description
648
- (issues_dir / fname).write_text(content, encoding="utf-8")
649
-
650
- # Touch SENTINEL_POLL_NOW so the target instance picks it up immediately
651
- (project_dir / "SENTINEL_POLL_NOW").touch()
652
-
653
- project_label = _read_project_name(project_dir.resolve()) if project_arg else "this project"
654
- logger.info("Boss created issue for %s: %s", project_label, fname)
655
- return json.dumps({
656
- "status": "queued",
657
- "project": project_label,
658
- "file": fname,
659
- "note": f"Delivered to '{project_label}'. Sentinel will process it on the next poll cycle.",
660
- })
661
-
662
- if name == "get_fix_details":
663
- fp = inputs["fingerprint"]
664
- fix = store.get_confirmed_fix(fp) or store.get_marker_seen_fix(fp)
665
- if not fix:
666
- # Fallback: search recent fixes by prefix
667
- recent = store.get_recent_fixes(hours=72)
668
- fix = next((f for f in recent if f.get("fingerprint", "").startswith(fp)), None)
669
- return json.dumps(fix or {"error": "not found"})
670
-
671
- if name == "list_pending_prs":
672
- prs = store.get_open_prs()
673
- return json.dumps({
674
- "count": len(prs),
675
- "open_prs": [
676
- {
677
- "repo": p["repo_name"],
678
- "branch": p["branch"],
679
- "pr_url": p["pr_url"],
680
- "timestamp": p.get("timestamp", ""),
681
- }
682
- for p in prs
683
- ],
684
- })
685
-
686
- if name == "pause_sentinel":
687
- Path("SENTINEL_PAUSE").touch()
688
- logger.info("Boss: SENTINEL_PAUSE created")
689
- return json.dumps({"status": "paused"})
690
-
691
- if name == "resume_sentinel":
692
- p = Path("SENTINEL_PAUSE")
693
- if p.exists():
694
- p.unlink()
695
- logger.info("Boss: SENTINEL_PAUSE removed")
696
- return json.dumps({"status": "resumed"})
697
-
698
- if name == "list_projects":
699
- projects = []
700
- for d in _find_project_dirs():
701
- repo_cfg_dir = d / "config" / "repo-configs"
702
- repos_in_project = []
703
- if repo_cfg_dir.exists():
704
- for p in sorted(repo_cfg_dir.glob("*.properties")):
705
- if p.name.startswith("_"):
706
- continue
707
- repo_url = ""
708
- for line in p.read_text(encoding="utf-8", errors="ignore").splitlines():
709
- if line.startswith("REPO_URL"):
710
- repo_url = line.split("=", 1)[-1].strip()
711
- break
712
- repos_in_project.append({"repo": p.stem, "url": repo_url})
713
- projects.append({
714
- "project": _read_project_name(d),
715
- "dir": d.name,
716
- "running": (d / "sentinel.pid").exists(),
717
- "this": d.resolve() == Path(".").resolve(),
718
- "repos": repos_in_project,
719
- })
720
- return json.dumps({"projects": projects})
721
-
722
- if name == "search_logs":
723
- query = inputs.get("query", "")
724
- source = inputs.get("source", "").lower()
725
- max_matches = int(inputs.get("max_matches", 30))
726
-
727
- # ── Live fetch path: SSH to servers and grep in real time ──────────────
728
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
729
- log_cfg_dir = Path("config") / "log-configs"
730
- if script.exists() and log_cfg_dir.exists():
731
- props_files = sorted(log_cfg_dir.glob("*.properties"))
732
- if source:
733
- props_files = [p for p in props_files if source in p.stem.lower()]
734
- if props_files:
735
- live_results = []
736
- for props in props_files:
737
- env = os.environ.copy()
738
- env["GREP_FILTER"] = query
739
- try:
740
- r = subprocess.run(
741
- ["bash", str(script), str(props)],
742
- capture_output=True, text=True, timeout=60, env=env,
743
- )
744
- lines = (r.stdout or "").strip().splitlines()
745
- matches = [ln[:300] for ln in lines if ln.strip()][:max_matches]
746
- if matches:
747
- live_results.append({"source": props.stem, "matches": matches})
748
- logger.info("Boss search_logs live %s rc=%d found=%d", props.stem, r.returncode, len(matches))
749
- except subprocess.TimeoutExpired:
750
- live_results.append({"source": props.stem, "error": "timed out"})
751
- except Exception as e:
752
- live_results.append({"source": props.stem, "error": str(e)})
753
- total = sum(len(r.get("matches", [])) for r in live_results)
754
- return json.dumps({
755
- "query": query,
756
- "mode": "live",
757
- "total_matches": total,
758
- "results": live_results,
759
- })
760
-
761
- # ── Fallback: search locally-cached log files ──────────────────────────
762
- fetched_dir = Path("workspace/fetched")
763
- if not fetched_dir.exists():
764
- return json.dumps({"error": "No fetched logs found and fetch_log.sh unavailable"})
765
- try:
766
- pattern = re.compile(query, re.IGNORECASE)
767
- except re.error as e:
768
- return json.dumps({"error": f"Invalid regex: {e}"})
769
- results = []
770
- for log_file in sorted(fetched_dir.glob("*.log")):
771
- if source and source not in log_file.name.lower():
772
- continue
773
- try:
774
- lines = log_file.read_text(encoding="utf-8", errors="ignore").splitlines()
775
- matches = [
776
- {"line": i + 1, "text": line[:300]}
777
- for i, line in enumerate(lines)
778
- if pattern.search(line)
779
- ][:max_matches]
780
- if matches:
781
- results.append({"file": log_file.name, "matches": matches})
782
- except Exception:
783
- pass
784
- total = sum(len(r["matches"]) for r in results)
785
- return json.dumps({
786
- "query": query,
787
- "mode": "cached",
788
- "total_matches": total,
789
- "files_searched": len(list(fetched_dir.glob("*.log"))),
790
- "results": results,
791
- })
792
-
793
- if name == "trigger_poll":
794
- Path("SENTINEL_POLL_NOW").touch()
795
- logger.info("Boss: immediate poll requested")
796
- return json.dumps({"status": "triggered", "note": "Sentinel will run a poll cycle within seconds"})
797
-
798
- if name == "get_repo_status":
799
- hours = int(inputs.get("hours", 24))
800
- fixes = store.get_recent_fixes(hours)
801
- errors = store.get_recent_errors(hours)
802
- by_repo: dict = {}
803
- for fix in fixes:
804
- repo = fix.get("repo_name", "unknown")
805
- s = by_repo.setdefault(repo, {"applied": 0, "pending": 0, "failed": 0, "skipped": 0})
806
- key = fix.get("status", "failed")
807
- s[key] = s.get(key, 0) + 1
808
- return json.dumps({"window_hours": hours, "total_errors": len(errors), "by_repo": by_repo})
809
-
810
- if name == "list_recent_commits":
811
- limit = int(inputs.get("limit", 5))
812
- results = []
813
- for repo_name, repo in cfg_loader.repos.items():
814
- local = Path(repo.local_path)
815
- if not local.exists():
816
- continue
817
- try:
818
- r = subprocess.run(
819
- ["git", "log", "--oneline", "--grep=sentinel", "-n", str(limit)],
820
- cwd=str(local), capture_output=True, text=True, timeout=10,
821
- )
822
- commits = r.stdout.strip().splitlines()
823
- if commits:
824
- results.append({"repo": repo_name, "commits": commits})
825
- except Exception:
826
- pass
827
- return json.dumps({"sentinel_commits": results})
828
-
829
- if name == "pull_repo":
830
- target = inputs.get("repo", "").lower()
831
- results = []
832
- for repo_name, repo in cfg_loader.repos.items():
833
- if target and target not in repo_name.lower():
834
- continue
835
- local = Path(repo.local_path)
836
- if not local.exists():
837
- results.append({"repo": repo_name, "status": "error", "detail": "local path not found"})
838
- continue
839
- try:
840
- r = subprocess.run(
841
- ["git", "pull", "--rebase", "origin", repo.branch],
842
- cwd=str(local), capture_output=True, text=True, timeout=60,
843
- )
844
- last_line = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
845
- if r.returncode == 0:
846
- results.append({"repo": repo_name, "status": "ok", "detail": last_line})
847
- else:
848
- results.append({"repo": repo_name, "status": "error", "detail": r.stderr.strip()})
849
- except Exception as e:
850
- results.append({"repo": repo_name, "status": "error", "detail": str(e)})
851
- return json.dumps({"results": results})
852
-
853
- if name == "pull_config":
854
- target = inputs.get("project", "")
855
- dirs = _find_project_dirs(target)
856
- if not dirs:
857
- return json.dumps({"error": f"No project found matching '{target}'"})
858
- results = []
859
- for d in dirs:
860
- res = _git_pull(d)
861
- results.append({"project": _read_project_name(d), "dir": d.name, **res})
862
- logger.info("Boss: pull_config %s → %s", d.name, res["status"])
863
- return json.dumps({"results": results})
864
-
865
- if name == "fetch_logs":
866
- source_filter = inputs.get("source", "").lower()
867
- debug = bool(inputs.get("debug", False))
868
- tail_override = inputs.get("tail")
869
- grep_override = inputs.get("grep_filter", "")
870
-
871
- # Find fetch_log.sh relative to this file
872
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
873
- if not script.exists():
874
- return json.dumps({"error": f"fetch_log.sh not found at {script}"})
875
-
876
- log_cfg_dir = Path("config") / "log-configs"
877
- if not log_cfg_dir.exists():
878
- return json.dumps({"error": "config/log-configs/ not found"})
879
-
880
- props_files = sorted(log_cfg_dir.glob("*.properties"))
881
- if source_filter:
882
- props_files = [p for p in props_files if source_filter in p.stem.lower()]
883
- if not props_files:
884
- return json.dumps({"error": f"No log-config found matching '{source_filter}'"})
885
-
886
- results = []
887
- for props in props_files:
888
- env = os.environ.copy()
889
- if tail_override:
890
- env["TAIL"] = str(tail_override)
891
- if grep_override:
892
- env["GREP_FILTER"] = grep_override
893
-
894
- cmd = ["bash", str(script)]
895
- if debug:
896
- cmd.append("--debug")
897
- cmd.append(str(props))
898
-
899
- try:
900
- r = subprocess.run(
901
- cmd, capture_output=True, text=True, timeout=120, env=env,
902
- )
903
- output = (r.stdout or "").strip()
904
- stderr = (r.stderr or "").strip()
905
- results.append({
906
- "source": props.stem,
907
- "returncode": r.returncode,
908
- "output": output[-2000:] if output else "",
909
- "stderr": stderr[-1000:] if stderr else "",
910
- })
911
- logger.info("Boss fetch_logs %s rc=%d", props.stem, r.returncode)
912
- except subprocess.TimeoutExpired:
913
- results.append({"source": props.stem, "error": "timed out after 120s"})
914
- except Exception as e:
915
- results.append({"source": props.stem, "error": str(e)})
916
-
917
- return json.dumps({"fetched": len(results), "results": results})
918
-
919
- if name == "watch_bot":
920
- user_ids = inputs.get("user_ids", [])
921
- project_arg = inputs.get("project", "").strip()
922
- if not user_ids:
923
- return json.dumps({"error": "No user_ids provided"})
924
-
925
- # Resolve + validate the project — it is required for bot issue routing
926
- resolved_project = ""
927
- if project_arg:
928
- project_dirs = _find_project_dirs(project_arg)
929
- if not project_dirs:
930
- all_names = [_read_project_name(d) for d in _find_project_dirs()]
931
- return json.dumps({
932
- "error": f"No project found matching '{project_arg}'",
933
- "available_projects": all_names,
934
- "action_needed": "Ask the user which project these bot alerts belong to.",
935
- })
936
- if len(project_dirs) > 1:
937
- matches = [_read_project_name(d) for d in project_dirs]
938
- return json.dumps({
939
- "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
940
- "action_needed": "Ask the user to clarify which project.",
941
- })
942
- resolved_project = _read_project_name(project_dirs[0])
943
- else:
944
- all_projects = _find_project_dirs()
945
- if len(all_projects) == 1:
946
- # Single project in workspace — auto-assign
947
- resolved_project = _read_project_name(all_projects[0])
948
- elif all_projects:
949
- all_names = [_read_project_name(d) for d in all_projects]
950
- return json.dumps({
951
- "error": "Cannot determine which project these bot alerts belong to.",
952
- "available_projects": all_names,
953
- "action_needed": "Ask the user to specify the project, then retry with project filled in.",
954
- })
955
-
956
- results = []
957
- for uid in user_ids:
958
- if not slack_client:
959
- results.append({"user_id": uid, "status": "error", "reason": "no Slack client available"})
960
- continue
961
- try:
962
- info = await slack_client.users_info(user=uid)
963
- user = info.get("user", {})
964
- if not user.get("is_bot", False):
965
- results.append({"user_id": uid, "status": "skipped", "reason": "not a bot — only bots can be watched passively"})
966
- continue
967
- bot_name = user.get("real_name") or user.get("name") or uid
968
- store.add_watched_bot(uid, bot_name, added_by="boss", project_name=resolved_project)
969
- logger.info("Boss: now watching bot %s (%s) → project '%s'", bot_name, uid, resolved_project or "unset")
970
- results.append({"user_id": uid, "bot_name": bot_name, "project": resolved_project, "status": "watching"})
971
- except Exception as e:
972
- results.append({"user_id": uid, "status": "error", "reason": str(e)})
973
- return json.dumps({"results": results})
974
-
975
- if name == "unwatch_bot":
976
- user_ids = inputs.get("user_ids", [])
977
- if not user_ids:
978
- return json.dumps({"error": "No user_ids provided"})
979
- results = []
980
- for uid in user_ids:
981
- removed = store.remove_watched_bot(uid)
982
- logger.info("Boss: unwatch bot %s → %s", uid, "removed" if removed else "not found")
983
- results.append({"user_id": uid, "status": "removed" if removed else "not found"})
984
- return json.dumps({"results": results})
985
-
986
- if name == "list_watched_bots":
987
- bots = store.get_watched_bots()
988
- return json.dumps({
989
- "count": len(bots),
990
- "bots": [
991
- {
992
- "bot_id": b["bot_id"],
993
- "bot_name": b["bot_name"],
994
- "project": b.get("project_name") or "",
995
- "added_by": b["added_by"],
996
- "added_at": b["added_at"],
997
- }
998
- for b in bots
999
- ],
1000
- })
1001
-
1002
- if name == "upgrade_sentinel":
1003
- import threading
1004
-
1005
- # Sentinel is installed via npm — use `sentinel upgrade` which handles
1006
- # npm install + Python bundle copy + restart via stopAll/startAll.
1007
- # Run it in the background after a short delay so the Slack reply is
1008
- # sent before the process is replaced.
1009
- try:
1010
- r = subprocess.run(
1011
- ["sentinel", "--version"],
1012
- capture_output=True, text=True, timeout=10,
1013
- )
1014
- sentinel_bin_ok = r.returncode == 0
1015
- except Exception:
1016
- sentinel_bin_ok = False
1017
-
1018
- if not sentinel_bin_ok:
1019
- return json.dumps({
1020
- "status": "error",
1021
- "note": "`sentinel` CLI not found. Run: npm install -g @misterhuydo/sentinel",
1022
- })
1023
-
1024
- def _do_upgrade():
1025
- import time
1026
- time.sleep(10) # give Slack time to post the reply
1027
- subprocess.Popen(["sentinel", "upgrade"], close_fds=True)
1028
-
1029
- threading.Thread(target=_do_upgrade, daemon=True).start()
1030
- logger.info("Boss: upgrade_sentinel scheduled via `sentinel upgrade`")
1031
- return json.dumps({
1032
- "status": "ok",
1033
- "note": "Upgrade started — pulling latest version via npm and restarting. Give me ~30 seconds then I'll be back.",
1034
- })
1035
-
1036
- if name == "ask_codebase":
1037
- target = inputs.get("repo", "").lower()
1038
- question = inputs.get("question", "")
1039
-
1040
- # 1. Find repos whose name contains the target (e.g. "STS", "elprint-sales")
1041
- matched = [(rn, r) for rn, r in cfg_loader.repos.items() if target in rn.lower()]
1042
-
1043
- # 2. No repo match — check if target is a project name → use ALL repos in cfg_loader
1044
- # (each Sentinel instance is scoped to one project, so all repos belong to it)
1045
- if not matched:
1046
- current_project = _read_project_name(Path("."))
1047
- if target in current_project.lower() or current_project.lower() in target:
1048
- matched = list(cfg_loader.repos.items())
1049
-
1050
- if not matched:
1051
- return json.dumps({
1052
- "error": f"No repo or project found matching '{target}'",
1053
- "available_repos": list(cfg_loader.repos.keys()),
1054
- })
1055
-
1056
- cfg = cfg_loader.sentinel
1057
- env = os.environ.copy()
1058
- if cfg.anthropic_api_key:
1059
- env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1060
-
1061
- def _ask_one(repo_name, repo_cfg) -> dict:
1062
- local_path = Path(repo_cfg.local_path)
1063
- if not local_path.exists():
1064
- return {"repo": repo_name, "error": f"not cloned yet at {local_path}"}
1065
- prompt = (
1066
- f"You are a code analyst. Answer the following question about the codebase at: {local_path}\n\n"
1067
- f"Question: {question}\n\n"
1068
- f"Use whatever tools you need to answer accurately. Be concise and direct. Plain text only."
1069
- )
1070
- try:
1071
- r = subprocess.run(
1072
- [cfg.claude_code_bin, "--permission-mode", "bypassPermissions", "--print", prompt],
1073
- capture_output=True, text=True, timeout=180, env=env,
1074
- cwd=str(local_path),
1075
- )
1076
- output = (r.stdout or "").strip()
1077
- logger.info("Boss ask_codebase %s rc=%d len=%d", repo_name, r.returncode, len(output))
1078
- if r.returncode != 0 and not output:
1079
- return {"repo": repo_name, "error": f"claude --print failed (rc={r.returncode}): {(r.stderr or '')[:200]}"}
1080
- return {"repo": repo_name, "answer": output[:3000]}
1081
- except subprocess.TimeoutExpired:
1082
- return {"repo": repo_name, "error": "timed out after 180s"}
1083
- except Exception as e:
1084
- return {"repo": repo_name, "error": str(e)}
1085
-
1086
- if len(matched) == 1:
1087
- result = _ask_one(*matched[0])
1088
- # Unwrap single-repo result for cleaner response
1089
- return json.dumps(result)
1090
-
1091
- # Multiple repos — query each and combine
1092
- results = [_ask_one(rn, r) for rn, r in matched]
1093
- return json.dumps({"project": target, "repos_queried": len(results), "results": results})
1094
-
1095
- if name == "restart_project":
1096
- project_arg = inputs.get("project", "").lower()
1097
- dirs = _find_project_dirs(project_arg)
1098
- if not dirs:
1099
- return json.dumps({"error": f"No project found matching '{project_arg}'"})
1100
- results = []
1101
- for d in dirs:
1102
- stop_sh = d / "stop.sh"
1103
- start_sh = d / "start.sh"
1104
- if not stop_sh.exists() or not start_sh.exists():
1105
- results.append({"project": d.name, "status": "error", "detail": "stop.sh or start.sh not found"})
1106
- continue
1107
- try:
1108
- subprocess.run(["bash", str(stop_sh)], cwd=str(d), timeout=30)
1109
- subprocess.run(["bash", str(start_sh)], cwd=str(d), timeout=30)
1110
- results.append({"project": d.name, "status": "restarted"})
1111
- logger.info("Boss: restarted project %s", d.name)
1112
- except Exception as e:
1113
- results.append({"project": d.name, "status": "error", "detail": str(e)})
1114
- return json.dumps({"results": results})
1115
-
1116
- if name == "tail_log":
1117
- source = inputs.get("source", "").lower()
1118
- lines = int(inputs.get("lines", 100))
1119
- script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1120
- log_cfg_dir = Path("config") / "log-configs"
1121
-
1122
- if not script.exists():
1123
- return json.dumps({"error": "fetch_log.sh not found"})
1124
- if not log_cfg_dir.exists():
1125
- return json.dumps({"error": "config/log-configs/ not found"})
1126
-
1127
- props_files = sorted(log_cfg_dir.glob("*.properties"))
1128
- if source:
1129
- props_files = [p for p in props_files if source in p.stem.lower()]
1130
- if not props_files:
1131
- return json.dumps({"error": f"No log-config found matching '{source}'"})
1132
-
1133
- results = []
1134
- for props in props_files:
1135
- env = os.environ.copy()
1136
- env["TAIL"] = str(lines)
1137
- env["GREP_FILTER"] = "" # no filter show everything
1138
- try:
1139
- r = subprocess.run(
1140
- ["bash", str(script), str(props)],
1141
- capture_output=True, text=True, timeout=60, env=env,
1142
- )
1143
- tail_lines = (r.stdout or "").strip().splitlines()[-lines:]
1144
- results.append({
1145
- "source": props.stem,
1146
- "lines": len(tail_lines),
1147
- "content": "\n".join(tail_lines),
1148
- })
1149
- logger.info("Boss tail_log %s rc=%d lines=%d", props.stem, r.returncode, len(tail_lines))
1150
- except subprocess.TimeoutExpired:
1151
- results.append({"source": props.stem, "error": "timed out"})
1152
- except Exception as e:
1153
- results.append({"source": props.stem, "error": str(e)})
1154
- return json.dumps({"results": results})
1155
-
1156
- return json.dumps({"error": f"unknown tool: {name}"})
1157
-
1158
-
1159
- # ── CLI fallback (OAuth / no API key) ────────────────────────────────────────
1160
-
1161
# Matches one `ACTION: {...}` directive per line in the CLI model's output.
# Group 1 is the JSON object. Leading spaces/tabs are tolerated because the
# prompt shows the example directives indented, and the model tends to copy
# that indentation; anchoring strictly at column 0 would silently drop them.
_ACTION_RE = re.compile(r"^[ \t]*ACTION:\s*(\{.*\})", re.MULTILINE)
1162
-
1163
-
1164
async def _handle_with_cli(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
) -> tuple[str, bool]:
    """Fallback: use `claude --print` for users without an Anthropic API key.

    Builds one flat prompt (system prompt, current status, open PRs, optional
    log-search results, the last 8 history entries and the new message), runs
    the configured Claude Code binary once, then executes any
    ``ACTION: {...}`` lines found in its output through ``_run_tool``.

    Args:
        message: The user's message text.
        history: Conversation history (role/content dicts). Mutated in place,
            but only when a reply is produced; failure returns leave it as is.
        cfg_loader: Provides ``repos``, ``log_sources`` and ``sentinel`` config.
        store: State store handed through to ``_run_tool``.
        slack_client: Optional Slack client, forwarded to tools run from
            ACTION lines so they behave the same as on the API path.
        user_name: Optional display name used in the prompt and the greeting.

    Returns:
        ``(reply_text, is_done)``. ``is_done`` is True when the output
        contained ``[DONE]`` or when the CLI call failed outright.
    """
    status_json = await _run_tool("get_status", {"hours": 24}, cfg_loader, store)
    prs_json = await _run_tool("list_pending_prs", {}, cfg_loader, store)

    # Pre-fetch log search if the message is a search request.
    # Use quoted strings as the query, or fall back to the full message.
    # Never hardcode field names — the query is whatever the user said.
    search_json = ""
    _search_kws = ("search", "find", "look for", "show me log", "grep", "entries for")
    lowered = message.lower()
    if any(kw in lowered for kw in _search_kws):
        quoted = re.findall(r'"([^"]+)"', message)
        query = quoted[0] if quoted else message
        search_json = await _run_tool("search_logs", {"query": query}, cfg_loader, store)

    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    log_sources = list(cfg_loader.log_sources.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # Flatten the recent history to plain text. Content may be a string or a
    # list of blocks (dicts or SDK objects); only text blocks are kept.
    history_lines = []
    for msg in history[-8:]:
        role = msg["role"].upper()
        content = msg["content"]
        if isinstance(content, list):
            content = " ".join(
                (b.get("text", "") if isinstance(b, dict) else getattr(b, "text", ""))
                for b in content
                if (isinstance(b, dict) and b.get("type") == "text")
                or (hasattr(b, "type") and b.type == "text")
            )
        history_lines.append(f"\n{role}: {content}")
    history_text = "".join(history_lines)

    prompt = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name}" if user_name else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + f"\n\nCurrent status (last 24 h):\n{status_json}"
        + f"\n\nOpen PRs:\n{prs_json}"
        + (f"\n\nLog search results:\n{search_json}" if search_json else "")
        + (f"\n\nConversation so far:{history_text}" if history_text else "")
        + f"\n\nUSER: {message}"
        + "\n\nIf you need to take an action, include a line like:\n"
        + " ACTION: {\"action\": \"pause_sentinel\"}\n"
        + " ACTION: {\"action\": \"resume_sentinel\"}\n"
        + " ACTION: {\"action\": \"trigger_poll\"}\n"
        + " ACTION: {\"action\": \"create_issue\", \"description\": \"...\", \"target_repo\": \"\"}\n"
        + " ACTION: {\"action\": \"search_logs\", \"query\": \"<whatever the user asked to find>\"}\n"
        + "End with [DONE] if the request is fully handled."
    )

    cfg = cfg_loader.sentinel
    env = os.environ.copy()
    if cfg.anthropic_api_key:
        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key

    try:
        result = subprocess.run(
            [cfg.claude_code_bin, "--permission-mode", "bypassPermissions", "--print", prompt],
            capture_output=True, text=True, timeout=180, env=env,
        )
        output = (result.stdout or "").strip()
        if result.returncode != 0 or not output:
            stderr = (result.stderr or "").strip()
            logger.error(
                "Boss CLI call failed (rc=%d): stdout=%r stderr=%r",
                result.returncode, output[:200], stderr[:200],
            )
        # A non-zero exit that still produced output is treated as usable;
        # only a failure with nothing to show is reported to the user.
        if result.returncode != 0 and not output:
            return f":warning: `claude --print` failed (exit {result.returncode}): {(result.stderr or '').strip()[:300]}", True
    except Exception as e:
        # Boundary handler: covers a missing binary, timeouts, etc.
        logger.error("Boss CLI call failed: %s", e)
        return f":warning: Boss unavailable: {e}", True

    # Run every ACTION directive. Results are only logged; one bad directive
    # must not stop the remaining ones or the reply.
    for m in _ACTION_RE.finditer(output):
        try:
            action = json.loads(m.group(1))
            name = action.pop("action", "")
            if name:
                result_str = await _run_tool(
                    name, action, cfg_loader, store, slack_client=slack_client,
                )
                logger.info("Boss CLI action: %s → %s", name, result_str[:80])
        except Exception as e:
            logger.warning("Boss action parse error: %s", e)

    reply = _ACTION_RE.sub("", output).strip()
    is_done = "[DONE]" in reply
    reply = reply.replace("[DONE]", "").strip()
    if not reply:
        # Nothing left after stripping directives and the [DONE] token.
        greeting = f"Hi {user_name}! " if user_name else "Hi! "
        reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return reply, is_done
1268
-
1269
-
1270
- # ── Main entry point ──────────────────────────────────────────────────────────
1271
-
1272
async def handle_message(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
) -> tuple[str, bool]:
    """
    Process one user message through the Sentinel Boss (Claude with tool use).

    Falls back to the `claude --print` CLI path when no Anthropic API key is
    configured (neither in the Sentinel config nor in ANTHROPIC_API_KEY).

    Args:
        message:      The user's Slack message text.
        history:      Conversation history list — mutated in place (role/content dicts).
                      The final assistant turn is stored as plain reply text.
        cfg_loader:   ConfigLoader for repo/sentinel config.
        store:        StateStore for DB queries.
        slack_client: Optional Slack client passed through to tools.
        user_name:    Optional display name added to the system context.

    Returns:
        (reply_text, is_done)
        is_done=True  → session complete, release the Slack queue slot.
        is_done=False → waiting for user follow-up, keep the slot.
    """
    try:
        import anthropic
    except ImportError:
        return (
            ":warning: `anthropic` package not installed. Run: `pip install anthropic`",
            True,
        )

    api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        return await _handle_with_cli(message, history, cfg_loader, store, slack_client=slack_client, user_name=user_name)

    client = anthropic.Anthropic(api_key=api_key)

    # Build system context snapshot
    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    known_projects = [_read_project_name(d) for d in _find_project_dirs()]
    log_sources = list(cfg_loader.log_sources.keys())
    system = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name}" if user_name else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + (f"\nKnown projects in workspace: {', '.join(known_projects)}" if known_projects else "")
    )

    user_turn = {"role": "user", "content": message}
    history.append(user_turn)
    # Tool-use / tool-result exchanges live only in this working copy; the
    # persistent history keeps just the user turn and the final reply.
    messages = list(history)

    # Upper bound on model/tool round-trips so a model that keeps requesting
    # tools cannot hold the handler forever.
    max_tool_rounds = 20

    # Agentic loop — Claude may call multiple tools before giving a final reply
    for _ in range(max_tool_rounds):
        try:
            response = client.messages.create(
                model="claude-opus-4-6",
                max_tokens=1024,
                system=system,
                tools=_TOOLS,
                messages=messages,
            )
        except Exception as e:
            # Boundary handler, mirroring the CLI path: report instead of
            # raising, and drop the unanswered user turn from the history.
            logger.error("Boss API call failed: %s", e)
            if history and history[-1] is user_turn:
                history.pop()
            return f":warning: Boss unavailable: {e}", True

        text_parts = []
        tool_blocks = []
        for block in response.content:
            if block.type == "text":
                text_parts.append(block.text)
            elif block.type == "tool_use":
                tool_blocks.append(block)

        if not tool_blocks:
            # Final response — no more tool calls
            reply = " ".join(text_parts).strip()
            is_done = "[DONE]" in reply
            reply = reply.replace("[DONE]", "").strip()
            if not reply:
                greeting = f"Hi {user_name}! " if user_name else "Hi! "
                reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"
            # Store the text, not the raw SDK blocks: it is never empty, stays
            # serialisable, and matches what the CLI path stores.
            history.append({"role": "assistant", "content": reply})
            return reply, is_done

        # Execute tools and continue
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for tc in tool_blocks:
            result = await _run_tool(tc.name, tc.input, cfg_loader, store, slack_client=slack_client)
            logger.info("Boss tool: %s(%s) → %s", tc.name, tc.input, result[:120])
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tc.id,
                "content": result,
            })
        messages.append({"role": "user", "content": tool_results})

    # Round limit reached without a final text answer.
    logger.warning("Boss: gave up after %d tool rounds", max_tool_rounds)
    reply = (
        f":warning: Boss stopped after {max_tool_rounds} tool rounds without a final answer. "
        "Please rephrase or narrow the request."
    )
    history.append({"role": "assistant", "content": reply})
    return reply, True
1
+ """
2
+ sentinel_boss.py — Claude-backed Sentinel Boss.
3
+
4
+ Claude acts as the boss: reads project state, decides on actions,
5
+ executes them via tool use, and responds naturally. One agentic loop
6
+ per turn — Claude may call multiple tools before replying.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ import re
13
+ import subprocess
14
+ import uuid
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ── System prompt ────────────────────────────────────────────────────────────
22
+
23
# Base system prompt for the Boss. The message handlers prepend it to the
# per-turn context (time, pause state, repos, log sources).
# Tool names mentioned here must match the `name` fields in _TOOLS exactly;
# a misspelt name steers the model toward a tool that does not exist.
_SYSTEM = """\
You are Sentinel Boss — the AI interface for Sentinel, a 24/7 autonomous DevOps agent.

Sentinel watches production logs, detects errors, generates code fixes via Claude Code,
and opens GitHub PRs for admin review (or pushes directly if AUTO_PUBLISH=true).

Your job:
- Understand what the DevOps engineer needs in natural language
- Query Sentinel's live state (errors, fixes, open PRs) on their behalf
- Deliver tasks/issues to the right project — you know all projects in this workspace
- Control Sentinel (pause/resume) when asked
- Give honest, concise answers — you know this system inside out
- If a project name is unclear or ambiguous, ask the engineer to clarify — never guess

What you can do (tools available):

1. get_status — Show recent errors detected, fixes applied/pending, open PRs.
e.g. "what happened today?", "any issues?", "show open PRs"

2. create_issue — Deliver a fix/task to any project in this workspace by short name.
You know all project names — use list_projects if you're unsure.
If the project name is ambiguous or not found, ask to clarify.
e.g. "tell 1881 to fix X", "look into Y in elprint", "investigate Z"

3. pause_sentinel — Create SENTINEL_PAUSE file to halt all auto-fix activity.
e.g. "pause sentinel", "stop auto-fixing"

4. resume_sentinel — Remove SENTINEL_PAUSE file to resume normal operation.
e.g. "resume sentinel", "unpause"

5. list_projects — List all configured repos and log sources in this Sentinel instance.
e.g. "what projects are you watching?", "list all repos"

6. search_logs — SSH live to servers and grep logs in real time (uses fetch_log.sh with
the query as GREP_FILTER). Falls back to cached files if unavailable.
e.g. "search logs for illegal PIN in 1881", "find X in SSOLWA", "grep logs for Z"

7. trigger_poll — Trigger an immediate poll cycle without waiting for the schedule.
e.g. "check now", "poll immediately", "don't wait, run now"

8. get_repo_status — Show the current git branch, last commit, and recent fix branches
for a specific repository.
e.g. "status of repo X", "what branch is cairn on?"

9. list_recent_commits — List the most recent commits in a repo (including Sentinel's auto-fixes).
e.g. "show me recent commits in elprint-sales", "what did sentinel commit?"

10. get_fix_details — Get full details of a specific fix: error, patch path, PR URL, status.
e.g. "show fix abc123", "details on that fix"

11. list_errors — List recent errors from the state store, optionally filtered by repo or source.
e.g. "show all errors today", "what errors hit elprint this week?"

12. pull_repo — Run git pull on one or all managed application repos.
e.g. "pull changes", "git pull all repos", "update the code"

13. pull_config — Run git pull on one or all Sentinel project config dirs.
e.g. "pull config for 1881", "update sentinel config", "pull all configs"

14. fetch_logs — Run fetch_log.sh on demand to pull fresh logs from remote servers right now.
Supports --debug mode and parameter overrides (tail count, grep filter).
e.g. "fetch logs", "try fetch_log.sh for SSOLWA", "fetch logs with debug",
"grab latest logs from STS", "fetch logs without filter"

15. watch_bot — Register a Slack bot for passive monitoring. Every message it posts is
auto-queued as an issue in the bot's registered project.
ALWAYS requires a project — infer from context or ask the user first.
e.g. "listen to @alertbot", "watch @bot1 @bot2 for project 1881", "monitor @errorbot"

16. unwatch_bot — Remove a Slack bot from the passive watch list.
e.g. "stop watching @alertbot", "unwatch @errorbot"

17. list_watched_bots — Show all Slack bots currently being passively monitored and which projects
they are delivering to.
e.g. "which bots are you watching?", "list monitored bots"

18. upgrade_sentinel — Pull the latest Sentinel agent code, update Python deps, and restart the
process. Safe to run at any time — no restart if already up to date.
e.g. "upgrade sentinel", "update sentinel", "upgrade yourself"

19. ask_codebase — Ask any natural-language question about a managed repo's codebase.
Claude Code answers using its full knowledge of the code.
e.g. "what does the 1881 backend do?", "find PIN validation in elprint",
"any TODOs in cairn?", "are there security issues in elprint-sales?"

20. restart_project — Stop and restart a specific project instance (stop.sh + start.sh).
e.g. "restart 1881", "reboot elprint", "restart the cairn project"

21. tail_log — Fetch the last N lines of a log source live, without a grep filter.
e.g. "show recent SSOLWA logs", "tail STS", "last 200 lines from 1881 logs"

When someone asks what you can do, what you support, what your capabilities are, or how you can help,
reply with a short summary grouped by category:

*Monitoring & status*
• `get_status` — errors detected, fixes applied/pending/failed, open PRs — "what happened today?"
• `get_repo_status` — per-repo breakdown of errors and fixes — "how is elprint doing?"
• `list_recent_commits` — recent Sentinel auto-fix commits — "what did Sentinel commit?"

*Log management*
• `fetch_logs` — pull fresh logs from servers right now — "fetch logs for SSOLWA"
• `search_logs` — live SSH grep on production servers — "search logs for illegal PIN in 1881"
• `tail_log` — last N lines of a log source, no filter — "show recent SSOLWA logs"

*Codebase questions*
• `ask_codebase` — any question about a repo's code — "what does 1881 do?", "find PIN validation", "any TODOs?", "security issues?"

*Fix management*
• `get_fix_details` — full details of a specific fix — "show fix abc123"
• `list_pending_prs` — all open Sentinel PRs awaiting review — "list open PRs"

*Project & task delivery*
• `list_projects` — all projects and repos Sentinel manages — "what projects do you manage?"
• `create_issue` — deliver a task to any project by name — "tell 1881 to fix X"
• `trigger_poll` — run a log-fetch + fix cycle right now — "check now"
• `pause_sentinel` / `resume_sentinel` — halt or resume all auto-fix activity — "pause Sentinel"

*Repo & config sync*
• `pull_repo` — git pull on managed application repos — "pull latest code"
• `pull_config` — git pull on Sentinel config dirs — "pull config for elprint"

*Slack bot watching*
• `watch_bot` — register a Slack bot for passive monitoring; its messages are auto-queued as issues — "listen to @alertbot"
• `unwatch_bot` — stop monitoring a bot — "stop watching @errorbot"
• `list_watched_bots` — show all bots currently being monitored — "which bots are you watching?"

*Project control*
• `restart_project` — stop + restart a specific project — "restart 1881"

*Self-management*
• `upgrade_sentinel` — git pull + pip install + restart — "upgrade sentinel", "update yourself"

Tone: direct, professional, like a senior engineer who owns the system.
Don't pad responses. Don't say "Great question!" or "Certainly!".
If you don't know something, use a tool to find out before saying you don't know.

When the engineer's request is fully handled, end your LAST message with the token: [DONE]
IMPORTANT: Always write your actual reply text FIRST, then append [DONE] at the end. Example: "Hello! I'm Sentinel. [DONE]". Never output [DONE] as your only content.
For greetings like "hello" or empty messages, introduce yourself briefly and offer help, then end with [DONE].
If you need a follow-up from them, do NOT include [DONE] — wait for their next message.
"""
164
+
165
+ # ── Tool definitions ─────────────────────────────────────────────────────────
166
+
167
def _param(kind, description, **extra):
    """Build one JSON-schema property: ``type`` first, then ``description``, then any extras."""
    return {"type": kind, "description": description, **extra}


def _tool_def(name, description, properties=None, required=None):
    """Assemble one tool entry with keys in the order name / description / input_schema.

    ``required`` is only written into the schema when it is supplied, so tools
    without mandatory arguments carry just ``type`` and ``properties``.
    A fresh ``properties`` dict is created per call, so entries never share state.
    """
    schema = {"type": "object", "properties": {} if properties is None else properties}
    if required is not None:
        schema["required"] = required
    return {"name": name, "description": description, "input_schema": schema}


# Tool catalogue offered to the model. Each entry's "name" is the string that
# _run_tool() dispatches on; the descriptions double as routing hints.
_TOOLS = [
    _tool_def(
        "get_status",
        (
            "Get recent errors, fixes applied, fixes pending review, and open PRs. "
            "Use for: 'what happened today?', 'any issues?', 'how are things?', "
            "'what are the open PRs?', 'did sentinel fix anything?'"
        ),
        {
            "hours": _param("integer", "Look-back window in hours (default 24)", default=24),
        },
    ),
    _tool_def(
        "create_issue",
        (
            "Deliver a fix/task request to a Sentinel project instance. "
            "Use when the engineer says 'tell 1881 to do X', 'look into Y in project elprint', "
            "'implement this in 1881: ...'. Can target any project by short name. "
            "Defaults to the current project if no project is specified."
        ),
        {
            "description": _param(
                "string",
                "Full task/problem description — everything the engineer told you",
            ),
            "project": _param(
                "string",
                "Project short name to deliver to (e.g. '1881', 'elprint'). Omit for current project.",
            ),
            "target_repo": _param(
                "string",
                "Specific repo within the project (omit to let Sentinel auto-route)",
            ),
        },
        required=["description"],
    ),
    _tool_def(
        "get_fix_details",
        "Get full details of a specific fix by fingerprint (8+ hex chars).",
        {
            "fingerprint": {"type": "string"},
        },
        required=["fingerprint"],
    ),
    _tool_def(
        "list_pending_prs",
        "List all open Sentinel PRs awaiting admin review.",
    ),
    _tool_def(
        "pause_sentinel",
        (
            "Pause ALL Sentinel fix activity immediately. "
            "Use when the engineer says 'pause', 'stop', 'freeze', or 'hold off'."
        ),
    ),
    _tool_def(
        "resume_sentinel",
        "Resume Sentinel fix activity after a pause.",
    ),
    _tool_def(
        "list_projects",
        (
            "List all projects (Sentinel instances) in this workspace and the repos "
            "each one manages. Use for: 'what projects do you manage?', 'list projects', "
            "'what repos are configured?', 'show me all projects'."
        ),
    ),
    _tool_def(
        "search_logs",
        (
            "Search production logs for a keyword or pattern. "
            "When a project or source is specified (or can be inferred), performs a LIVE fetch "
            "via fetch_log.sh with the query as the grep filter — SSHes directly to the server. "
            "Falls back to searching locally-cached log files when no source can be determined. "
            "Use for: 'search logs for illegal PIN in 1881', 'find X in SSOLWA logs', "
            "'what did user Y do?', 'show entries for appid=Z', 'grep logs for X'."
        ),
        {
            "query": _param("string", "Keyword or regex to grep for"),
            "source": _param(
                "string",
                "Log source name to search (partial match against log-config filenames, e.g. 'SSOLWA', '1881'). Leave empty to search all sources.",
            ),
            "max_matches": _param(
                "integer",
                "Max matching lines to return per source (default 30)",
                default=30,
            ),
        },
        required=["query"],
    ),
    _tool_def(
        "trigger_poll",
        (
            "Trigger an immediate log-fetch and error-detection cycle without waiting "
            "for the next scheduled interval. Use when: 'check now', 'run now', "
            "'poll immediately', 'don't wait'."
        ),
    ),
    _tool_def(
        "get_repo_status",
        (
            "Per-repository breakdown of errors detected and fixes applied. "
            "Use for: 'how is repo X doing?', 'which repo has the most issues?', "
            "'break down by repo'."
        ),
        {
            "hours": _param("integer", "Look-back window in hours (default 24)", default=24),
        },
    ),
    _tool_def(
        "list_recent_commits",
        (
            "List recent commits made by Sentinel across all managed repos. "
            "Use for: 'what did Sentinel commit?', 'show recent auto-fixes', 'what was changed?'."
        ),
        {
            "limit": _param("integer", "Max commits per repo (default 5)", default=5),
        },
    ),
    _tool_def(
        "pull_repo",
        (
            "Run git pull on one or all managed repos to fetch latest changes from GitHub. "
            "Use for: 'pull changes', 'git pull', 'update repo X', 'fetch latest code'."
        ),
        {
            "repo": _param("string", "Repo name to pull (omit to pull all configured repos)"),
        },
    ),
    _tool_def(
        "pull_config",
        (
            "Run git pull on one or all Sentinel project config directories. "
            "Projects are matched by short name ('1881', 'elprint') or full dir name ('sentinel-1881'). "
            "Use for: 'pull config for 1881', 'update sentinel config', 'pull all configs'."
        ),
        {
            "project": _param(
                "string",
                "Project short name or dir name to pull (omit for all projects)",
            ),
        },
    ),
    _tool_def(
        "fetch_logs",
        (
            "Run fetch_log.sh for one or all configured log sources to pull the latest logs "
            "from remote servers right now. Use for: 'fetch logs', 'run fetch_log.sh', "
            "'grab latest logs from SSOLWA', 'try fetch_log.sh for STS', "
            "'pull logs from server', 'get fresh logs'."
        ),
        {
            "source": _param(
                "string",
                "Log source name to fetch (partial match, e.g. 'SSOLWA'). Omit to fetch all.",
            ),
            "debug": _param(
                "boolean",
                "Run fetch_log.sh with --debug flag to show SSH/grep details",
                default=False,
            ),
            "tail": _param(
                "integer",
                "Override TAIL lines (how many log lines to fetch)",
            ),
            "grep_filter": _param(
                "string",
                "Override GREP_FILTER (regex). Pass 'none' to disable filtering.",
            ),
        },
    ),
    _tool_def(
        "watch_bot",
        (
            "Tell Sentinel to passively monitor a Slack bot — queuing its messages as issues. "
            "Extract all <@UXXXXXX> user IDs from the message and pass them here. "
            "Sentinel verifies each is actually a bot (not a human) before adding to the watch list. "
            "IMPORTANT: a bot watcher is only useful if its issues can be delivered to a project. "
            "Try to infer the project from context (bot name, prior messages, available projects). "
            "If it cannot be determined, do NOT call this tool — instead ask the user which project "
            "the bot's alerts belong to, then call this tool with the project filled in. "
            "Use for: 'listen to @alertbot', 'watch @bot1 @bot2', 'monitor @errorbot'."
        ),
        {
            # Written out literally: "items" precedes "description" in this entry.
            "user_ids": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Slack user IDs to watch — extract from <@UXXXXXX> patterns in the message",
            },
            "project": _param(
                "string",
                "Project short name this bot's issues should be routed to (e.g. '1881', 'elprint'). Infer from context or ask user before calling.",
            ),
        },
        required=["user_ids"],
    ),
    _tool_def(
        "unwatch_bot",
        (
            "Stop Sentinel from monitoring a Slack bot. "
            "Use for: 'stop watching @alertbot', 'unwatch @bot', 'remove @errorbot from watchers'."
        ),
        {
            # Written out literally: "items" precedes "description" in this entry.
            "user_ids": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Slack user IDs to remove from the watch list",
            },
        },
        required=["user_ids"],
    ),
    _tool_def(
        "list_watched_bots",
        (
            "List all Slack bots Sentinel is currently monitoring passively. "
            "Use for: 'who are you watching?', 'which bots are you monitoring?', 'list watched bots'."
        ),
    ),
    _tool_def(
        "upgrade_sentinel",
        (
            "Upgrade the Sentinel agent itself: git pull the latest code, update Python deps, "
            "then restart the process. Safe to call at any time — if already up to date, "
            "no restart is triggered. "
            "Use for: 'upgrade sentinel', 'update sentinel', 'upgrade yourself', "
            "'pull latest sentinel code', 'restart sentinel after upgrade'."
        ),
    ),
    _tool_def(
        "ask_codebase",
        (
            "Ask any natural-language question about a managed codebase. "
            "Accepts a repo name (e.g. 'STS', 'elprint-sales') OR a project name (e.g. '1881', 'elprint') "
            "— if a project name is given and it has multiple repos, all are queried. "
            "Claude Code answers using its full codebase knowledge — no need to specify how. "
            "Use for: 'what does 1881 do?', 'TODOs in 1881', 'find PIN validation in STS', "
            "'security issues in elprint-sales?', 'summarize the cairn repo'."
        ),
        {
            "repo": _param(
                "string",
                "Repo name (e.g. 'STS', 'elprint-sales') OR project name (e.g. '1881', 'elprint') — project name queries all its repos",
            ),
            "question": _param(
                "string",
                "Natural language question about the codebase",
            ),
        },
        required=["repo", "question"],
    ),
    _tool_def(
        "restart_project",
        (
            "Stop and restart a specific Sentinel project instance (runs stop.sh then start.sh). "
            "Use when: 'restart 1881', 'restart elprint', 'reboot the cairn project'. "
            "Safer than restarting all projects at once."
        ),
        {
            "project": _param(
                "string",
                "Project short name or dir name (e.g. '1881', 'elprint')",
            ),
        },
        required=["project"],
    ),
    _tool_def(
        "my_stats",
        (
            "Show the current user's personal Sentinel dashboard: "
            "conversation history length, issues they submitted, and "
            "a summary of Sentinel fix activity (errors caught, fixes applied, "
            "fixes pending PR review, fixes confirmed live, fixes failed). "
            "Use for: 'what have you done for me?', 'show my stats', "
            "'how many issues have been fixed?', 'my history', 'summary', "
            "'what did sentinel fix this week?', 'pending fixes', 'open PRs'."
        ),
        {
            "hours": _param(
                "integer",
                "Look-back window in hours (default 168 = 7 days)",
                default=168,
            ),
        },
    ),
    _tool_def(
        "clear_my_history",
        (
            "Clear the current user's conversation history with Sentinel. "
            "After clearing, future sessions start with no memory of past conversations. "
            "Use for: 'clear my history', 'forget our conversation', "
            "'start fresh', 'reset my context', 'wipe my history'."
        ),
    ),
    _tool_def(
        "tail_log",
        (
            "Fetch the last N lines of a log source's live production logs without any grep filter. "
            "Use when: 'show me recent SSOLWA logs', 'tail STS', 'what's happening in 1881 logs right now', "
            "'show last 100 lines from SSOLWA'. Different from search_logs — no pattern required."
        ),
        {
            "source": _param(
                "string",
                "Log source name (partial match against log-config filenames, e.g. 'SSOLWA', 'STS')",
            ),
            "lines": _param(
                "integer",
                "Number of recent lines to fetch (default 100)",
                default=100,
            ),
        },
        required=["source"],
    ),
]
553
+
554
+
555
+ # ── Workspace helpers ─────────────────────────────────────────────────────────
556
+
557
def _workspace_dir() -> Path:
    """Return the absolute parent directory of the current working directory.

    _find_project_dirs() scans this directory for project folders.
    """
    cwd = Path(".").resolve()
    return cwd.parent
559
+
560
def _short_name(dir_name: str) -> str:
    """Strip a leading 'sentinel-' from a directory name.

    'sentinel-1881' → '1881', 'sentinel-elprint' → 'elprint'; any name
    without that prefix is returned unchanged.
    """
    prefix = "sentinel-"
    return dir_name[len(prefix):] if dir_name.startswith(prefix) else dir_name
565
+
566
def _read_project_name(project_dir: Path) -> str:
    """Return the project's display name.

    Reads ``<project_dir>/config/sentinel.properties`` and returns the value of
    the first non-empty ``PROJECT_NAME = value`` entry (anything after ``#`` on
    that line is treated as a comment and dropped). If the file is missing,
    unreadable, or has no such entry, falls back to ``_short_name`` of the
    directory name.

    Args:
        project_dir: Root directory of one Sentinel project.

    Returns:
        The configured project name, or the short directory name.
    """
    props = project_dir / "config" / "sentinel.properties"
    try:
        text = props.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Missing or unreadable config is expected for some dirs — use the fallback.
        text = ""

    for raw in text.splitlines():
        key, sep, val = raw.partition("=")
        # Compare the key exactly: a prefix test would also pick up unrelated
        # keys such as PROJECT_NAME_ALIAS or PROJECT_NAMESPACE.
        if not sep or key.strip() != "PROJECT_NAME":
            continue
        val = val.partition("#")[0].strip()
        if val:
            return val
    return _short_name(project_dir.name)
581
+
582
def _find_project_dirs(target: str = "") -> list[Path]:
    """List project directories in the workspace, optionally filtered by name.

    A workspace entry counts as a project when it is a directory, is not named
    'code' or '.git', and contains a ``config`` entry. When ``target`` is
    non-empty, a project is kept only if ``target`` is a case-insensitive
    substring of its directory name, its short name, or its PROJECT_NAME.
    Any error raised while scanning ends the scan and returns whatever was
    collected up to that point.
    """
    workspace = _workspace_dir()
    found = []
    try:
        needle = target.lower() if target else ""
        for entry in sorted(workspace.iterdir()):
            if not entry.is_dir() or entry.name in ("code", ".git"):
                continue
            if not (entry / "config").exists():
                continue
            # `or` short-circuits, so the properties file is only read when
            # neither directory-based name already matches.
            keep = (
                not needle
                or needle in entry.name.lower()
                or needle in _short_name(entry.name).lower()
                or needle in _read_project_name(entry).lower()
            )
            if keep:
                found.append(entry)
    except Exception:
        pass
    return found
602
+
603
def _git_pull(path: Path) -> dict:
    """Run ``git pull --rebase origin`` in ``path`` and summarise the outcome.

    Never raises: any failure to launch or complete git is reported in the
    returned dict instead.

    Args:
        path: Directory in which to run git.

    Returns:
        ``{"status": "ok", "detail": <last stdout line or "already up to date">}``
        when git exits 0, otherwise ``{"status": "error", "detail": <reason>}``.
    """
    try:
        proc = subprocess.run(
            ["git", "pull", "--rebase", "origin"],
            cwd=str(path), capture_output=True, text=True, timeout=60,
        )
    except Exception as e:
        # Deliberate catch-all: covers a missing git binary, a bad cwd and the timeout.
        return {"status": "error", "detail": str(e)}

    out_lines = proc.stdout.strip().splitlines()
    last = out_lines[-1] if out_lines else ""
    if proc.returncode == 0:
        return {"status": "ok", "detail": last or "already up to date"}

    # stderr can be empty on failure; fall back to stdout, then to the exit
    # code, so the caller never receives a blank error.
    detail = proc.stderr.strip() or last or f"git pull exited with code {proc.returncode}"
    return {"status": "error", "detail": detail}
614
+
615
+
616
+ # ── Tool execution ────────────────────────────────────────────────────────────
617
+
618
+ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "") -> str:
619
+ if name == "get_status":
620
+ hours = int(inputs.get("hours", 24))
621
+ errors = store.get_recent_errors(hours)
622
+ fixes = store.get_recent_fixes(hours)
623
+ prs = store.get_open_prs()
624
+ top_errors = [
625
+ {
626
+ "message": e["message"][:120],
627
+ "count": e["count"],
628
+ "source": e["source"],
629
+ "last_seen": e["last_seen"],
630
+ }
631
+ for e in errors[:8]
632
+ ]
633
+ return json.dumps({
634
+ "window_hours": hours,
635
+ "errors_detected": len(errors),
636
+ "top_errors": top_errors,
637
+ "fixes_applied": sum(1 for f in fixes if f["status"] == "applied"),
638
+ "fixes_pending": sum(1 for f in fixes if f["status"] == "pending"),
639
+ "fixes_failed": sum(1 for f in fixes if f["status"] == "failed"),
640
+ "open_prs": [
641
+ {
642
+ "repo": p["repo_name"],
643
+ "branch": p["branch"],
644
+ "pr_url": p["pr_url"],
645
+ "age": p.get("timestamp", ""),
646
+ }
647
+ for p in prs
648
+ ],
649
+ "sentinel_paused": Path("SENTINEL_PAUSE").exists(),
650
+ })
651
+
652
+ if name == "create_issue":
653
+ description = inputs["description"]
654
+ target_repo = inputs.get("target_repo", "")
655
+ project_arg = inputs.get("project", "")
656
+
657
+ if project_arg:
658
+ project_dirs = _find_project_dirs(project_arg)
659
+ if not project_dirs:
660
+ all_names = [_read_project_name(d) for d in _find_project_dirs()]
661
+ return json.dumps({
662
+ "error": f"No project found matching '{project_arg}'",
663
+ "available_projects": all_names,
664
+ "action_needed": "Ask the user which project they meant.",
665
+ })
666
+ if len(project_dirs) > 1:
667
+ matches = [_read_project_name(d) for d in project_dirs]
668
+ return json.dumps({
669
+ "error": f"Ambiguous project name '{project_arg}' — matches: {matches}",
670
+ "action_needed": "Ask the user to clarify which project they mean.",
671
+ })
672
+ project_dir = project_dirs[0]
673
+ else:
674
+ project_dir = Path(".")
675
+
676
+ issues_dir = project_dir / "issues"
677
+ issues_dir.mkdir(exist_ok=True)
678
+ fname = f"slack-{uuid.uuid4().hex[:8]}.txt"
679
+ content = (f"TARGET_REPO: {target_repo}\n\n" if target_repo else "") + description
680
+ (issues_dir / fname).write_text(content, encoding="utf-8")
681
+
682
+ # Touch SENTINEL_POLL_NOW so the target instance picks it up immediately
683
+ (project_dir / "SENTINEL_POLL_NOW").touch()
684
+
685
+ project_label = _read_project_name(project_dir.resolve()) if project_arg else "this project"
686
+ logger.info("Boss created issue for %s: %s", project_label, fname)
687
+ if user_id:
688
+ try:
689
+ store.record_submitted_issue(
690
+ user_id=user_id,
691
+ user_name="", # resolved by caller if needed
692
+ project=project_label,
693
+ fname=fname,
694
+ description=description,
695
+ )
696
+ except Exception as _rec_err:
697
+ logger.debug("Boss: could not record submitted issue: %s", _rec_err)
698
+ return json.dumps({
699
+ "status": "queued",
700
+ "project": project_label,
701
+ "file": fname,
702
+ "note": f"Delivered to '{project_label}'. Sentinel will process it on the next poll cycle.",
703
+ })
704
+
705
+ if name == "get_fix_details":
706
+ fp = inputs["fingerprint"]
707
+ fix = store.get_confirmed_fix(fp) or store.get_marker_seen_fix(fp)
708
+ if not fix:
709
+ # Fallback: search recent fixes by prefix
710
+ recent = store.get_recent_fixes(hours=72)
711
+ fix = next((f for f in recent if f.get("fingerprint", "").startswith(fp)), None)
712
+ return json.dumps(fix or {"error": "not found"})
713
+
714
+ if name == "list_pending_prs":
715
+ prs = store.get_open_prs()
716
+ return json.dumps({
717
+ "count": len(prs),
718
+ "open_prs": [
719
+ {
720
+ "repo": p["repo_name"],
721
+ "branch": p["branch"],
722
+ "pr_url": p["pr_url"],
723
+ "timestamp": p.get("timestamp", ""),
724
+ }
725
+ for p in prs
726
+ ],
727
+ })
728
+
729
+ if name == "pause_sentinel":
730
+ Path("SENTINEL_PAUSE").touch()
731
+ logger.info("Boss: SENTINEL_PAUSE created")
732
+ return json.dumps({"status": "paused"})
733
+
734
+ if name == "resume_sentinel":
735
+ p = Path("SENTINEL_PAUSE")
736
+ if p.exists():
737
+ p.unlink()
738
+ logger.info("Boss: SENTINEL_PAUSE removed")
739
+ return json.dumps({"status": "resumed"})
740
+
741
+ if name == "list_projects":
742
+ projects = []
743
+ for d in _find_project_dirs():
744
+ repo_cfg_dir = d / "config" / "repo-configs"
745
+ repos_in_project = []
746
+ if repo_cfg_dir.exists():
747
+ for p in sorted(repo_cfg_dir.glob("*.properties")):
748
+ if p.name.startswith("_"):
749
+ continue
750
+ repo_url = ""
751
+ for line in p.read_text(encoding="utf-8", errors="ignore").splitlines():
752
+ if line.startswith("REPO_URL"):
753
+ repo_url = line.split("=", 1)[-1].strip()
754
+ break
755
+ repos_in_project.append({"repo": p.stem, "url": repo_url})
756
+ projects.append({
757
+ "project": _read_project_name(d),
758
+ "dir": d.name,
759
+ "running": (d / "sentinel.pid").exists(),
760
+ "this": d.resolve() == Path(".").resolve(),
761
+ "repos": repos_in_project,
762
+ })
763
+ return json.dumps({"projects": projects})
764
+
765
+ if name == "search_logs":
766
+ query = inputs.get("query", "")
767
+ source = inputs.get("source", "").lower()
768
+ max_matches = int(inputs.get("max_matches", 30))
769
+
770
+ # ── Live fetch path: SSH to servers and grep in real time ──────────────
771
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
772
+ log_cfg_dir = Path("config") / "log-configs"
773
+ if script.exists() and log_cfg_dir.exists():
774
+ props_files = sorted(log_cfg_dir.glob("*.properties"))
775
+ if source:
776
+ props_files = [p for p in props_files if source in p.stem.lower()]
777
+ if props_files:
778
+ live_results = []
779
+ for props in props_files:
780
+ env = os.environ.copy()
781
+ env["GREP_FILTER"] = query
782
+ try:
783
+ r = subprocess.run(
784
+ ["bash", str(script), str(props)],
785
+ capture_output=True, text=True, timeout=60, env=env,
786
+ )
787
+ lines = (r.stdout or "").strip().splitlines()
788
+ matches = [ln[:300] for ln in lines if ln.strip()][:max_matches]
789
+ if matches:
790
+ live_results.append({"source": props.stem, "matches": matches})
791
+ logger.info("Boss search_logs live %s rc=%d found=%d", props.stem, r.returncode, len(matches))
792
+ except subprocess.TimeoutExpired:
793
+ live_results.append({"source": props.stem, "error": "timed out"})
794
+ except Exception as e:
795
+ live_results.append({"source": props.stem, "error": str(e)})
796
+ total = sum(len(r.get("matches", [])) for r in live_results)
797
+ return json.dumps({
798
+ "query": query,
799
+ "mode": "live",
800
+ "total_matches": total,
801
+ "results": live_results,
802
+ })
803
+
804
+ # ── Fallback: search locally-cached log files ──────────────────────────
805
+ fetched_dir = Path("workspace/fetched")
806
+ if not fetched_dir.exists():
807
+ return json.dumps({"error": "No fetched logs found and fetch_log.sh unavailable"})
808
+ try:
809
+ pattern = re.compile(query, re.IGNORECASE)
810
+ except re.error as e:
811
+ return json.dumps({"error": f"Invalid regex: {e}"})
812
+ results = []
813
+ for log_file in sorted(fetched_dir.glob("*.log")):
814
+ if source and source not in log_file.name.lower():
815
+ continue
816
+ try:
817
+ lines = log_file.read_text(encoding="utf-8", errors="ignore").splitlines()
818
+ matches = [
819
+ {"line": i + 1, "text": line[:300]}
820
+ for i, line in enumerate(lines)
821
+ if pattern.search(line)
822
+ ][:max_matches]
823
+ if matches:
824
+ results.append({"file": log_file.name, "matches": matches})
825
+ except Exception:
826
+ pass
827
+ total = sum(len(r["matches"]) for r in results)
828
+ return json.dumps({
829
+ "query": query,
830
+ "mode": "cached",
831
+ "total_matches": total,
832
+ "files_searched": len(list(fetched_dir.glob("*.log"))),
833
+ "results": results,
834
+ })
835
+
836
+ if name == "trigger_poll":
837
+ Path("SENTINEL_POLL_NOW").touch()
838
+ logger.info("Boss: immediate poll requested")
839
+ return json.dumps({"status": "triggered", "note": "Sentinel will run a poll cycle within seconds"})
840
+
841
+ if name == "get_repo_status":
842
+ hours = int(inputs.get("hours", 24))
843
+ fixes = store.get_recent_fixes(hours)
844
+ errors = store.get_recent_errors(hours)
845
+ by_repo: dict = {}
846
+ for fix in fixes:
847
+ repo = fix.get("repo_name", "unknown")
848
+ s = by_repo.setdefault(repo, {"applied": 0, "pending": 0, "failed": 0, "skipped": 0})
849
+ key = fix.get("status", "failed")
850
+ s[key] = s.get(key, 0) + 1
851
+ return json.dumps({"window_hours": hours, "total_errors": len(errors), "by_repo": by_repo})
852
+
853
+ if name == "list_recent_commits":
854
+ limit = int(inputs.get("limit", 5))
855
+ results = []
856
+ for repo_name, repo in cfg_loader.repos.items():
857
+ local = Path(repo.local_path)
858
+ if not local.exists():
859
+ continue
860
+ try:
861
+ r = subprocess.run(
862
+ ["git", "log", "--oneline", "--grep=sentinel", "-n", str(limit)],
863
+ cwd=str(local), capture_output=True, text=True, timeout=10,
864
+ )
865
+ commits = r.stdout.strip().splitlines()
866
+ if commits:
867
+ results.append({"repo": repo_name, "commits": commits})
868
+ except Exception:
869
+ pass
870
+ return json.dumps({"sentinel_commits": results})
871
+
872
+ if name == "pull_repo":
873
+ target = inputs.get("repo", "").lower()
874
+ results = []
875
+ for repo_name, repo in cfg_loader.repos.items():
876
+ if target and target not in repo_name.lower():
877
+ continue
878
+ local = Path(repo.local_path)
879
+ if not local.exists():
880
+ results.append({"repo": repo_name, "status": "error", "detail": "local path not found"})
881
+ continue
882
+ try:
883
+ r = subprocess.run(
884
+ ["git", "pull", "--rebase", "origin", repo.branch],
885
+ cwd=str(local), capture_output=True, text=True, timeout=60,
886
+ )
887
+ last_line = r.stdout.strip().splitlines()[-1] if r.stdout.strip() else "already up to date"
888
+ if r.returncode == 0:
889
+ results.append({"repo": repo_name, "status": "ok", "detail": last_line})
890
+ else:
891
+ results.append({"repo": repo_name, "status": "error", "detail": r.stderr.strip()})
892
+ except Exception as e:
893
+ results.append({"repo": repo_name, "status": "error", "detail": str(e)})
894
+ return json.dumps({"results": results})
895
+
896
+ if name == "pull_config":
897
+ target = inputs.get("project", "")
898
+ dirs = _find_project_dirs(target)
899
+ if not dirs:
900
+ return json.dumps({"error": f"No project found matching '{target}'"})
901
+ results = []
902
+ for d in dirs:
903
+ res = _git_pull(d)
904
+ results.append({"project": _read_project_name(d), "dir": d.name, **res})
905
+ logger.info("Boss: pull_config %s → %s", d.name, res["status"])
906
+ return json.dumps({"results": results})
907
+
908
+ if name == "fetch_logs":
909
+ source_filter = inputs.get("source", "").lower()
910
+ debug = bool(inputs.get("debug", False))
911
+ tail_override = inputs.get("tail")
912
+ grep_override = inputs.get("grep_filter", "")
913
+
914
+ # Find fetch_log.sh relative to this file
915
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
916
+ if not script.exists():
917
+ return json.dumps({"error": f"fetch_log.sh not found at {script}"})
918
+
919
+ log_cfg_dir = Path("config") / "log-configs"
920
+ if not log_cfg_dir.exists():
921
+ return json.dumps({"error": "config/log-configs/ not found"})
922
+
923
+ props_files = sorted(log_cfg_dir.glob("*.properties"))
924
+ if source_filter:
925
+ props_files = [p for p in props_files if source_filter in p.stem.lower()]
926
+ if not props_files:
927
+ return json.dumps({"error": f"No log-config found matching '{source_filter}'"})
928
+
929
+ results = []
930
+ for props in props_files:
931
+ env = os.environ.copy()
932
+ if tail_override:
933
+ env["TAIL"] = str(tail_override)
934
+ if grep_override:
935
+ env["GREP_FILTER"] = grep_override
936
+
937
+ cmd = ["bash", str(script)]
938
+ if debug:
939
+ cmd.append("--debug")
940
+ cmd.append(str(props))
941
+
942
+ try:
943
+ r = subprocess.run(
944
+ cmd, capture_output=True, text=True, timeout=120, env=env,
945
+ )
946
+ output = (r.stdout or "").strip()
947
+ stderr = (r.stderr or "").strip()
948
+ results.append({
949
+ "source": props.stem,
950
+ "returncode": r.returncode,
951
+ "output": output[-2000:] if output else "",
952
+ "stderr": stderr[-1000:] if stderr else "",
953
+ })
954
+ logger.info("Boss fetch_logs %s rc=%d", props.stem, r.returncode)
955
+ except subprocess.TimeoutExpired:
956
+ results.append({"source": props.stem, "error": "timed out after 120s"})
957
+ except Exception as e:
958
+ results.append({"source": props.stem, "error": str(e)})
959
+
960
+ return json.dumps({"fetched": len(results), "results": results})
961
+
962
+ if name == "watch_bot":
963
+ user_ids = inputs.get("user_ids", [])
964
+ project_arg = inputs.get("project", "").strip()
965
+ if not user_ids:
966
+ return json.dumps({"error": "No user_ids provided"})
967
+
968
+ # Resolve + validate project — required for bot issue routing
969
+ resolved_project = ""
970
+ if project_arg:
971
+ project_dirs = _find_project_dirs(project_arg)
972
+ if not project_dirs:
973
+ all_names = [_read_project_name(d) for d in _find_project_dirs()]
974
+ return json.dumps({
975
+ "error": f"No project found matching '{project_arg}'",
976
+ "available_projects": all_names,
977
+ "action_needed": "Ask the user which project these bot alerts belong to.",
978
+ })
979
+ if len(project_dirs) > 1:
980
+ matches = [_read_project_name(d) for d in project_dirs]
981
+ return json.dumps({
982
+ "error": f"Ambiguous project name '{project_arg}' matches: {matches}",
983
+ "action_needed": "Ask the user to clarify which project.",
984
+ })
985
+ resolved_project = _read_project_name(project_dirs[0])
986
+ else:
987
+ all_projects = _find_project_dirs()
988
+ if len(all_projects) == 1:
989
+ # Single project in workspace — auto-assign
990
+ resolved_project = _read_project_name(all_projects[0])
991
+ elif all_projects:
992
+ all_names = [_read_project_name(d) for d in all_projects]
993
+ return json.dumps({
994
+ "error": "Cannot determine which project these bot alerts belong to.",
995
+ "available_projects": all_names,
996
+ "action_needed": "Ask the user to specify the project, then retry with project filled in.",
997
+ })
998
+
999
+ results = []
1000
+ for uid in user_ids:
1001
+ if not slack_client:
1002
+ results.append({"user_id": uid, "status": "error", "reason": "no Slack client available"})
1003
+ continue
1004
+ try:
1005
+ info = await slack_client.users_info(user=uid)
1006
+ user = info.get("user", {})
1007
+ if not user.get("is_bot", False):
1008
+ results.append({"user_id": uid, "status": "skipped", "reason": "not a bot — only bots can be watched passively"})
1009
+ continue
1010
+ bot_name = user.get("real_name") or user.get("name") or uid
1011
+ store.add_watched_bot(uid, bot_name, added_by="boss", project_name=resolved_project)
1012
+ logger.info("Boss: now watching bot %s (%s) → project '%s'", bot_name, uid, resolved_project or "unset")
1013
+ results.append({"user_id": uid, "bot_name": bot_name, "project": resolved_project, "status": "watching"})
1014
+ except Exception as e:
1015
+ results.append({"user_id": uid, "status": "error", "reason": str(e)})
1016
+ return json.dumps({"results": results})
1017
+
1018
+ if name == "unwatch_bot":
1019
+ user_ids = inputs.get("user_ids", [])
1020
+ if not user_ids:
1021
+ return json.dumps({"error": "No user_ids provided"})
1022
+ results = []
1023
+ for uid in user_ids:
1024
+ removed = store.remove_watched_bot(uid)
1025
+ logger.info("Boss: unwatch bot %s → %s", uid, "removed" if removed else "not found")
1026
+ results.append({"user_id": uid, "status": "removed" if removed else "not found"})
1027
+ return json.dumps({"results": results})
1028
+
1029
+ if name == "list_watched_bots":
1030
+ bots = store.get_watched_bots()
1031
+ return json.dumps({
1032
+ "count": len(bots),
1033
+ "bots": [
1034
+ {
1035
+ "bot_id": b["bot_id"],
1036
+ "bot_name": b["bot_name"],
1037
+ "project": b.get("project_name") or "",
1038
+ "added_by": b["added_by"],
1039
+ "added_at": b["added_at"],
1040
+ }
1041
+ for b in bots
1042
+ ],
1043
+ })
1044
+
1045
+ if name == "upgrade_sentinel":
1046
+ import threading
1047
+
1048
+ # Sentinel is installed via npm — use `sentinel upgrade` which handles
1049
+ # npm install + Python bundle copy + restart via stopAll/startAll.
1050
+ # Run it in the background after a short delay so the Slack reply is
1051
+ # sent before the process is replaced.
1052
+ try:
1053
+ r = subprocess.run(
1054
+ ["sentinel", "--version"],
1055
+ capture_output=True, text=True, timeout=10,
1056
+ )
1057
+ sentinel_bin_ok = r.returncode == 0
1058
+ except Exception:
1059
+ sentinel_bin_ok = False
1060
+
1061
+ if not sentinel_bin_ok:
1062
+ return json.dumps({
1063
+ "status": "error",
1064
+ "note": "`sentinel` CLI not found. Run: npm install -g @misterhuydo/sentinel",
1065
+ })
1066
+
1067
+ def _do_upgrade():
1068
+ import time
1069
+ time.sleep(10) # give Slack time to post the reply
1070
+ subprocess.Popen(["sentinel", "upgrade"], close_fds=True)
1071
+
1072
+ threading.Thread(target=_do_upgrade, daemon=True).start()
1073
+ logger.info("Boss: upgrade_sentinel scheduled via `sentinel upgrade`")
1074
+ return json.dumps({
1075
+ "status": "ok",
1076
+ "note": "Upgrade started — pulling latest version via npm and restarting. Give me ~30 seconds then I'll be back.",
1077
+ })
1078
+
1079
+ if name == "ask_codebase":
1080
+ target = inputs.get("repo", "").lower()
1081
+ question = inputs.get("question", "")
1082
+
1083
+ # 1. Find repos whose name contains the target (e.g. "STS", "elprint-sales")
1084
+ matched = [(rn, r) for rn, r in cfg_loader.repos.items() if target in rn.lower()]
1085
+
1086
+ # 2. No repo match — check if target is a project name → use ALL repos in cfg_loader
1087
+ # (each Sentinel instance is scoped to one project, so all repos belong to it)
1088
+ if not matched:
1089
+ current_project = _read_project_name(Path("."))
1090
+ if target in current_project.lower() or current_project.lower() in target:
1091
+ matched = list(cfg_loader.repos.items())
1092
+
1093
+ if not matched:
1094
+ return json.dumps({
1095
+ "error": f"No repo or project found matching '{target}'",
1096
+ "available_repos": list(cfg_loader.repos.keys()),
1097
+ })
1098
+
1099
+ cfg = cfg_loader.sentinel
1100
+ env = os.environ.copy()
1101
+ if cfg.anthropic_api_key:
1102
+ env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1103
+
1104
+ def _ask_one(repo_name, repo_cfg) -> dict:
1105
+ local_path = Path(repo_cfg.local_path)
1106
+ if not local_path.exists():
1107
+ return {"repo": repo_name, "error": f"not cloned yet at {local_path}"}
1108
+ prompt = (
1109
+ f"You are a code analyst. Answer the following question about the codebase at: {local_path}\n\n"
1110
+ f"Question: {question}\n\n"
1111
+ f"Use whatever tools you need to answer accurately. Be concise and direct. Plain text only."
1112
+ )
1113
+ try:
1114
+ r = subprocess.run(
1115
+ ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
1116
+ if os.getuid() != 0 else
1117
+ [cfg.claude_code_bin, "--print", prompt]),
1118
+ capture_output=True, text=True, timeout=180, env=env,
1119
+ cwd=str(local_path),
1120
+ )
1121
+ output = (r.stdout or "").strip()
1122
+ logger.info("Boss ask_codebase %s rc=%d len=%d", repo_name, r.returncode, len(output))
1123
+ if r.returncode != 0 and not output:
1124
+ return {"repo": repo_name, "error": f"claude --print failed (rc={r.returncode}): {(r.stderr or '')[:200]}"}
1125
+ return {"repo": repo_name, "answer": output[:3000]}
1126
+ except subprocess.TimeoutExpired:
1127
+ return {"repo": repo_name, "error": "timed out after 180s"}
1128
+ except Exception as e:
1129
+ return {"repo": repo_name, "error": str(e)}
1130
+
1131
+ if len(matched) == 1:
1132
+ result = _ask_one(*matched[0])
1133
+ # Unwrap single-repo result for cleaner response
1134
+ return json.dumps(result)
1135
+
1136
+ # Multiple repos — query each and combine
1137
+ results = [_ask_one(rn, r) for rn, r in matched]
1138
+ return json.dumps({"project": target, "repos_queried": len(results), "results": results})
1139
+
1140
+ if name == "restart_project":
1141
+ project_arg = inputs.get("project", "").lower()
1142
+ dirs = _find_project_dirs(project_arg)
1143
+ if not dirs:
1144
+ return json.dumps({"error": f"No project found matching '{project_arg}'"})
1145
+ results = []
1146
+ for d in dirs:
1147
+ stop_sh = d / "stop.sh"
1148
+ start_sh = d / "start.sh"
1149
+ if not stop_sh.exists() or not start_sh.exists():
1150
+ results.append({"project": d.name, "status": "error", "detail": "stop.sh or start.sh not found"})
1151
+ continue
1152
+ try:
1153
+ subprocess.run(["bash", str(stop_sh)], cwd=str(d), timeout=30)
1154
+ subprocess.run(["bash", str(start_sh)], cwd=str(d), timeout=30)
1155
+ results.append({"project": d.name, "status": "restarted"})
1156
+ logger.info("Boss: restarted project %s", d.name)
1157
+ except Exception as e:
1158
+ results.append({"project": d.name, "status": "error", "detail": str(e)})
1159
+ return json.dumps({"results": results})
1160
+
1161
+ if name == "tail_log":
1162
+ source = inputs.get("source", "").lower()
1163
+ lines = int(inputs.get("lines", 100))
1164
+ script = Path(__file__).resolve().parent.parent / "scripts" / "fetch_log.sh"
1165
+ log_cfg_dir = Path("config") / "log-configs"
1166
+
1167
+ if not script.exists():
1168
+ return json.dumps({"error": "fetch_log.sh not found"})
1169
+ if not log_cfg_dir.exists():
1170
+ return json.dumps({"error": "config/log-configs/ not found"})
1171
+
1172
+ props_files = sorted(log_cfg_dir.glob("*.properties"))
1173
+ if source:
1174
+ props_files = [p for p in props_files if source in p.stem.lower()]
1175
+ if not props_files:
1176
+ return json.dumps({"error": f"No log-config found matching '{source}'"})
1177
+
1178
+ results = []
1179
+ for props in props_files:
1180
+ env = os.environ.copy()
1181
+ env["TAIL"] = str(lines)
1182
+ env["GREP_FILTER"] = "" # no filter — show everything
1183
+ try:
1184
+ r = subprocess.run(
1185
+ ["bash", str(script), str(props)],
1186
+ capture_output=True, text=True, timeout=60, env=env,
1187
+ )
1188
+ tail_lines = (r.stdout or "").strip().splitlines()[-lines:]
1189
+ results.append({
1190
+ "source": props.stem,
1191
+ "lines": len(tail_lines),
1192
+ "content": "\n".join(tail_lines),
1193
+ })
1194
+ logger.info("Boss tail_log %s rc=%d lines=%d", props.stem, r.returncode, len(tail_lines))
1195
+ except subprocess.TimeoutExpired:
1196
+ results.append({"source": props.stem, "error": "timed out"})
1197
+ except Exception as e:
1198
+ results.append({"source": props.stem, "error": str(e)})
1199
+ return json.dumps({"results": results})
1200
+
1201
+ if name == "my_stats":
1202
+ hours = int(inputs.get("hours", 168))
1203
+ errors = store.get_recent_errors(hours)
1204
+ fixes = store.get_recent_fixes(hours)
1205
+ prs = store.get_open_prs()
1206
+ pending_conf = store.get_fixes_pending_confirmation()
1207
+ # Conversation stats
1208
+ history = store.load_conversation(user_id) if user_id else []
1209
+ hist_len = len(history)
1210
+ # Load conversation updated_at from DB
1211
+ conv_updated = ""
1212
+ try:
1213
+ import sqlite3 as _sqlite3
1214
+ with _sqlite3.connect(store.db_path) as _db:
1215
+ row = _db.execute(
1216
+ "SELECT updated_at FROM conversations WHERE user_id=?", (user_id,)
1217
+ ).fetchone()
1218
+ if row:
1219
+ conv_updated = row[0]
1220
+ except Exception:
1221
+ pass
1222
+ # Tally fix statuses
1223
+ by_status: dict = {}
1224
+ for fix in fixes:
1225
+ s = fix.get("status", "unknown")
1226
+ by_status[s] = by_status.get(s, 0) + 1
1227
+ # Fixes confirmed via sentinel marker in prod
1228
+ confirmed = [f for f in fixes if f.get("fix_outcome") == "confirmed"]
1229
+ regressed = [f for f in fixes if f.get("fix_outcome") == "regressed"]
1230
+ submitted = store.get_submitted_issues(user_id, hours=hours) if user_id else []
1231
+ submitted_recent = store.get_submitted_issues(user_id, hours=hours) if user_id else []
1232
+ return json.dumps({
1233
+ "conversation": {
1234
+ "messages_in_history": hist_len,
1235
+ "turns": hist_len // 2,
1236
+ "last_active": conv_updated or "no history",
1237
+ },
1238
+ "issues_you_submitted": {
1239
+ "total_in_window": len(submitted_recent),
1240
+ "all_time": len(store.get_submitted_issues(user_id) if user_id else []),
1241
+ "recent": [
1242
+ {"project": i["project"], "description": i["description"][:80],
1243
+ "submitted_at": i["submitted_at"]}
1244
+ for i in submitted_recent[:5]
1245
+ ],
1246
+ },
1247
+ "window_hours": hours,
1248
+ "errors_detected": len(errors),
1249
+ "fixes": {
1250
+ "applied": by_status.get("applied", 0),
1251
+ "pending_pr": len(prs),
1252
+ "failed": by_status.get("failed", 0),
1253
+ "skipped": by_status.get("skipped", 0),
1254
+ "error": by_status.get("error", 0),
1255
+ },
1256
+ "confirmed_in_prod": len(confirmed),
1257
+ "regressed_after_fix": len(regressed),
1258
+ "awaiting_confirmation": len(pending_conf),
1259
+ "open_prs": [
1260
+ {"repo": p["repo_name"], "pr_url": p["pr_url"], "timestamp": p["timestamp"]}
1261
+ for p in prs
1262
+ ],
1263
+ "top_errors": [
1264
+ {"message": e["message"][:100], "count": e["count"], "source": e["source"]}
1265
+ for e in errors[:5]
1266
+ ],
1267
+ })
1268
+ if name == "clear_my_history":
1269
+ if user_id:
1270
+ store.save_conversation(user_id, [])
1271
+ logger.info("Boss: cleared conversation history for user %s", user_id)
1272
+ return json.dumps({
1273
+ "status": "cleared",
1274
+ "note": "Your conversation history has been wiped. Next session starts fresh. [DONE]",
1275
+ })
1276
+ return json.dumps({"error": "cannot determine user — not clearing"})
1277
+ return json.dumps({"error": f"unknown tool: {name}"})
1278
+
1279
+
1280
+ # ── CLI fallback (OAuth / no API key) ────────────────────────────────────────
1281
+
1282
def _attachments_to_text(attachments: list[dict]) -> str:
    """Render attachments as a plain-text section for a CLI prompt.

    Args:
        attachments: Attachment dicts. Each needs ``type`` and ``name``;
            ``text`` items also need ``content``, every other type needs
            ``path``.

    Returns:
        An empty string when there are no attachments, otherwise an
        ``ATTACHMENTS:`` section with one entry per attachment, entries
        separated by ``---`` lines.
    """
    def _render(item: dict) -> str:
        # One line (or header + body for text files) per attachment.
        kind = item["type"]
        if kind == "text":
            return f"[Attached file: {item['name']}]\n{item['content']}"
        if kind == "image":
            return f"[Attached image: {item['name']}] (saved at {item['path']})"
        return f"[Attached file: {item['name']}] (saved at {item['path']} read it if relevant)"

    if not attachments:
        return ""
    body = "\n---\n".join(_render(item) for item in attachments)
    return "\n\nATTACHMENTS:\n" + body
1301
+
1302
+
1303
def _attachments_to_api_blocks(attachments: list[dict]) -> list[dict]:
    """Turn attachment dicts into Anthropic message content blocks.

    Args:
        attachments: Attachment dicts keyed by ``type``. ``image`` items
            supply ``content`` (used as the base64 payload) and optionally
            ``mime``; ``text`` items supply ``name`` and ``content``; any
            other type supplies ``name`` and ``path``.

    Returns:
        One content block per attachment, in input order: an ``image`` block
        for images, a ``text`` block for everything else.
    """
    def _as_block(item: dict) -> dict:
        kind = item["type"]
        if kind == "image":
            source = {
                "type": "base64",
                # Media type falls back to PNG when the attachment has no "mime".
                "media_type": item.get("mime", "image/png"),
                "data": item["content"],
            }
            return {"type": "image", "source": source}
        if kind == "text":
            text = f"[Attached file: {item['name']}]\n{item['content']}"
        else:
            # Non-text, non-image files are referenced by path only.
            text = f"[Attached file: {item['name']}] saved at {item['path']}"
        return {"type": "text", "text": text}

    return [_as_block(item) for item in attachments]
1327
+
1328
+
1329
# Matches one `ACTION: {...}` directive line in CLI output; group 1 is the JSON
# object. Leading spaces/tabs are allowed because the CLI prompt shows its
# example directives indented, so the model may echo that indentation.
# `[ \t]*` (not `\s*`) keeps the match from swallowing preceding blank lines.
_ACTION_RE = re.compile(r"^[ \t]*ACTION:\s*(\{.*\})", re.MULTILINE)
1330
+
1331
+
1332
async def _handle_with_cli(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
) -> tuple[str, bool]:
    """Handle one user message through the `claude --print` CLI.

    Status, open PRs and (for search-like messages) a log search are fetched
    up front and embedded in a single text prompt together with the last few
    history entries and any attachments. ``ACTION: {...}`` lines in the CLI
    output are executed via ``_run_tool`` and removed from the reply.

    Args:
        message: The user's message text.
        history: Conversation history. On success the user message and the
            reply are appended in place; on failure it is left untouched.
        cfg_loader: Provides ``repos``, ``log_sources`` and ``sentinel``
            (``claude_code_bin``, ``anthropic_api_key``).
        store: State store forwarded to ``_run_tool``.
        slack_client: Optional Slack client forwarded to ACTION tool calls.
        user_name: Display name used in the prompt and the default greeting.
        user_id: Forwarded to ACTION tool calls.
        attachments: Optional attachment dicts rendered as text in the prompt.

    Returns:
        ``(reply, is_done)``. A reply starting with ``:warning:`` means the
        CLI call failed (``is_done`` is then True); otherwise ``is_done`` is
        True when the output contained ``[DONE]``.
    """
    import asyncio  # local import: used only to keep the CLI call off the event loop

    status_json = await _run_tool("get_status", {"hours": 24}, cfg_loader, store)
    prs_json = await _run_tool("list_pending_prs", {}, cfg_loader, store)

    # Pre-fetch log search if the message is a search request.
    # Use quoted strings as the query, or fall back to the full message.
    # Never hardcode field names — the query is whatever the user said.
    search_json = ""
    _search_kws = ("search", "find", "look for", "show me log", "grep", "entries for")
    lowered = message.lower()
    if any(kw in lowered for kw in _search_kws):
        quoted = re.findall(r'"([^"]+)"', message)
        query = quoted[0] if quoted else message
        search_json = await _run_tool("search_logs", {"query": query}, cfg_loader, store)

    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    log_sources = list(cfg_loader.log_sources.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # Flatten the last 8 history entries to "ROLE: text". Entries may hold a
    # list of content blocks (dicts or SDK objects); only text blocks are kept.
    history_text = ""
    for msg in history[-8:]:
        role = msg["role"].upper()
        content = msg["content"]
        if isinstance(content, list):
            content = " ".join(
                (b.get("text", "") if isinstance(b, dict) else getattr(b, "text", ""))
                for b in content
                if (isinstance(b, dict) and b.get("type") == "text")
                or (hasattr(b, "type") and b.type == "text")
            )
        history_text += f"\n{role}: {content}"

    prompt = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name}" if user_name else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + f"\n\nCurrent status (last 24 h):\n{status_json}"
        + f"\n\nOpen PRs:\n{prs_json}"
        + (f"\n\nLog search results:\n{search_json}" if search_json else "")
        + (f"\n\nConversation so far:{history_text}" if history_text else "")
        + _attachments_to_text(attachments or [])
        + f"\n\nUSER: {message}"
        + "\n\nIf you need to take an action, include a line like:\n"
        + " ACTION: {\"action\": \"pause_sentinel\"}\n"
        + " ACTION: {\"action\": \"resume_sentinel\"}\n"
        + " ACTION: {\"action\": \"trigger_poll\"}\n"
        + " ACTION: {\"action\": \"create_issue\", \"description\": \"...\", \"target_repo\": \"\"}\n"
        + " ACTION: {\"action\": \"search_logs\", \"query\": \"<whatever the user asked to find>\"}\n"
        + "End with [DONE] if the request is fully handled."
    )

    cfg = cfg_loader.sentinel
    env = os.environ.copy()
    if cfg.anthropic_api_key:
        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key

    # The skip-permissions flag is only passed when not running as root
    # (presumably because the CLI rejects it under root — confirm).
    cmd = [cfg.claude_code_bin]
    if os.getuid() != 0:
        cmd.append("--dangerously-skip-permissions")
    cmd += ["--print", prompt]

    try:
        # subprocess.run blocks for up to 180 s; run it in a worker thread so
        # the event loop keeps serving other coroutines meanwhile.
        result = await asyncio.to_thread(
            subprocess.run, cmd,
            capture_output=True, text=True, timeout=180, env=env,
        )
        output = (result.stdout or "").strip()
        if result.returncode != 0 or not output:
            stderr = (result.stderr or "").strip()
            logger.error(
                "Boss CLI call failed (rc=%d): stdout=%r stderr=%r",
                result.returncode, output[:200], stderr[:200],
            )
            # Only a non-zero exit with no usable output is treated as a hard
            # failure; partial output is still parsed below.
            if result.returncode != 0 and not output:
                return f":warning: `claude --print` failed (exit {result.returncode}): {stderr[:300]}", True
    except Exception as e:
        logger.error("Boss CLI call failed: %s", e)
        return f":warning: Boss unavailable: {e}", True

    # Execute every ACTION directive; a malformed one is logged and skipped.
    for m in _ACTION_RE.finditer(output):
        try:
            action = json.loads(m.group(1))
            name = action.pop("action", "")
            if name:
                # Forward slack_client so Slack-dependent tools behave the
                # same here as on the API path.
                result_str = await _run_tool(
                    name, action, cfg_loader, store,
                    slack_client=slack_client, user_id=user_id,
                )
                logger.info("Boss CLI action: %s → %s", name, result_str[:80])
        except Exception as e:
            logger.warning("Boss action parse error: %s", e)

    reply = _ACTION_RE.sub("", output).strip()
    is_done = "[DONE]" in reply
    reply = reply.replace("[DONE]", "").strip()
    if not reply:
        greeting = f"Hi {user_name}! " if user_name else "Hi! "
        reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return reply, is_done
1441
+
1442
+
1443
+ # ── API-key path (structured tools, full agentic loop) ────────────────────────
1444
+
1445
async def _handle_with_api(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
) -> tuple[str, bool]:
    """Handle one user message through the Anthropic Messages API with tools.

    Runs a tool-use loop: each response's ``tool_use`` blocks are executed
    via ``_run_tool`` and fed back as ``tool_result`` blocks until a response
    contains no tool calls; that response's text is the reply.

    Args:
        message: The user's message text.
        history: Conversation history. The user turn and the final assistant
            turn are appended in place only once a reply has been produced,
            so an API error leaves it unchanged.
        cfg_loader: Provides ``repos``, ``log_sources`` and
            ``sentinel.anthropic_api_key``.
        store: State store forwarded to ``_run_tool``.
        slack_client: Optional Slack client forwarded to ``_run_tool``.
        user_name: Display name used in the system prompt and greeting.
        user_id: Forwarded to ``_run_tool``.
        attachments: Optional attachment dicts sent as content blocks.

    Returns:
        ``(reply, is_done)`` where ``is_done`` is True when the reply
        contained ``[DONE]``.

    Raises:
        Whatever the Anthropic client raises; errors are not caught here.
    """
    import asyncio  # local import: used only to keep the API call off the event loop

    import anthropic

    api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
    client = anthropic.Anthropic(api_key=api_key)

    paused = Path("SENTINEL_PAUSE").exists()
    repos = list(cfg_loader.repos.keys())
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    known_projects = [_read_project_name(d) for d in _find_project_dirs()]
    log_sources = list(cfg_loader.log_sources.keys())
    system = (
        _SYSTEM
        + (f"\nYou are speaking with: {user_name}" if user_name else "")
        + f"\n\nCurrent time: {ts}"
        + f"\nSentinel status: {'⏸ PAUSED' if paused else '▶ RUNNING'}"
        + f"\nManaged repos: {', '.join(repos) if repos else '(none configured)'}"
        + (f"\nLog sources: {', '.join(log_sources)}" if log_sources else "")
        + (f"\nKnown projects in workspace: {', '.join(known_projects)}" if known_projects else "")
    )

    # Build user content — include attachment blocks if any
    attach_blocks = _attachments_to_api_blocks(attachments or [])
    if attach_blocks:
        user_content = attach_blocks + [{"type": "text", "text": message}]
    else:
        user_content = message
    user_msg = {"role": "user", "content": user_content}

    # Work on a copy: intermediate tool turns stay out of `history`, and the
    # user turn is only committed together with the final assistant turn.
    messages = [*history, user_msg]

    while True:
        # The SDK client is synchronous; run the request in a worker thread
        # so the event loop is not blocked while waiting on the network.
        response = await asyncio.to_thread(
            client.messages.create,
            model="claude-opus-4-6",
            max_tokens=1024,
            system=system,
            tools=_TOOLS,
            messages=messages,
        )

        text_parts = []
        tool_blocks = []
        for block in response.content:
            if block.type == "text":
                text_parts.append(block.text)
            elif block.type == "tool_use":
                tool_blocks.append(block)

        if not tool_blocks:
            reply = " ".join(text_parts).strip()
            is_done = "[DONE]" in reply
            reply = reply.replace("[DONE]", "").strip()
            if not reply:
                greeting = f"Hi {user_name}! " if user_name else "Hi! "
                reply = f"{greeting}I'm Sentinel, your autonomous DevOps agent. How can I help you?"
            history.append(user_msg)
            history.append({"role": "assistant", "content": response.content})
            return reply, is_done

        # Echo the assistant turn, then answer every tool call in one user turn.
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for tc in tool_blocks:
            result = await _run_tool(tc.name, tc.input, cfg_loader, store, slack_client=slack_client, user_id=user_id)
            logger.info("Boss tool: %s(%s) → %s", tc.name, tc.input, result[:120])
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tc.id,
                "content": result,
            })
        messages.append({"role": "user", "content": tool_results})
1522
+
1523
+
1524
+ # ── Main entry point ──────────────────────────────────────────────────────────
1525
+
1526
async def handle_message(
    message: str,
    history: list,
    cfg_loader,
    store,
    slack_client=None,
    user_name: str = "",
    user_id: str = "",
    attachments: list | None = None,
) -> tuple[str, bool]:
    """
    Process one user message through the Sentinel Boss (Claude with tool use).

    Priority:
      1. Claude Pro / OAuth via `claude --print` (CLI path — no API key needed)
      2. ANTHROPIC_API_KEY fallback (structured tools, full agentic loop)

    The fallback is attempted only when the CLI reply starts with
    ``:warning:`` and an API key is configured (in the Sentinel config or the
    ``ANTHROPIC_API_KEY`` environment variable). Without a key the CLI error
    is returned as-is.

    Returns:
        (reply_text, is_done)
        is_done=True  → session complete, release the Slack queue slot.
        is_done=False → waiting for user follow-up, keep the slot.
    """
    # 1st priority: Claude Pro / OAuth via CLI
    cli_reply, cli_done = await _handle_with_cli(
        message, history, cfg_loader, store, slack_client=slack_client, user_name=user_name,
        user_id=user_id, attachments=attachments,
    )
    if not cli_reply.startswith(":warning:"):
        return cli_reply, cli_done

    # CLI failed. Check for a key before checking the package: with no key
    # there is no fallback, and the CLI error is the message worth surfacing
    # (not a misleading "install anthropic" hint).
    api_key = cfg_loader.sentinel.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        return cli_reply, cli_done  # No fallback available

    try:
        import anthropic  # noqa: F401
    except ImportError:
        return (
            ":warning: `anthropic` package not installed. Run: `pip install anthropic`",
            True,
        )

    logger.info("Boss: CLI path failed (%s…), falling back to ANTHROPIC_API_KEY", cli_reply[:60])
    return await _handle_with_api(
        message, history, cfg_loader, store, slack_client=slack_client, user_name=user_name,
        user_id=user_id, attachments=attachments,
    )