@misterhuydo/sentinel 1.5.52 → 1.5.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.cairn/.hint-lock CHANGED
@@ -1 +1 @@
1
- 2026-04-20T17:18:46.660Z
1
+ 2026-04-21T05:28:47.362Z
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-04-20T17:16:42.377Z",
3
- "checkpoint_at": "2026-04-20T17:16:42.404Z",
2
+ "message": "Auto-checkpoint at 2026-04-21T05:29:15.578Z",
3
+ "checkpoint_at": "2026-04-21T05:29:15.579Z",
4
4
  "active_files": [
5
5
  "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
6
6
  "J:\\Projects\\Sentinel\\cli\\lib\\test.js",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.5.52",
3
+ "version": "1.5.53",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -1 +1 @@
1
- __version__ = "1.5.52"
1
+ __version__ = "1.5.53"
@@ -1730,6 +1730,107 @@ async def _patch_soak_monitor(cfg_loader: ConfigLoader) -> None:
1730
1730
  logger.info("Patch soak complete — hash=%s clean=True auto_publish=%s", patch_hash[:8], auto_publish)
1731
1731
 
1732
1732
 
1733
async def _execute_monitor(monitor: dict, cfg_loader: ConfigLoader, store: StateStore) -> None:
    """Execute one monitor run: call all steps, post combined output to Slack.

    Args:
        monitor: One ``monitors`` row as a dict. Reads ``id``, ``channel``,
            ``user_id``, ``steps_json``, ``interval_seconds``, ``stop_at``,
            ``max_runs``, ``runs_so_far`` and ``name``.
        cfg_loader: Config loader; ``cfg_loader.sentinel.slack_bot_token`` is
            used to build the Slack client.
        store: State store; ``mark_monitor_ran`` is called exactly once per
            run, after all steps have finished and before anything is posted.

    Each step is given at most 300 seconds. A step that times out or raises is
    reported inline in the posted message instead of aborting the run.
    """
    import json as _json
    from datetime import datetime, timezone, timedelta
    from .sentinel_boss import _run_tool

    mon_id = monitor["id"]
    channel = monitor.get("channel", "")
    user_id = monitor.get("user_id", "")
    steps = _json.loads(monitor.get("steps_json") or "[]")
    # `or 300` also covers a stored NULL, which would otherwise crash int().
    interval_s = int(monitor.get("interval_seconds") or 300)
    stop_at = monitor.get("stop_at")
    max_runs = monitor.get("max_runs")
    runs_so_far = int(monitor.get("runs_so_far") or 0)
    mon_name = monitor.get("name") or " → ".join(s.get("tool", "") for s in steps)

    cfg = cfg_loader.sentinel

    # Build a minimal Slack client for posting. Failure here is non-fatal (the
    # steps still run and the schedule still advances) but is logged so that a
    # silent monitor can be diagnosed.
    slack_client = None
    if cfg.slack_bot_token and channel:
        try:
            from slack_sdk.web.async_client import AsyncWebClient as _AsyncWebClient
            slack_client = _AsyncWebClient(token=cfg.slack_bot_token)
        except Exception as e:
            logger.warning("Monitor %s: could not create Slack client: %s", mon_id, e)

    # Run each step
    outputs: list[str] = []
    for step in steps:
        tool = step.get("tool", "")
        inputs = step.get("inputs", {})
        try:
            result = await asyncio.wait_for(
                _run_tool(tool, inputs, cfg_loader, store,
                          slack_client=slack_client, user_id=user_id, channel=channel),
                timeout=300,
            )
            # Coerce defensively: the join/strip below requires strings.
            outputs.append("" if result is None else str(result))
        except asyncio.TimeoutError:
            outputs.append(f"_Step `{tool}` timed out after 5 minutes._")
        except Exception as e:
            outputs.append(f"_Step `{tool}` error: {e}_")

    runs_after = runs_so_far + 1
    now = datetime.now(timezone.utc)
    next_dt = now + timedelta(seconds=interval_s)

    # Determine if this was the final run
    done = False
    if max_runs and runs_after >= int(max_runs):
        done = True
    if stop_at:
        try:
            stop_dt = datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError) as e:
            logger.warning("Monitor %s: ignoring unparseable stop_at %r: %s", mon_id, stop_at, e)
        else:
            # stop_at is specified as UTC; a value without an offset parses as
            # a naive datetime, and comparing it with the aware `next_dt`
            # would raise TypeError, so the deadline would never take effect.
            if stop_dt.tzinfo is None:
                stop_dt = stop_dt.replace(tzinfo=timezone.utc)
            # Final run if the *next* scheduled run would land at/after the deadline.
            if next_dt >= stop_dt:
                done = True

    store.mark_monitor_ran(mon_id, next_dt.isoformat(), done=done)

    if not slack_client or not channel:
        return

    combined = "\n\n".join(o for o in outputs if o.strip())
    MAX_LEN = 3000
    if len(combined) > MAX_LEN:
        # The f-string is evaluated before reassignment, so it reports the
        # full pre-truncation length.
        combined = combined[:MAX_LEN] + f"\n_(output truncated — {len(combined)} chars total)_"

    header = f":repeat: *Monitor `{mon_id}`* ({mon_name}) — run #{runs_after}"
    if done:
        header += " _(final run)_"

    text = f"{header}\n{combined}" if combined.strip() else f"{header}\n_no output_"

    try:
        await slack_client.chat_postMessage(channel=channel, text=text)
        if done:
            await slack_client.chat_postMessage(
                channel=channel,
                text=f":checkered_flag: Monitor `{mon_id}` finished after {runs_after} run(s).",
            )
    except Exception as e:
        logger.warning("Monitor %s: failed to post result to Slack: %s", mon_id, e)
1819
+
1820
+
1821
async def _monitor_runner_loop(cfg_loader: ConfigLoader, store: StateStore) -> None:
    """Check every 30 s for due monitors and dispatch them.

    A monitor's ``next_run_at`` is only advanced once its run has finished, and
    a run can outlast the 30 s polling period. Monitors that are still
    executing are therefore tracked here and skipped until they complete, so
    the same monitor is never run concurrently with itself.

    Args:
        cfg_loader: Passed through to ``_execute_monitor``.
        store: Source of due monitors (``get_due_monitors``); also passed
            through to ``_execute_monitor``.
    """
    # monitor id -> running task. Holding the task here also keeps a strong
    # reference so it cannot be garbage-collected mid-execution.
    in_flight: dict = {}

    def _on_done(task, mon_id) -> None:
        in_flight.pop(mon_id, None)
        if task.cancelled():
            return
        # Retrieve the exception so failures are logged rather than surfacing
        # only as "Task exception was never retrieved".
        exc = task.exception()
        if exc is not None:
            logger.warning("Monitor %s: run failed: %s", mon_id, exc)

    await asyncio.sleep(15)  # brief startup delay
    while True:
        try:
            for monitor in store.get_due_monitors():
                mon_id = monitor["id"]
                if mon_id in in_flight:
                    continue  # previous run still executing
                task = asyncio.ensure_future(_execute_monitor(monitor, cfg_loader, store))
                in_flight[mon_id] = task
                # Bind mon_id as a default to avoid the late-binding closure pitfall.
                task.add_done_callback(lambda t, _id=mon_id: _on_done(t, _id))
        except Exception as e:
            logger.warning("Monitor runner: error checking due monitors: %s", e)
        await asyncio.sleep(30)
1832
+
1833
+
1733
1834
  async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
1734
1835
  interval = cfg_loader.sentinel.poll_interval_seconds
1735
1836
  logger.info("Sentinel starting — poll interval: %ds, repos: %s",
@@ -1774,6 +1875,7 @@ async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
1774
1875
  asyncio.ensure_future(_dev_poll_loop(cfg_loader, store))
1775
1876
  asyncio.ensure_future(_patch_soak_monitor(cfg_loader))
1776
1877
  asyncio.ensure_future(_repo_task_poll_loop(cfg_loader, store))
1878
+ asyncio.ensure_future(_monitor_runner_loop(cfg_loader, store))
1777
1879
 
1778
1880
  while True:
1779
1881
  try:
@@ -367,6 +367,19 @@ reply with a grouped summary like this:
367
367
  – The user explicitly says "STS is Whydah-SecurityTokenService" or "route STS to ..."
368
368
  After saving, Sentinel automatically reprocesses any pending routing for that service.
369
369
 
370
+ *Scheduled monitors*
371
+ • `start_monitor` — run a tool repeatedly at an interval, posting results to this channel.
372
+ Supported patterns:
373
+ – "every X min/hours/days until I say stop"
374
+ – "every X min for Y hours/days" → calculate stop_at = now + duration
375
+ – "every X min for N times" → set max_runs=N
376
+ – "every X min until <datetime>" → set stop_at
377
+ Minimum interval: 60 seconds. Allowed tools: fetch_logs, filter_logs, get_status,
378
+ ask_logs, list_recent_commits, check_health.
379
+ Always confirm to the user with the monitor ID and stop condition before creating.
380
+ • `stop_monitor` — cancel a monitor by ID, or pass "all" to cancel all in this channel
381
+ • `list_monitors` — show all active and recent monitors with their status and next-run time
382
+
370
383
  *File sharing*
371
384
  • `post_file` — upload any output as a Slack file (logs, diffs, reports)
372
385
 
@@ -1318,6 +1331,79 @@ _TOOLS = [
1318
1331
  "required": ["service_name", "repo_name"],
1319
1332
  },
1320
1333
  },
1334
+ {
1335
+ "name": "start_monitor",
1336
+ "description": (
1337
+ "Create a recurring scheduled task that runs a tool at a regular interval and posts "
1338
+ "results to this Slack channel. Supports: run indefinitely (until stopped), run for "
1339
+ "a fixed duration (stop_at), or run N times (max_runs). "
1340
+ "steps is a list of {tool, inputs} objects — most monitors are a single step. "
1341
+ "Allowed tools: fetch_logs, filter_logs, get_status, ask_logs, list_recent_commits, check_health. "
1342
+ "Boss calculates stop_at from phrases like 'within 2 hours' / 'for 30 minutes' using "
1343
+ "the current UTC time in the system prompt. "
1344
+ "Examples: 'fetch SSOLWA logs filtered by provision/phone every 5 min for 2 hours', "
1345
+ "'check STS health every 10 min until I say stop', 'get status every hour for 3 times'."
1346
+ ),
1347
+ "input_schema": {
1348
+ "type": "object",
1349
+ "properties": {
1350
+ "name": {
1351
+ "type": "string",
1352
+ "description": "Short label for this monitor (e.g. 'SSOLWA provision/phone')",
1353
+ },
1354
+ "steps": {
1355
+ "type": "array",
1356
+ "description": "Ordered list of tool calls to execute each interval",
1357
+ "items": {
1358
+ "type": "object",
1359
+ "properties": {
1360
+ "tool": {"type": "string"},
1361
+ "inputs": {"type": "object"},
1362
+ },
1363
+ "required": ["tool", "inputs"],
1364
+ },
1365
+ },
1366
+ "interval_seconds": {
1367
+ "type": "integer",
1368
+ "description": "How often to run in seconds (minimum 60)",
1369
+ },
1370
+ "stop_at": {
1371
+ "type": "string",
1372
+ "description": "ISO datetime (UTC) to stop, or null to run indefinitely",
1373
+ },
1374
+ "max_runs": {
1375
+ "type": "integer",
1376
+ "description": "Maximum number of runs, or null for unlimited",
1377
+ },
1378
+ },
1379
+ "required": ["steps", "interval_seconds"],
1380
+ },
1381
+ },
1382
+ {
1383
+ "name": "stop_monitor",
1384
+ "description": (
1385
+ "Cancel a running monitor by ID. Pass 'all' to cancel every active monitor in this channel. "
1386
+ "Use for: 'stop monitor m-abc123', 'stop all monitors', 'cancel the log watch'."
1387
+ ),
1388
+ "input_schema": {
1389
+ "type": "object",
1390
+ "properties": {
1391
+ "monitor_id": {
1392
+ "type": "string",
1393
+ "description": "Monitor ID (e.g. 'm-abc123') or 'all'",
1394
+ },
1395
+ },
1396
+ "required": ["monitor_id"],
1397
+ },
1398
+ },
1399
+ {
1400
+ "name": "list_monitors",
1401
+ "description": (
1402
+ "List active and recently completed scheduled monitors. "
1403
+ "Use for: 'what monitors are running?', 'show scheduled tasks', 'list active monitors'."
1404
+ ),
1405
+ "input_schema": {"type": "object", "properties": {}},
1406
+ },
1321
1407
  {
1322
1408
  "name": "upgrade_sentinel",
1323
1409
  "description": (
@@ -2118,6 +2204,20 @@ def _auto_health_check(source_hint: str, cfg_loader) -> dict | None:
2118
2204
  return None
2119
2205
 
2120
2206
 
2207
+ def _format_duration(seconds: int) -> str:
2208
+ """Convert seconds to a human-readable duration string."""
2209
+ if seconds < 60:
2210
+ return f"{seconds}s"
2211
+ if seconds < 3600:
2212
+ m = seconds // 60
2213
+ return f"{m} min" + ("s" if m != 1 else "")
2214
+ if seconds < 86400:
2215
+ h = seconds // 3600
2216
+ return f"{h} hour" + ("s" if h != 1 else "")
2217
+ d = seconds // 86400
2218
+ return f"{d} day" + ("s" if d != 1 else "")
2219
+
2220
+
2121
2221
  # ── Tool execution ────────────────────────────────────────────────────────────
2122
2222
 
2123
2223
  async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "", channel: str = "", is_admin: bool = False) -> str:
@@ -3367,6 +3467,108 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
3367
3467
  ),
3368
3468
  })
3369
3469
 
3470
+ if name == "start_monitor":
3471
+ steps = inputs.get("steps") or []
3472
+ interval_s = int(inputs.get("interval_seconds", 0))
3473
+ stop_at = inputs.get("stop_at") or None
3474
+ max_runs = inputs.get("max_runs") or None
3475
+ mon_name = (inputs.get("name") or "").strip()
3476
+
3477
+ if not steps:
3478
+ return json.dumps({"error": "steps is required — list at least one {tool, inputs} step"})
3479
+ if interval_s < 60:
3480
+ return json.dumps({"error": f"interval_seconds must be >= 60 (minimum 1 minute), got {interval_s}"})
3481
+
3482
+ _MONITOR_ALLOWED = {"fetch_logs", "filter_logs", "get_status", "ask_logs",
3483
+ "list_recent_commits", "check_health"}
3484
+ for _step in steps:
3485
+ _t = _step.get("tool", "")
3486
+ if _t not in _MONITOR_ALLOWED:
3487
+ return json.dumps({
3488
+ "error": f"Tool '{_t}' is not allowed in monitors.",
3489
+ "allowed": sorted(_MONITOR_ALLOWED),
3490
+ })
3491
+
3492
+ if stop_at:
3493
+ try:
3494
+ datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
3495
+ except ValueError:
3496
+ return json.dumps({"error": f"Invalid stop_at datetime: '{stop_at}'. Use ISO 8601 format."})
3497
+
3498
+ if max_runs is not None:
3499
+ max_runs = int(max_runs)
3500
+ if max_runs < 1:
3501
+ return json.dumps({"error": "max_runs must be >= 1"})
3502
+
3503
+ mon_id = "m-" + uuid.uuid4().hex[:6]
3504
+ steps_json = json.dumps(steps)
3505
+
3506
+ store.create_monitor(
3507
+ id=mon_id, name=mon_name, steps_json=steps_json,
3508
+ interval_seconds=interval_s, stop_at=stop_at, max_runs=max_runs,
3509
+ channel=channel, user_id=user_id or "",
3510
+ )
3511
+
3512
+ interval_str = _format_duration(interval_s)
3513
+ if stop_at:
3514
+ _stop_dt = datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
3515
+ stop_desc = f"until {_stop_dt.strftime('%Y-%m-%d %H:%M UTC')}"
3516
+ if max_runs:
3517
+ stop_desc += f" or {max_runs} run(s) — whichever comes first"
3518
+ elif max_runs:
3519
+ stop_desc = f"for {max_runs} run{'s' if max_runs > 1 else ''}"
3520
+ else:
3521
+ stop_desc = "until you say stop"
3522
+
3523
+ steps_desc = " → ".join(s["tool"] for s in steps)
3524
+ label = mon_name or steps_desc
3525
+ return json.dumps({
3526
+ "monitor_id": mon_id,
3527
+ "label": label,
3528
+ "interval": interval_str,
3529
+ "stop": stop_desc,
3530
+ "first_run_in": interval_str,
3531
+ "status": "active",
3532
+ "message": (
3533
+ f"Monitor `{mon_id}` created — running `{label}` every {interval_str} {stop_desc}. "
3534
+ f"First run in {interval_str}. Say `stop monitor {mon_id}` to cancel."
3535
+ ),
3536
+ })
3537
+
3538
+ if name == "stop_monitor":
3539
+ mon_id = (inputs.get("monitor_id") or "").strip()
3540
+ if not mon_id:
3541
+ return json.dumps({"error": "monitor_id is required"})
3542
+ if mon_id.lower() == "all":
3543
+ count = store.cancel_all_monitors(channel=channel)
3544
+ return json.dumps({"cancelled": count, "message": f"Cancelled {count} active monitor(s)."})
3545
+ ok = store.cancel_monitor(mon_id)
3546
+ if ok:
3547
+ return json.dumps({"status": "cancelled", "monitor_id": mon_id})
3548
+ return json.dumps({"error": f"Monitor '{mon_id}' not found or already stopped."})
3549
+
3550
+ if name == "list_monitors":
3551
+ monitors = store.list_all_monitors()
3552
+ if not monitors:
3553
+ return json.dumps({"monitors": [], "message": "No monitors found."})
3554
+ result = []
3555
+ for _m in monitors:
3556
+ _runs_left = None
3557
+ if _m.get("max_runs"):
3558
+ _runs_left = _m["max_runs"] - _m["runs_so_far"]
3559
+ result.append({
3560
+ "id": _m["id"],
3561
+ "name": _m.get("name") or "",
3562
+ "status": _m["status"],
3563
+ "interval": _format_duration(_m["interval_seconds"]),
3564
+ "runs_so_far": _m["runs_so_far"],
3565
+ "runs_left": _runs_left,
3566
+ "next_run_at": _m.get("next_run_at") or "",
3567
+ "stop_at": _m.get("stop_at") or "",
3568
+ "steps": json.loads(_m.get("steps_json") or "[]"),
3569
+ })
3570
+ return json.dumps({"monitors": result})
3571
+
3370
3572
  if name == "upgrade_sentinel":
3371
3573
  if not is_admin:
3372
3574
  return json.dumps({"error": "upgrade is admin-only. Ask a Sentinel admin to perform the upgrade."})
@@ -433,6 +433,108 @@ class StateStore:
433
433
  rows = conn.execute("SELECT * FROM service_aliases ORDER BY service_name").fetchall()
434
434
  return [dict(r) for r in rows]
435
435
 
436
+ # ── Scheduled monitors ────────────────────────────────────────────────────
437
+
438
+ def _ensure_monitors_table(self, conn):
439
+ conn.execute(
440
+ "CREATE TABLE IF NOT EXISTS monitors ("
441
+ "id TEXT PRIMARY KEY, "
442
+ "name TEXT, "
443
+ "steps_json TEXT, "
444
+ "interval_seconds INTEGER, "
445
+ "stop_at TEXT, "
446
+ "max_runs INTEGER, "
447
+ "runs_so_far INTEGER DEFAULT 0, "
448
+ "last_run_at TEXT, "
449
+ "next_run_at TEXT, "
450
+ "channel TEXT, "
451
+ "user_id TEXT, "
452
+ "status TEXT DEFAULT 'active', "
453
+ "created_at TEXT)"
454
+ )
455
+
456
def create_monitor(self, id: str, name: str, steps_json: str,
                   interval_seconds: int, stop_at, max_runs,
                   channel: str, user_id: str) -> None:
    """Insert a new monitor row with status 'active' and zero runs.

    The first run is scheduled one full interval from now (UTC);
    ``last_run_at`` starts out NULL and ``created_at`` is taken from ``_now()``.
    """
    from datetime import datetime, timezone, timedelta

    first_due = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
    first_due_iso = first_due.isoformat()
    insert_sql = (
        "INSERT INTO monitors (id, name, steps_json, interval_seconds, stop_at, "
        "max_runs, runs_so_far, last_run_at, next_run_at, channel, user_id, status, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?, 0, NULL, ?, ?, ?, 'active', ?)"
    )
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        values = (
            id, name, steps_json, interval_seconds, stop_at, max_runs,
            first_due_iso, channel, user_id, _now(),
        )
        conn.execute(insert_sql, values)
470
+
471
def get_due_monitors(self) -> list[dict]:
    """Return every active monitor whose ``next_run_at`` is at or before the current UTC time."""
    from datetime import datetime, timezone

    cutoff = datetime.now(timezone.utc).isoformat()
    query = "SELECT * FROM monitors WHERE status = 'active' AND next_run_at <= ?"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        cursor = conn.execute(query, (cutoff,))
        due_rows = cursor.fetchall()
        return list(map(dict, due_rows))
481
+
482
def get_monitor(self, id: str) -> dict | None:
    """Look up a single monitor by ID; return it as a dict, or None if absent."""
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        cursor = conn.execute("SELECT * FROM monitors WHERE id = ?", (id,))
        found = cursor.fetchone()
        if not found:
            return None
        return dict(found)
487
+
488
def list_active_monitors(self) -> list[dict]:
    """Return all monitors with status 'active', newest first by ``created_at``."""
    query = "SELECT * FROM monitors WHERE status = 'active' ORDER BY created_at DESC"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        active_rows = conn.execute(query).fetchall()
        return list(map(dict, active_rows))
495
+
496
def list_all_monitors(self) -> list[dict]:
    """Return up to 50 monitors of any status, newest first by ``created_at``."""
    query = "SELECT * FROM monitors ORDER BY created_at DESC LIMIT 50"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        recent_rows = conn.execute(query).fetchall()
        return list(map(dict, recent_rows))
503
+
504
def mark_monitor_ran(self, id: str, next_run_at: str, done: bool = False) -> None:
    """Record a completed run for monitor *id*.

    Increments ``runs_so_far``, stamps ``last_run_at`` with ``_now()`` and
    stores *next_run_at*. The status becomes 'done' when *done* is true and
    stays 'active' otherwise, but only if the row is still 'active'. A monitor
    that was cancelled while its run was in progress keeps its 'cancelled'
    status instead of being silently re-activated.
    """
    status = "done" if done else "active"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        conn.execute(
            "UPDATE monitors SET runs_so_far = runs_so_far + 1, last_run_at = ?, "
            "next_run_at = ?, "
            # The CASE reads the pre-update status, so only active rows change state.
            "status = CASE WHEN status = 'active' THEN ? ELSE status END "
            "WHERE id = ?",
            (_now(), next_run_at, status, id),
        )
513
+
514
def cancel_monitor(self, id: str) -> bool:
    """Mark one active monitor as 'cancelled'.

    Returns True if a row was changed, False if no active monitor with that ID
    exists.
    """
    update_sql = "UPDATE monitors SET status = 'cancelled' WHERE id = ? AND status = 'active'"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        outcome = conn.execute(update_sql, (id,))
        changed = outcome.rowcount
        return changed > 0
522
+
523
def cancel_all_monitors(self, channel: str = "") -> int:
    """Cancel all active monitors, limited to *channel* when one is given.

    Returns the number of rows that were switched to 'cancelled'.
    """
    base_sql = "UPDATE monitors SET status = 'cancelled' WHERE status = 'active'"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        if not channel:
            outcome = conn.execute(base_sql)
        else:
            outcome = conn.execute(base_sql + " AND channel = ?", (channel,))
        return outcome.rowcount
537
+
436
538
  # ── Pending bot-message routing questions ─────────────────────────────────
437
539
 
438
540
  def _ensure_pending_routings_table(self, conn):