npm - @misterhuydo/sentinel - Versions diffs - 1.5.52 → 1.5.53 - Mend

@misterhuydo/sentinel 1.5.52 → 1.5.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/.cairn/.hint-lock +1 -1
package/.cairn/session.json +2 -2
package/package.json +1 -1
package/python/sentinel/__init__.py +1 -1
package/python/sentinel/main.py +102 -0
package/python/sentinel/sentinel_boss.py +202 -0
package/python/sentinel/state_store.py +102 -0

package/.cairn/.hint-lock CHANGED Viewed

	@@ -1 +1 @@
1	- 2026-04-20T17:18:46.660Z
1	+ 2026-04-21T05:28:47.362Z

package/.cairn/session.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "message": "Auto-checkpoint at 2026-04-20T17:16:42.377Z",
-  "checkpoint_at": "2026-04-20T17:16:42.404Z",
+  "message": "Auto-checkpoint at 2026-04-21T05:29:15.578Z",
+  "checkpoint_at": "2026-04-21T05:29:15.579Z",
   "active_files": [
     "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
     "J:\\Projects\\Sentinel\\cli\\lib\\test.js",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@misterhuydo/sentinel",
-  "version": "1.5.52",
+  "version": "1.5.53",
   "description": "Sentinel — Autonomous DevOps Agent installer and manager",
   "bin": {
     "sentinel": "./bin/sentinel.js"

package/python/sentinel/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.5.52"
1	+ __version__ = "1.5.53"

package/python/sentinel/main.py CHANGED Viewed

@@ -1730,6 +1730,107 @@ async def _patch_soak_monitor(cfg_loader: ConfigLoader) -> None:
     logger.info("Patch soak complete — hash=%s clean=True auto_publish=%s", patch_hash[:8], auto_publish)
+async def _execute_monitor(monitor: dict, cfg_loader: ConfigLoader, store: StateStore) -> None:
+    """Execute one monitor run: call all steps, post combined output to Slack.
+    ``monitor`` is one row dict from ``store.get_due_monitors()``. Each step is run through
+    ``_run_tool`` with a 5-minute timeout; a step that fails or times out contributes an
+    error line instead of aborting the run. The run is then recorded with
+    ``store.mark_monitor_ran`` and, when a Slack client and channel are available, the
+    combined output (truncated to 3000 characters) is posted to the monitor's channel.
+    """
+    import json as _json
+    from datetime import datetime, timezone, timedelta
+    from .sentinel_boss import _run_tool
+    mon_id          = monitor["id"]
+    channel         = monitor.get("channel", "")
+    user_id         = monitor.get("user_id", "")
+    # `or 300` also covers a NULL column, which int() would reject.
+    interval_s      = int(monitor.get("interval_seconds") or 300)
+    stop_at         = monitor.get("stop_at")
+    max_runs        = monitor.get("max_runs")
+    runs_so_far     = int(monitor.get("runs_so_far") or 0)
+    outputs: list[str] = []
+    # This coroutine runs as a fire-and-forget task: if unreadable steps raised here, the run
+    # would never be recorded and the monitor would stay due on every poll.
+    try:
+        steps = _json.loads(monitor.get("steps_json") or "[]")
+        if not isinstance(steps, list):
+            raise ValueError("steps_json is not a list")
+    except ValueError as e:
+        logger.warning("Monitor %s: unreadable steps_json: %s", mon_id, e)
+        outputs.append(f"_Monitor steps could not be read: {e}_")
+        steps = []
+    # Entries that are not {tool, inputs} objects cannot be executed; drop them up front.
+    steps = [s for s in steps if isinstance(s, dict)]
+    mon_name        = monitor.get("name") or " → ".join(s.get("tool", "") for s in steps)
+    cfg = cfg_loader.sentinel
+    # Build a minimal Slack client for posting
+    slack_client = None
+    if cfg.slack_bot_token and channel:
+        try:
+            from slack_sdk.web.async_client import AsyncWebClient as _AsyncWebClient
+            slack_client = _AsyncWebClient(token=cfg.slack_bot_token)
+        except Exception as e:
+            # Best effort: the steps below still run and the run is still recorded.
+            logger.warning("Monitor %s: Slack client unavailable: %s", mon_id, e)
+    # Run each step
+    for step in steps:
+        tool   = step.get("tool", "")
+        inputs = step.get("inputs") or {}
+        try:
+            result = await asyncio.wait_for(
+                _run_tool(tool, inputs, cfg_loader, store,
+                          slack_client=slack_client, user_id=user_id, channel=channel),
+                timeout=300,
+            )
+            if result is not None:
+                outputs.append(str(result))
+        except asyncio.TimeoutError:
+            outputs.append(f"_Step `{tool}` timed out after 5 minutes._")
+        except Exception as e:
+            outputs.append(f"_Step `{tool}` error: {e}_")
+    runs_after = runs_so_far + 1
+    now        = datetime.now(timezone.utc)
+    next_dt    = now + timedelta(seconds=interval_s)
+    # Determine if this was the final run
+    done = False
+    if max_runs and runs_after >= int(max_runs):
+        done = True
+    if stop_at:
+        try:
+            stop_dt = datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
+        except (AttributeError, ValueError) as e:
+            logger.warning("Monitor %s: ignoring unparseable stop_at %r: %s", mon_id, stop_at, e)
+        else:
+            # A naive value cannot be compared with the aware `next_dt`; the tool schema
+            # defines stop_at as UTC, so interpret it that way.
+            if stop_dt.tzinfo is None:
+                stop_dt = stop_dt.replace(tzinfo=timezone.utc)
+            # The next scheduled run would land at or after the stop time, so this is the last one.
+            if next_dt >= stop_dt:
+                done = True
+    store.mark_monitor_ran(mon_id, next_dt.isoformat(), done=done)
+    if not slack_client or not channel:
+        return
+    combined = "\n\n".join(o for o in outputs if o.strip())
+    MAX_LEN  = 3000
+    if len(combined) > MAX_LEN:
+        combined = combined[:MAX_LEN] + f"\n_(output truncated — {len(combined)} chars total)_"
+    header = f":repeat: *Monitor `{mon_id}`* ({mon_name}) — run #{runs_after}"
+    if done:
+        header += " _(final run)_"
+    text = f"{header}\n{combined}" if combined.strip() else f"{header}\n_no output_"
+    try:
+        await slack_client.chat_postMessage(channel=channel, text=text)
+        if done:
+            await slack_client.chat_postMessage(
+                channel=channel,
+                text=f":checkered_flag: Monitor `{mon_id}` finished after {runs_after} run(s).",
+            )
+    except Exception as e:
+        logger.warning("Monitor %s: failed to post result to Slack: %s", mon_id, e)
+async def _monitor_runner_loop(cfg_loader: ConfigLoader, store: StateStore) -> None:
+    """Check every 30 s for due monitors and dispatch them.
+    A monitor's ``next_run_at`` is only advanced once its run has finished, and a run can
+    outlast the 30-second poll, so monitors that are still executing are tracked and skipped
+    rather than being dispatched a second time. Runs forever; never returns normally.
+    """
+    # Monitor id -> running task. Also keeps a strong reference so the task is not
+    # garbage-collected while it is still pending.
+    in_flight: dict = {}
+    def _on_done(task, finished_id):
+        in_flight.pop(finished_id, None)
+        # Retrieve the exception so a failed run is logged instead of being silently lost.
+        if not task.cancelled() and task.exception() is not None:
+            logger.warning("Monitor %s: run failed: %s", finished_id, task.exception())
+    await asyncio.sleep(15)   # brief startup delay
+    while True:
+        try:
+            for monitor in store.get_due_monitors():
+                mon_id = monitor["id"]
+                if mon_id in in_flight:
+                    continue   # previous run has not finished yet
+                task = asyncio.ensure_future(_execute_monitor(monitor, cfg_loader, store))
+                in_flight[mon_id] = task
+                task.add_done_callback(lambda t, _id=mon_id: _on_done(t, _id))
+        except Exception as e:
+            logger.warning("Monitor runner: error checking due monitors: %s", e)
+        await asyncio.sleep(30)
 async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
     interval = cfg_loader.sentinel.poll_interval_seconds
     logger.info("Sentinel starting — poll interval: %ds, repos: %s",
@@ -1774,6 +1875,7 @@ async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
         asyncio.ensure_future(_dev_poll_loop(cfg_loader, store))
         asyncio.ensure_future(_patch_soak_monitor(cfg_loader))
     asyncio.ensure_future(_repo_task_poll_loop(cfg_loader, store))
+    asyncio.ensure_future(_monitor_runner_loop(cfg_loader, store))
     while True:
         try:

package/python/sentinel/sentinel_boss.py CHANGED Viewed

@@ -367,6 +367,19 @@ reply with a grouped summary like this:
     – The user explicitly says "STS is Whydah-SecurityTokenService" or "route STS to ..."
   After saving, Sentinel automatically reprocesses any pending routing for that service.
+*Scheduled monitors*
+• `start_monitor` — run a tool repeatedly at an interval, posting results to this channel.
+  Supported patterns:
+    – "every X min/hours/days until I say stop"
+    – "every X min for Y hours/days" → calculate stop_at = now + duration
+    – "every X min for N times" → set max_runs=N
+    – "every X min until <datetime>" → set stop_at
+  Minimum interval: 60 seconds. Allowed tools: fetch_logs, filter_logs, get_status,
+  ask_logs, list_recent_commits, check_health.
+  Always confirm to the user with the monitor ID and stop condition before creating.
+• `stop_monitor` — cancel a monitor by ID, or pass "all" to cancel all in this channel
+• `list_monitors` — show all active and recent monitors with their status and next-run time
 *File sharing*
 • `post_file` — upload any output as a Slack file (logs, diffs, reports)
@@ -1318,6 +1331,79 @@ _TOOLS = [
             "required": ["service_name", "repo_name"],
         },
     },
+    {
+        "name": "start_monitor",
+        "description": (
+            "Create a recurring scheduled task that runs a tool at a regular interval and posts "
+            "results to this Slack channel. Supports: run indefinitely (until stopped), run for "
+            "a fixed duration (stop_at), or run N times (max_runs). "
+            "steps is a list of {tool, inputs} objects — most monitors are a single step. "
+            "Allowed tools: fetch_logs, filter_logs, get_status, ask_logs, list_recent_commits, check_health. "
+            "Boss calculates stop_at from phrases like 'within 2 hours' / 'for 30 minutes' using "
+            "the current UTC time in the system prompt. "
+            "Examples: 'fetch SSOLWA logs filtered by provision/phone every 5 min for 2 hours', "
+            "'check STS health every 10 min until I say stop', 'get status every hour for 3 times'."
+        ),
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string",
+                    "description": "Short label for this monitor (e.g. 'SSOLWA provision/phone')",
+                },
+                "steps": {
+                    "type": "array",
+                    "description": "Ordered list of tool calls to execute each interval",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "tool":   {"type": "string"},
+                            "inputs": {"type": "object"},
+                        },
+                        "required": ["tool", "inputs"],
+                    },
+                },
+                "interval_seconds": {
+                    "type": "integer",
+                    "description": "How often to run in seconds (minimum 60)",
+                },
+                "stop_at": {
+                    "type": "string",
+                    "description": "ISO datetime (UTC) to stop, or null to run indefinitely",
+                },
+                "max_runs": {
+                    "type": "integer",
+                    "description": "Maximum number of runs, or null for unlimited",
+                },
+            },
+            "required": ["steps", "interval_seconds"],
+        },
+    },
+    {
+        "name": "stop_monitor",
+        "description": (
+            "Cancel a running monitor by ID. Pass 'all' to cancel every active monitor in this channel. "
+            "Use for: 'stop monitor m-abc123', 'stop all monitors', 'cancel the log watch'."
+        ),
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "monitor_id": {
+                    "type": "string",
+                    "description": "Monitor ID (e.g. 'm-abc123') or 'all'",
+                },
+            },
+            "required": ["monitor_id"],
+        },
+    },
+    {
+        "name": "list_monitors",
+        "description": (
+            "List active and recently completed scheduled monitors. "
+            "Use for: 'what monitors are running?', 'show scheduled tasks', 'list active monitors'."
+        ),
+        "input_schema": {"type": "object", "properties": {}},
+    },
     {
         "name": "upgrade_sentinel",
         "description": (
@@ -2118,6 +2204,20 @@ def _auto_health_check(source_hint: str, cfg_loader) -> dict | None:
         return None
+def _format_duration(seconds: int) -> str:
+    """Convert seconds to a human-readable duration string.
+    Values under a minute are rendered as ``"<n>s"``. Longer values are broken into days,
+    hours, minutes and leftover seconds, and every non-zero part is shown, so 90 becomes
+    ``"1 min 30s"`` rather than being rounded down to ``"1 min"``. Exact multiples keep the
+    single-unit form (``300`` -> ``"5 mins"``, ``3600`` -> ``"1 hour"``).
+    """
+    if seconds < 60:
+        return f"{seconds}s"
+    parts = []
+    remaining = seconds
+    for label, size in (("day", 86400), ("hour", 3600), ("min", 60)):
+        count, remaining = divmod(remaining, size)
+        if count:
+            parts.append(f"{count} {label}" + ("s" if count != 1 else ""))
+    if remaining:
+        parts.append(f"{remaining}s")
+    return " ".join(parts)
 # ── Tool execution ────────────────────────────────────────────────────────────
 async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "", channel: str = "", is_admin: bool = False) -> str:
@@ -3367,6 +3467,108 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
             ),
         })
+    if name == "start_monitor":
+        # Validate the request, persist the monitor via the store, and return a JSON confirmation.
+        from datetime import timezone as _timezone
+        steps       = inputs.get("steps") or []
+        stop_at     = inputs.get("stop_at") or None
+        # Keep the raw value: `or None` would turn an explicit 0 into "unlimited" and skip
+        # the >= 1 check below.
+        max_runs    = inputs.get("max_runs")
+        mon_name    = (inputs.get("name") or "").strip()
+        if not steps:
+            return json.dumps({"error": "steps is required — list at least one {tool, inputs} step"})
+        if not isinstance(steps, list) or not all(isinstance(_s, dict) for _s in steps):
+            return json.dumps({"error": "steps must be a list of {tool, inputs} objects"})
+        try:
+            interval_s = int(inputs.get("interval_seconds") or 0)
+        except (TypeError, ValueError):
+            return json.dumps({"error": "interval_seconds must be an integer number of seconds (>= 60)"})
+        if interval_s < 60:
+            return json.dumps({"error": f"interval_seconds must be >= 60 (minimum 1 minute), got {interval_s}"})
+        _MONITOR_ALLOWED = {"fetch_logs", "filter_logs", "get_status", "ask_logs",
+                            "list_recent_commits", "check_health"}
+        for _step in steps:
+            _t = _step.get("tool", "")
+            if _t not in _MONITOR_ALLOWED:
+                return json.dumps({
+                    "error": f"Tool '{_t}' is not allowed in monitors.",
+                    "allowed": sorted(_MONITOR_ALLOWED),
+                })
+        _stop_dt = None
+        if stop_at:
+            try:
+                _stop_dt = datetime.fromisoformat(str(stop_at).replace("Z", "+00:00"))
+            except ValueError:
+                return json.dumps({"error": f"Invalid stop_at datetime: '{stop_at}'. Use ISO 8601 format."})
+            if _stop_dt.tzinfo is None:
+                # The tool schema defines stop_at as UTC. Store it with an explicit offset so
+                # it can later be compared against timezone-aware "now" values.
+                _stop_dt = _stop_dt.replace(tzinfo=_timezone.utc)
+                stop_at = _stop_dt.isoformat()
+        if max_runs is not None:
+            try:
+                max_runs = int(max_runs)
+            except (TypeError, ValueError):
+                return json.dumps({"error": "max_runs must be an integer >= 1"})
+            if max_runs < 1:
+                return json.dumps({"error": "max_runs must be >= 1"})
+        mon_id     = "m-" + uuid.uuid4().hex[:6]
+        steps_json = json.dumps(steps)
+        store.create_monitor(
+            id=mon_id, name=mon_name, steps_json=steps_json,
+            interval_seconds=interval_s, stop_at=stop_at, max_runs=max_runs,
+            channel=channel, user_id=user_id or "",
+        )
+        interval_str = _format_duration(interval_s)
+        if _stop_dt is not None:
+            # Convert before formatting: the label says UTC, so the printed time must be UTC
+            # even when the caller supplied another offset.
+            stop_desc = f"until {_stop_dt.astimezone(_timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}"
+            if max_runs:
+                stop_desc += f" or {max_runs} run(s) — whichever comes first"
+        elif max_runs:
+            stop_desc = f"for {max_runs} run{'s' if max_runs > 1 else ''}"
+        else:
+            stop_desc = "until you say stop"
+        steps_desc = " → ".join(s["tool"] for s in steps)
+        label = mon_name or steps_desc
+        return json.dumps({
+            "monitor_id":   mon_id,
+            "label":        label,
+            "interval":     interval_str,
+            "stop":         stop_desc,
+            "first_run_in": interval_str,
+            "status":       "active",
+            "message": (
+                f"Monitor `{mon_id}` created — running `{label}` every {interval_str} {stop_desc}. "
+                f"First run in {interval_str}. Say `stop monitor {mon_id}` to cancel."
+            ),
+        })
+    if name == "stop_monitor":
+        # Cancel one monitor by ID, or every active monitor in this channel when the ID is "all".
+        target = (inputs.get("monitor_id") or "").strip()
+        if not target:
+            return json.dumps({"error": "monitor_id is required"})
+        if target.lower() != "all":
+            if store.cancel_monitor(target):
+                return json.dumps({"status": "cancelled", "monitor_id": target})
+            return json.dumps({"error": f"Monitor '{target}' not found or already stopped."})
+        cancelled = store.cancel_all_monitors(channel=channel)
+        return json.dumps({"cancelled": cancelled, "message": f"Cancelled {cancelled} active monitor(s)."})
+    if name == "list_monitors":
+        # Summarise the monitors returned by the store as a JSON list.
+        rows = store.list_all_monitors()
+        if not rows:
+            return json.dumps({"monitors": [], "message": "No monitors found."})
+        summaries = []
+        for row in rows:
+            cap = row.get("max_runs")
+            # Remaining runs only make sense when the monitor has a run limit.
+            remaining = (row["max_runs"] - row["runs_so_far"]) if cap else None
+            summary = {
+                "id":          row["id"],
+                "name":        row.get("name") or "",
+                "status":      row["status"],
+                "interval":    _format_duration(row["interval_seconds"]),
+                "runs_so_far": row["runs_so_far"],
+                "runs_left":   remaining,
+                "next_run_at": row.get("next_run_at") or "",
+                "stop_at":     row.get("stop_at") or "",
+                "steps":       json.loads(row.get("steps_json") or "[]"),
+            }
+            summaries.append(summary)
+        return json.dumps({"monitors": summaries})
     if name == "upgrade_sentinel":
         if not is_admin:
             return json.dumps({"error": "upgrade is admin-only. Ask a Sentinel admin to perform the upgrade."})

package/python/sentinel/state_store.py CHANGED Viewed

@@ -433,6 +433,108 @@ class StateStore:
             rows = conn.execute("SELECT * FROM service_aliases ORDER BY service_name").fetchall()
             return [dict(r) for r in rows]
+    # ── Scheduled monitors ────────────────────────────────────────────────────
+    def _ensure_monitors_table(self, conn):
+        """Create the ``monitors`` table on ``conn`` if it does not exist yet."""
+        column_defs = (
+            "id TEXT PRIMARY KEY",
+            "name TEXT",
+            "steps_json TEXT",
+            "interval_seconds INTEGER",
+            "stop_at TEXT",
+            "max_runs INTEGER",
+            "runs_so_far INTEGER DEFAULT 0",
+            "last_run_at TEXT",
+            "next_run_at TEXT",
+            "channel TEXT",
+            "user_id TEXT",
+            "status TEXT DEFAULT 'active'",
+            "created_at TEXT",
+        )
+        ddl = "CREATE TABLE IF NOT EXISTS monitors (" + ", ".join(column_defs) + ")"
+        conn.execute(ddl)
+    def create_monitor(self, id: str, name: str, steps_json: str,
+                       interval_seconds: int, stop_at, max_runs,
+                       channel: str, user_id: str) -> None:
+        """Insert a new active monitor whose first run is due one interval from now (UTC)."""
+        from datetime import datetime, timedelta, timezone
+        first_due = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
+        next_run = first_due.isoformat()
+        insert_sql = (
+            "INSERT INTO monitors (id, name, steps_json, interval_seconds, stop_at, "
+            "max_runs, runs_so_far, last_run_at, next_run_at, channel, user_id, status, created_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, 0, NULL, ?, ?, ?, 'active', ?)"
+        )
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            # runs_so_far, last_run_at and status are fixed in the SQL; everything else is bound.
+            params = (id, name, steps_json, interval_seconds, stop_at, max_runs,
+                      next_run, channel, user_id, _now())
+            conn.execute(insert_sql, params)
+    def get_due_monitors(self) -> list[dict]:
+        """Return every active monitor whose ``next_run_at`` is at or before the current UTC time."""
+        from datetime import datetime, timezone
+        cutoff = datetime.now(timezone.utc).isoformat()
+        query = "SELECT * FROM monitors WHERE status = 'active' AND next_run_at <= ?"
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            due_rows = conn.execute(query, (cutoff,)).fetchall()
+            return list(map(dict, due_rows))
+    def get_monitor(self, id: str) -> dict | None:
+        """Return the monitor row with the given ID as a dict, or ``None`` if there is none."""
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            found = conn.execute("SELECT * FROM monitors WHERE id = ?", (id,)).fetchone()
+            if not found:
+                return None
+            return dict(found)
+    def list_active_monitors(self) -> list[dict]:
+        """Return all monitors with status ``active``, newest ``created_at`` first."""
+        query = "SELECT * FROM monitors WHERE status = 'active' ORDER BY created_at DESC"
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            active_rows = conn.execute(query).fetchall()
+            return list(map(dict, active_rows))
+    def list_all_monitors(self) -> list[dict]:
+        """Return up to 50 monitors of any status, newest ``created_at`` first."""
+        query = "SELECT * FROM monitors ORDER BY created_at DESC LIMIT 50"
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            recent_rows = conn.execute(query).fetchall()
+            return list(map(dict, recent_rows))
+    def mark_monitor_ran(self, id: str, next_run_at: str, done: bool = False) -> None:
+        """Record one completed run: bump ``runs_so_far`` and set ``last_run_at``/``next_run_at``.
+        The status becomes ``done`` when ``done`` is true and stays ``active`` otherwise, but
+        only if the monitor is still ``active``. A monitor that was cancelled while its run
+        was in flight keeps its ``cancelled`` status instead of being silently re-activated.
+        """
+        status = "done" if done else "active"
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            conn.execute(
+                "UPDATE monitors SET runs_so_far = runs_so_far + 1, last_run_at = ?, "
+                "next_run_at = ?, "
+                "status = CASE WHEN status = 'active' THEN ? ELSE status END "
+                "WHERE id = ?",
+                (_now(), next_run_at, status, id),
+            )
+    def cancel_monitor(self, id: str) -> bool:
+        """Mark an active monitor as cancelled; return ``True`` only if a row was changed."""
+        statement = "UPDATE monitors SET status = 'cancelled' WHERE id = ? AND status = 'active'"
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            changed = conn.execute(statement, (id,)).rowcount
+            return changed > 0
+    def cancel_all_monitors(self, channel: str = "") -> int:
+        """Cancel every active monitor, limited to ``channel`` when one is given; return the count."""
+        statement = "UPDATE monitors SET status = 'cancelled' WHERE status = 'active'"
+        params: tuple = ()
+        if channel:
+            # Narrow the update to the requesting channel.
+            statement += " AND channel = ?"
+            params = (channel,)
+        with self._conn() as conn:
+            self._ensure_monitors_table(conn)
+            return conn.execute(statement, params).rowcount
     # ── Pending bot-message routing questions ─────────────────────────────────
     def _ensure_pending_routings_table(self, conn):