@misterhuydo/sentinel 1.5.52 → 1.5.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cairn/.hint-lock +1 -1
- package/.cairn/session.json +2 -2
- package/package.json +1 -1
- package/python/sentinel/__init__.py +1 -1
- package/python/sentinel/main.py +151 -0
- package/python/sentinel/sentinel_boss.py +249 -3
- package/python/sentinel/state_store.py +100 -0
package/.cairn/.hint-lock
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2026-04-
|
|
1
|
+
2026-04-21T05:59:33.396Z
|
package/.cairn/session.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"message": "Auto-checkpoint at 2026-04-
|
|
3
|
-
"checkpoint_at": "2026-04-
|
|
2
|
+
"message": "Auto-checkpoint at 2026-04-21T06:02:37.029Z",
|
|
3
|
+
"checkpoint_at": "2026-04-21T06:02:37.031Z",
|
|
4
4
|
"active_files": [
|
|
5
5
|
"J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
|
|
6
6
|
"J:\\Projects\\Sentinel\\cli\\lib\\test.js",
|
package/package.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.5.52"
|
|
1
|
+
__version__ = "1.5.54"
|
package/python/sentinel/main.py
CHANGED
|
@@ -1730,6 +1730,156 @@ async def _patch_soak_monitor(cfg_loader: ConfigLoader) -> None:
|
|
|
1730
1730
|
logger.info("Patch soak complete — hash=%s clean=True auto_publish=%s", patch_hash[:8], auto_publish)
|
|
1731
1731
|
|
|
1732
1732
|
|
|
1733
|
+
def _format_monitor_step_output(tool: str, raw: str) -> str | None:
|
|
1734
|
+
"""
|
|
1735
|
+
Convert a _run_tool JSON result into a human-readable Slack string.
|
|
1736
|
+
Returns None if there are no meaningful results to post (e.g. empty filter match).
|
|
1737
|
+
"""
|
|
1738
|
+
import json as _json
|
|
1739
|
+
try:
|
|
1740
|
+
data = _json.loads(raw)
|
|
1741
|
+
except Exception:
|
|
1742
|
+
return raw.strip() or None
|
|
1743
|
+
|
|
1744
|
+
# ── fetch_logs ────────────────────────────────────────────────────────────
|
|
1745
|
+
if tool == "fetch_logs":
|
|
1746
|
+
results = data.get("results", [])
|
|
1747
|
+
all_lines: list[str] = []
|
|
1748
|
+
for r in results:
|
|
1749
|
+
for line in (r.get("lines") or []):
|
|
1750
|
+
all_lines.append(line.strip())
|
|
1751
|
+
if not all_lines:
|
|
1752
|
+
return None # no matches — skip posting this cycle
|
|
1753
|
+
lines_text = "\n".join(all_lines[:200])
|
|
1754
|
+
if len(all_lines) > 200:
|
|
1755
|
+
lines_text += f"\n_…and {len(all_lines) - 200} more lines_"
|
|
1756
|
+
return f"```\n{lines_text}\n```"
|
|
1757
|
+
|
|
1758
|
+
# ── filter_logs ───────────────────────────────────────────────────────────
|
|
1759
|
+
if tool == "filter_logs":
|
|
1760
|
+
matches = data.get("matches") or data.get("results") or []
|
|
1761
|
+
if not matches:
|
|
1762
|
+
return None
|
|
1763
|
+
lines = [
|
|
1764
|
+
f"[{m.get('source','?')}:{m.get('file','?')}] {m.get('line', m)}"
|
|
1765
|
+
if isinstance(m, dict) else str(m)
|
|
1766
|
+
for m in matches[:200]
|
|
1767
|
+
]
|
|
1768
|
+
text = "\n".join(lines)
|
|
1769
|
+
if len(matches) > 200:
|
|
1770
|
+
text += f"\n_…and {len(matches) - 200} more_"
|
|
1771
|
+
return f"```\n{text}\n```"
|
|
1772
|
+
|
|
1773
|
+
# ── get_status / check_health / others — always show ─────────────────────
|
|
1774
|
+
if "error" in data:
|
|
1775
|
+
return f":warning: `{tool}` error: {data['error']}"
|
|
1776
|
+
# Generic: strip JSON, show as compact text
|
|
1777
|
+
return raw.strip()
|
|
1778
|
+
|
|
1779
|
+
|
|
1780
|
+
async def _execute_monitor(monitor: dict, cfg_loader: ConfigLoader, store: StateStore) -> None:
    """Execute one monitor run: call all steps, post formatted output to Slack.

    Args:
        monitor: One monitor record as a dict. Keys read here: ``id``,
            ``channel``, ``user_id``, ``steps_json``, ``interval_seconds``,
            ``stop_at``, ``max_runs``, ``runs_so_far`` and ``name``.
        cfg_loader: Config loader; ``cfg_loader.sentinel.slack_bot_token`` is
            used to build the Slack client.
        store: State store; the run is recorded through ``mark_monitor_ran``.

    The schedule is always advanced (or the monitor finished) before anything
    is posted, so a Slack failure cannot cause the same run to repeat.
    """
    import json as _json
    from datetime import datetime, timezone, timedelta
    from .sentinel_boss import _run_tool

    mon_id = monitor["id"]
    channel = monitor.get("channel", "")
    user_id = monitor.get("user_id", "")
    interval_s = int(monitor.get("interval_seconds") or 300)
    stop_at = monitor.get("stop_at")
    max_runs = monitor.get("max_runs")
    runs_so_far = int(monitor.get("runs_so_far") or 0)

    # A corrupt steps_json must not abort the run before mark_monitor_ran is
    # reached: the monitor would stay "due" and be re-dispatched on every poll.
    try:
        steps = _json.loads(monitor.get("steps_json") or "[]")
    except ValueError as e:
        logger.warning("Monitor %s: invalid steps_json, running no steps: %s", mon_id, e)
        steps = []
    if not isinstance(steps, list):
        steps = []
    steps = [s for s in steps if isinstance(s, dict)]

    mon_name = monitor.get("name") or " → ".join(str(s.get("tool", "")) for s in steps)

    cfg = cfg_loader.sentinel

    slack_client = None
    if cfg.slack_bot_token and channel:
        try:
            from slack_sdk.web.async_client import AsyncWebClient as _AsyncWebClient
            slack_client = _AsyncWebClient(token=cfg.slack_bot_token)
        except Exception as e:
            # Best effort: the steps still run, their output is just not posted.
            logger.warning("Monitor %s: Slack client unavailable: %s", mon_id, e)

    # Run each step and collect formatted output
    formatted_parts: list[str] = []
    for step in steps:
        tool = step.get("tool", "")
        inputs = step.get("inputs") or {}
        try:
            raw = await asyncio.wait_for(
                _run_tool(tool, inputs, cfg_loader, store,
                          slack_client=slack_client, user_id=user_id, channel=channel),
                timeout=300,
            )
            part = _format_monitor_step_output(tool, raw)
            if part:
                formatted_parts.append(part)
        except asyncio.TimeoutError:
            formatted_parts.append(f":warning: `{tool}` timed out after 5 minutes.")
        except Exception as e:
            formatted_parts.append(f":warning: `{tool}` error: {e}")

    runs_after = runs_so_far + 1
    now = datetime.now(timezone.utc)

    # Determine if this was the final run
    done = False
    if max_runs and runs_after >= int(max_runs):
        done = True
    if stop_at:
        try:
            stop_dt = datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
            if stop_dt.tzinfo is None:
                # stop_at is specified as UTC; a value without an offset would
                # otherwise raise on the aware/naive comparison below and the
                # monitor would never stop.
                stop_dt = stop_dt.replace(tzinfo=timezone.utc)
            # Final run if the next scheduled run would land on/after stop_at.
            if now + timedelta(seconds=interval_s) >= stop_dt:
                done = True
        except Exception as e:
            logger.warning("Monitor %s: cannot evaluate stop_at %r: %s", mon_id, stop_at, e)

    next_run_at = (now + timedelta(seconds=interval_s)).isoformat()
    store.mark_monitor_ran(mon_id, next_run_at, done=done)

    if not slack_client or not channel:
        return

    # Only post if there is something to show
    if formatted_parts:
        combined = "\n".join(formatted_parts)
        MAX_LEN = 3800
        if len(combined) > MAX_LEN:
            truncated = combined[:MAX_LEN]
            # Close a code fence that the cut left open so the truncation
            # note is not rendered inside the code block.
            if truncated.count("```") % 2:
                truncated += "\n```"
            combined = truncated + f"\n_…truncated ({len(combined)} chars total)_"
        header = f":repeat: *Monitor `{mon_id}`* ({mon_name}) — run #{runs_after}"
        if done:
            header += " _(final)_"
        try:
            await slack_client.chat_postMessage(channel=channel, text=f"{header}\n{combined}")
        except Exception as e:
            logger.warning("Monitor %s: Slack post failed: %s", mon_id, e)

    if done:
        try:
            await slack_client.chat_postMessage(
                channel=channel,
                text=f":checkered_flag: Monitor `{mon_id}` ({mon_name}) finished after {runs_after} run(s).",
            )
        except Exception as e:
            logger.warning("Monitor %s: Slack done-post failed: %s", mon_id, e)
|
|
1868
|
+
|
|
1869
|
+
|
|
1870
|
+
async def _monitor_runner_loop(cfg_loader: ConfigLoader, store: StateStore) -> None:
    """Check every 30 s for due monitors and dispatch them.

    A monitor's ``next_run_at`` is only advanced once its run has finished its
    steps, and a run can take longer than the 30 s poll interval. Monitors
    whose previous run is still in flight are therefore skipped, so the same
    monitor is never executed twice concurrently.
    """
    # monitor id -> running task; also keeps a strong reference to each task
    # so it cannot be garbage-collected before it completes.
    in_flight: dict = {}

    def _on_finished(mon_id, task) -> None:
        in_flight.pop(mon_id, None)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.warning("Monitor %s: run failed: %s", mon_id, exc)

    await asyncio.sleep(15)  # brief startup delay
    while True:
        try:
            for monitor in store.get_due_monitors():
                mon_id = monitor["id"]
                if mon_id in in_flight:
                    continue  # previous run still executing
                task = asyncio.ensure_future(_execute_monitor(monitor, cfg_loader, store))
                in_flight[mon_id] = task
                task.add_done_callback(lambda t, _id=mon_id: _on_finished(_id, t))
        except Exception as e:
            logger.warning("Monitor runner: error checking due monitors: %s", e)
        await asyncio.sleep(30)
|
|
1881
|
+
|
|
1882
|
+
|
|
1733
1883
|
async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
|
|
1734
1884
|
interval = cfg_loader.sentinel.poll_interval_seconds
|
|
1735
1885
|
logger.info("Sentinel starting — poll interval: %ds, repos: %s",
|
|
@@ -1774,6 +1924,7 @@ async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
|
|
|
1774
1924
|
asyncio.ensure_future(_dev_poll_loop(cfg_loader, store))
|
|
1775
1925
|
asyncio.ensure_future(_patch_soak_monitor(cfg_loader))
|
|
1776
1926
|
asyncio.ensure_future(_repo_task_poll_loop(cfg_loader, store))
|
|
1927
|
+
asyncio.ensure_future(_monitor_runner_loop(cfg_loader, store))
|
|
1777
1928
|
|
|
1778
1929
|
while True:
|
|
1779
1930
|
try:
|
|
package/python/sentinel/sentinel_boss.py
CHANGED
|
@@ -367,6 +367,19 @@ reply with a grouped summary like this:
|
|
|
367
367
|
– The user explicitly says "STS is Whydah-SecurityTokenService" or "route STS to ..."
|
|
368
368
|
After saving, Sentinel automatically reprocesses any pending routing for that service.
|
|
369
369
|
|
|
370
|
+
*Scheduled monitors*
|
|
371
|
+
• `start_monitor` — run a tool repeatedly at an interval, posting results to this channel.
|
|
372
|
+
Supported patterns:
|
|
373
|
+
– "every X min/hours/days until I say stop"
|
|
374
|
+
– "every X min for Y hours/days" → calculate stop_at = now + duration
|
|
375
|
+
– "every X min for N times" → set max_runs=N
|
|
376
|
+
– "every X min until <datetime>" → set stop_at
|
|
377
|
+
Minimum interval: 60 seconds. Allowed tools: fetch_logs, filter_logs, get_status,
|
|
378
|
+
ask_logs, list_recent_commits, check_health.
|
|
379
|
+
Always confirm to the user with the monitor ID and stop condition before creating.
|
|
380
|
+
• `stop_monitor` — cancel a monitor by ID, or pass "all" to cancel all in this channel
|
|
381
|
+
• `list_monitors` — show all active monitors
|
|
382
|
+
|
|
370
383
|
*File sharing*
|
|
371
384
|
• `post_file` — upload any output as a Slack file (logs, diffs, reports)
|
|
372
385
|
|
|
@@ -524,6 +537,15 @@ Response length:
|
|
|
524
537
|
- Status/health data: one line. Don't re-list every field from the JSON.
|
|
525
538
|
- Actions (fix, merge, release): brief confirmation of what happened.
|
|
526
539
|
|
|
540
|
+
Formatting — always use Slack code blocks (triple backticks) for:
|
|
541
|
+
- Log lines / log output
|
|
542
|
+
- JSON or structured data
|
|
543
|
+
- Stack traces
|
|
544
|
+
- Code snippets
|
|
545
|
+
- File diffs
|
|
546
|
+
- Any multi-line technical content
|
|
547
|
+
Never paste raw JSON or log text as plain prose.
|
|
548
|
+
|
|
527
549
|
When to act vs. when to ask:
|
|
528
550
|
- Any read/investigate tool → call immediately without asking permission.
|
|
529
551
|
Never say "Want me to check?" — just check and report results.
|
|
@@ -1318,6 +1340,79 @@ _TOOLS = [
|
|
|
1318
1340
|
"required": ["service_name", "repo_name"],
|
|
1319
1341
|
},
|
|
1320
1342
|
},
|
|
1343
|
+
{
|
|
1344
|
+
"name": "start_monitor",
|
|
1345
|
+
"description": (
|
|
1346
|
+
"Create a recurring scheduled task that runs a tool at a regular interval and posts "
|
|
1347
|
+
"results to this Slack channel. Supports: run indefinitely (until stopped), run for "
|
|
1348
|
+
"a fixed duration (stop_at), or run N times (max_runs). "
|
|
1349
|
+
"steps is a list of {tool, inputs} objects — most monitors are a single step. "
|
|
1350
|
+
"Allowed tools: fetch_logs, filter_logs, get_status, ask_logs, list_recent_commits, check_health. "
|
|
1351
|
+
"Boss calculates stop_at from phrases like 'within 2 hours' / 'for 30 minutes' using "
|
|
1352
|
+
"the current UTC time in the system prompt. "
|
|
1353
|
+
"Examples: 'fetch SSOLWA logs filtered by provision/phone every 5 min for 2 hours', "
|
|
1354
|
+
"'check STS health every 10 min until I say stop', 'get status every hour for 3 times'."
|
|
1355
|
+
),
|
|
1356
|
+
"input_schema": {
|
|
1357
|
+
"type": "object",
|
|
1358
|
+
"properties": {
|
|
1359
|
+
"name": {
|
|
1360
|
+
"type": "string",
|
|
1361
|
+
"description": "Short label for this monitor (e.g. 'SSOLWA provision/phone')",
|
|
1362
|
+
},
|
|
1363
|
+
"steps": {
|
|
1364
|
+
"type": "array",
|
|
1365
|
+
"description": "Ordered list of tool calls to execute each interval",
|
|
1366
|
+
"items": {
|
|
1367
|
+
"type": "object",
|
|
1368
|
+
"properties": {
|
|
1369
|
+
"tool": {"type": "string"},
|
|
1370
|
+
"inputs": {"type": "object"},
|
|
1371
|
+
},
|
|
1372
|
+
"required": ["tool", "inputs"],
|
|
1373
|
+
},
|
|
1374
|
+
},
|
|
1375
|
+
"interval_seconds": {
|
|
1376
|
+
"type": "integer",
|
|
1377
|
+
"description": "How often to run in seconds (minimum 60)",
|
|
1378
|
+
},
|
|
1379
|
+
"stop_at": {
|
|
1380
|
+
"type": "string",
|
|
1381
|
+
"description": "ISO datetime (UTC) to stop, or null to run indefinitely",
|
|
1382
|
+
},
|
|
1383
|
+
"max_runs": {
|
|
1384
|
+
"type": "integer",
|
|
1385
|
+
"description": "Maximum number of runs, or null for unlimited",
|
|
1386
|
+
},
|
|
1387
|
+
},
|
|
1388
|
+
"required": ["steps", "interval_seconds"],
|
|
1389
|
+
},
|
|
1390
|
+
},
|
|
1391
|
+
{
|
|
1392
|
+
"name": "stop_monitor",
|
|
1393
|
+
"description": (
|
|
1394
|
+
"Cancel a running monitor by ID. Pass 'all' to cancel every active monitor in this channel. "
|
|
1395
|
+
"Use for: 'stop monitor m-abc123', 'stop all monitors', 'cancel the log watch'."
|
|
1396
|
+
),
|
|
1397
|
+
"input_schema": {
|
|
1398
|
+
"type": "object",
|
|
1399
|
+
"properties": {
|
|
1400
|
+
"monitor_id": {
|
|
1401
|
+
"type": "string",
|
|
1402
|
+
"description": "Monitor ID (e.g. 'm-abc123') or 'all'",
|
|
1403
|
+
},
|
|
1404
|
+
},
|
|
1405
|
+
"required": ["monitor_id"],
|
|
1406
|
+
},
|
|
1407
|
+
},
|
|
1408
|
+
{
|
|
1409
|
+
"name": "list_monitors",
|
|
1410
|
+
"description": (
|
|
1411
|
+
"List all active scheduled monitors. "
|
|
1412
|
+
"Use for: 'what monitors are running?', 'show scheduled tasks', 'list active monitors'."
|
|
1413
|
+
),
|
|
1414
|
+
"input_schema": {"type": "object", "properties": {}},
|
|
1415
|
+
},
|
|
1321
1416
|
{
|
|
1322
1417
|
"name": "upgrade_sentinel",
|
|
1323
1418
|
"description": (
|
|
@@ -2048,6 +2143,41 @@ def _git_pull(path: Path) -> dict:
|
|
|
2048
2143
|
|
|
2049
2144
|
# ── Log-source name resolver ──────────────────────────────────────────────────
|
|
2050
2145
|
|
|
2146
|
+
def _resolve_source_hint(hint: str, cfg_loader, store=None) -> str:
|
|
2147
|
+
"""
|
|
2148
|
+
Translate a short alias (e.g. 'SSOLWA') to the canonical log-source / repo name
|
|
2149
|
+
so that _filter_log_sources and _auto_health_check can match it.
|
|
2150
|
+
|
|
2151
|
+
Resolution order:
|
|
2152
|
+
1. Already matches a log-source name → return as-is
|
|
2153
|
+
2. Matches a repo's SERVICE_ALIASES → return the repo name
|
|
2154
|
+
3. DB aliases → return the stored repo name
|
|
2155
|
+
4. Fallback → return hint unchanged
|
|
2156
|
+
"""
|
|
2157
|
+
if not hint:
|
|
2158
|
+
return hint
|
|
2159
|
+
hint_lower = hint.lower()
|
|
2160
|
+
|
|
2161
|
+
# 1. Direct log-source match — no translation needed
|
|
2162
|
+
for src_name in cfg_loader.log_sources:
|
|
2163
|
+
if hint_lower in src_name.lower():
|
|
2164
|
+
return hint
|
|
2165
|
+
|
|
2166
|
+
# 2. Config-declared SERVICE_ALIASES
|
|
2167
|
+
for repo_name, repo in cfg_loader.repos.items():
|
|
2168
|
+
declared = getattr(repo, "service_aliases", [])
|
|
2169
|
+
if any(hint_lower == a.lower() for a in declared):
|
|
2170
|
+
return repo_name
|
|
2171
|
+
|
|
2172
|
+
# 3. DB aliases
|
|
2173
|
+
if store:
|
|
2174
|
+
alias = store.get_service_alias(hint)
|
|
2175
|
+
if alias:
|
|
2176
|
+
return alias
|
|
2177
|
+
|
|
2178
|
+
return hint
|
|
2179
|
+
|
|
2180
|
+
|
|
2051
2181
|
def _filter_log_sources(props_files: list, source_hint: str) -> list:
|
|
2052
2182
|
"""
|
|
2053
2183
|
Return the subset of props_files whose log source matches source_hint.
|
|
@@ -2118,6 +2248,20 @@ def _auto_health_check(source_hint: str, cfg_loader) -> dict | None:
|
|
|
2118
2248
|
return None
|
|
2119
2249
|
|
|
2120
2250
|
|
|
2251
|
+
def _format_duration(seconds: int) -> str:
|
|
2252
|
+
"""Convert seconds to a human-readable duration string."""
|
|
2253
|
+
if seconds < 60:
|
|
2254
|
+
return f"{seconds}s"
|
|
2255
|
+
if seconds < 3600:
|
|
2256
|
+
m = seconds // 60
|
|
2257
|
+
return f"{m} min" + ("s" if m != 1 else "")
|
|
2258
|
+
if seconds < 86400:
|
|
2259
|
+
h = seconds // 3600
|
|
2260
|
+
return f"{h} hour" + ("s" if h != 1 else "")
|
|
2261
|
+
d = seconds // 86400
|
|
2262
|
+
return f"{d} day" + ("s" if d != 1 else "")
|
|
2263
|
+
|
|
2264
|
+
|
|
2121
2265
|
# ── Tool execution ────────────────────────────────────────────────────────────
|
|
2122
2266
|
|
|
2123
2267
|
async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=None, user_id: str = "", channel: str = "", is_admin: bool = False) -> str:
|
|
@@ -2756,7 +2900,7 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
|
|
|
2756
2900
|
|
|
2757
2901
|
if name == "search_logs":
|
|
2758
2902
|
query = inputs.get("query", "")
|
|
2759
|
-
source = inputs.get("source", "").lower()
|
|
2903
|
+
source = _resolve_source_hint(inputs.get("source", "").lower(), cfg_loader, store)
|
|
2760
2904
|
max_matches = int(inputs.get("max_matches", 30))
|
|
2761
2905
|
tail_override = inputs.get("tail")
|
|
2762
2906
|
|
|
@@ -2907,7 +3051,7 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
|
|
|
2907
3051
|
return line.strip()[:40]
|
|
2908
3052
|
|
|
2909
3053
|
query_f = inputs.get("query", "")
|
|
2910
|
-
source_f = inputs.get("source", "").lower()
|
|
3054
|
+
source_f = _resolve_source_hint(inputs.get("source", "").lower(), cfg_loader, store)
|
|
2911
3055
|
since_hours = inputs.get("since_hours")
|
|
2912
3056
|
max_matches = int(inputs.get("max_matches", 300))
|
|
2913
3057
|
case_flag = 0 if inputs.get("case_sensitive") else _re.IGNORECASE
|
|
@@ -3127,7 +3271,7 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
|
|
|
3127
3271
|
return json.dumps({"results": results})
|
|
3128
3272
|
|
|
3129
3273
|
if name == "fetch_logs":
|
|
3130
|
-
source_filter = inputs.get("source", "").lower()
|
|
3274
|
+
source_filter = _resolve_source_hint(inputs.get("source", "").lower(), cfg_loader, store)
|
|
3131
3275
|
debug = bool(inputs.get("debug", False))
|
|
3132
3276
|
tail_override = inputs.get("tail")
|
|
3133
3277
|
grep_override = inputs.get("grep_filter", "")
|
|
@@ -3367,6 +3511,108 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
|
|
|
3367
3511
|
),
|
|
3368
3512
|
})
|
|
3369
3513
|
|
|
3514
|
+
if name == "start_monitor":
|
|
3515
|
+
steps = inputs.get("steps") or []
|
|
3516
|
+
interval_s = int(inputs.get("interval_seconds", 0))
|
|
3517
|
+
stop_at = inputs.get("stop_at") or None
|
|
3518
|
+
max_runs = inputs.get("max_runs") or None
|
|
3519
|
+
mon_name = (inputs.get("name") or "").strip()
|
|
3520
|
+
|
|
3521
|
+
if not steps:
|
|
3522
|
+
return json.dumps({"error": "steps is required — list at least one {tool, inputs} step"})
|
|
3523
|
+
if interval_s < 60:
|
|
3524
|
+
return json.dumps({"error": f"interval_seconds must be >= 60 (minimum 1 minute), got {interval_s}"})
|
|
3525
|
+
|
|
3526
|
+
_MONITOR_ALLOWED = {"fetch_logs", "filter_logs", "get_status", "ask_logs",
|
|
3527
|
+
"list_recent_commits", "check_health"}
|
|
3528
|
+
for _step in steps:
|
|
3529
|
+
_t = _step.get("tool", "")
|
|
3530
|
+
if _t not in _MONITOR_ALLOWED:
|
|
3531
|
+
return json.dumps({
|
|
3532
|
+
"error": f"Tool '{_t}' is not allowed in monitors.",
|
|
3533
|
+
"allowed": sorted(_MONITOR_ALLOWED),
|
|
3534
|
+
})
|
|
3535
|
+
|
|
3536
|
+
if stop_at:
|
|
3537
|
+
try:
|
|
3538
|
+
datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
|
|
3539
|
+
except ValueError:
|
|
3540
|
+
return json.dumps({"error": f"Invalid stop_at datetime: '{stop_at}'. Use ISO 8601 format."})
|
|
3541
|
+
|
|
3542
|
+
if max_runs is not None:
|
|
3543
|
+
max_runs = int(max_runs)
|
|
3544
|
+
if max_runs < 1:
|
|
3545
|
+
return json.dumps({"error": "max_runs must be >= 1"})
|
|
3546
|
+
|
|
3547
|
+
mon_id = "m-" + uuid.uuid4().hex[:6]
|
|
3548
|
+
steps_json = json.dumps(steps)
|
|
3549
|
+
|
|
3550
|
+
store.create_monitor(
|
|
3551
|
+
id=mon_id, name=mon_name, steps_json=steps_json,
|
|
3552
|
+
interval_seconds=interval_s, stop_at=stop_at, max_runs=max_runs,
|
|
3553
|
+
channel=channel, user_id=user_id or "",
|
|
3554
|
+
)
|
|
3555
|
+
|
|
3556
|
+
interval_str = _format_duration(interval_s)
|
|
3557
|
+
if stop_at:
|
|
3558
|
+
_stop_dt = datetime.fromisoformat(stop_at.replace("Z", "+00:00"))
|
|
3559
|
+
stop_desc = f"until {_stop_dt.strftime('%Y-%m-%d %H:%M UTC')}"
|
|
3560
|
+
if max_runs:
|
|
3561
|
+
stop_desc += f" or {max_runs} run(s) — whichever comes first"
|
|
3562
|
+
elif max_runs:
|
|
3563
|
+
stop_desc = f"for {max_runs} run{'s' if max_runs > 1 else ''}"
|
|
3564
|
+
else:
|
|
3565
|
+
stop_desc = "until you say stop"
|
|
3566
|
+
|
|
3567
|
+
steps_desc = " → ".join(s["tool"] for s in steps)
|
|
3568
|
+
label = mon_name or steps_desc
|
|
3569
|
+
return json.dumps({
|
|
3570
|
+
"monitor_id": mon_id,
|
|
3571
|
+
"label": label,
|
|
3572
|
+
"interval": interval_str,
|
|
3573
|
+
"stop": stop_desc,
|
|
3574
|
+
"first_run_in": interval_str,
|
|
3575
|
+
"status": "active",
|
|
3576
|
+
"message": (
|
|
3577
|
+
f"Monitor `{mon_id}` created — running `{label}` every {interval_str} {stop_desc}. "
|
|
3578
|
+
f"First run in {interval_str}. Say `stop monitor {mon_id}` to cancel."
|
|
3579
|
+
),
|
|
3580
|
+
})
|
|
3581
|
+
|
|
3582
|
+
if name == "stop_monitor":
|
|
3583
|
+
mon_id = (inputs.get("monitor_id") or "").strip()
|
|
3584
|
+
if not mon_id:
|
|
3585
|
+
return json.dumps({"error": "monitor_id is required"})
|
|
3586
|
+
if mon_id.lower() == "all":
|
|
3587
|
+
count = store.cancel_all_monitors(channel=channel)
|
|
3588
|
+
return json.dumps({"cancelled": count, "message": f"Cancelled {count} active monitor(s)."})
|
|
3589
|
+
ok = store.cancel_monitor(mon_id)
|
|
3590
|
+
if ok:
|
|
3591
|
+
return json.dumps({"status": "cancelled", "monitor_id": mon_id})
|
|
3592
|
+
return json.dumps({"error": f"Monitor '{mon_id}' not found or already stopped."})
|
|
3593
|
+
|
|
3594
|
+
if name == "list_monitors":
|
|
3595
|
+
monitors = store.list_active_monitors()
|
|
3596
|
+
if not monitors:
|
|
3597
|
+
return json.dumps({"monitors": [], "message": "No active monitors."})
|
|
3598
|
+
result = []
|
|
3599
|
+
for _m in monitors:
|
|
3600
|
+
_runs_left = None
|
|
3601
|
+
if _m.get("max_runs"):
|
|
3602
|
+
_runs_left = _m["max_runs"] - _m["runs_so_far"]
|
|
3603
|
+
result.append({
|
|
3604
|
+
"id": _m["id"],
|
|
3605
|
+
"name": _m.get("name") or "",
|
|
3606
|
+
"status": _m["status"],
|
|
3607
|
+
"interval": _format_duration(_m["interval_seconds"]),
|
|
3608
|
+
"runs_so_far": _m["runs_so_far"],
|
|
3609
|
+
"runs_left": _runs_left,
|
|
3610
|
+
"next_run_at": _m.get("next_run_at") or "",
|
|
3611
|
+
"stop_at": _m.get("stop_at") or "",
|
|
3612
|
+
"steps": json.loads(_m.get("steps_json") or "[]"),
|
|
3613
|
+
})
|
|
3614
|
+
return json.dumps({"monitors": result})
|
|
3615
|
+
|
|
3370
3616
|
if name == "upgrade_sentinel":
|
|
3371
3617
|
if not is_admin:
|
|
3372
3618
|
return json.dumps({"error": "upgrade is admin-only. Ask a Sentinel admin to perform the upgrade."})
|
|
package/python/sentinel/state_store.py
CHANGED
|
@@ -433,6 +433,106 @@ class StateStore:
|
|
|
433
433
|
rows = conn.execute("SELECT * FROM service_aliases ORDER BY service_name").fetchall()
|
|
434
434
|
return [dict(r) for r in rows]
|
|
435
435
|
|
|
436
|
+
# ── Scheduled monitors ────────────────────────────────────────────────────
|
|
437
|
+
|
|
438
|
+
def _ensure_monitors_table(self, conn):
|
|
439
|
+
conn.execute(
|
|
440
|
+
"CREATE TABLE IF NOT EXISTS monitors ("
|
|
441
|
+
"id TEXT PRIMARY KEY, "
|
|
442
|
+
"name TEXT, "
|
|
443
|
+
"steps_json TEXT, "
|
|
444
|
+
"interval_seconds INTEGER, "
|
|
445
|
+
"stop_at TEXT, "
|
|
446
|
+
"max_runs INTEGER, "
|
|
447
|
+
"runs_so_far INTEGER DEFAULT 0, "
|
|
448
|
+
"last_run_at TEXT, "
|
|
449
|
+
"next_run_at TEXT, "
|
|
450
|
+
"channel TEXT, "
|
|
451
|
+
"user_id TEXT, "
|
|
452
|
+
"status TEXT DEFAULT 'active', "
|
|
453
|
+
"created_at TEXT)"
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
def create_monitor(self, id: str, name: str, steps_json: str,
                   interval_seconds: int, stop_at, max_runs,
                   channel: str, user_id: str) -> None:
    """Insert a new active monitor row.

    The first run is scheduled ``interval_seconds`` from now (UTC); the run
    counter starts at 0 and ``last_run_at`` is left NULL.
    """
    from datetime import datetime, timezone, timedelta

    first_due = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
    next_run = first_due.isoformat()
    insert_sql = (
        "INSERT INTO monitors (id, name, steps_json, interval_seconds, stop_at, "
        "max_runs, runs_so_far, last_run_at, next_run_at, channel, user_id, status, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?, 0, NULL, ?, ?, ?, 'active', ?)"
    )
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        values = (
            id, name, steps_json, interval_seconds, stop_at, max_runs,
            next_run, channel, user_id, _now(),
        )
        conn.execute(insert_sql, values)
|
|
470
|
+
|
|
471
|
+
def get_due_monitors(self) -> list[dict]:
    """Return every active monitor whose ``next_run_at`` is at or before now (UTC).

    Timestamps are compared as ISO-8601 strings.
    """
    from datetime import datetime, timezone

    cutoff = datetime.now(timezone.utc).isoformat()
    query = "SELECT * FROM monitors WHERE status = 'active' AND next_run_at <= ?"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        due_rows = conn.execute(query, (cutoff,)).fetchall()
        return list(map(dict, due_rows))
|
|
481
|
+
|
|
482
|
+
def get_monitor(self, id: str) -> dict | None:
|
|
483
|
+
with self._conn() as conn:
|
|
484
|
+
self._ensure_monitors_table(conn)
|
|
485
|
+
row = conn.execute("SELECT * FROM monitors WHERE id = ?", (id,)).fetchone()
|
|
486
|
+
return dict(row) if row else None
|
|
487
|
+
|
|
488
|
+
def list_active_monitors(self) -> list[dict]:
    """Return all monitors with status 'active', newest ``created_at`` first."""
    query = "SELECT * FROM monitors WHERE status = 'active' ORDER BY created_at DESC"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        active_rows = conn.execute(query).fetchall()
        return list(map(dict, active_rows))
|
|
495
|
+
|
|
496
|
+
def list_all_monitors(self) -> list[dict]:
    """Return up to 50 monitors of any status, newest ``created_at`` first."""
    query = "SELECT * FROM monitors ORDER BY created_at DESC LIMIT 50"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        recent_rows = conn.execute(query).fetchall()
        return list(map(dict, recent_rows))
|
|
503
|
+
|
|
504
|
+
def mark_monitor_ran(self, id: str, next_run_at: str, done: bool = False) -> None:
    """Record that a monitor has just run.

    When ``done`` is true the monitor row is deleted. Otherwise its run
    counter is incremented, ``last_run_at`` is set to the current time and
    ``next_run_at`` is stored as the next due time.
    """
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        if not done:
            bump_sql = (
                "UPDATE monitors SET runs_so_far = runs_so_far + 1, last_run_at = ?, "
                "next_run_at = ? WHERE id = ?"
            )
            conn.execute(bump_sql, (_now(), next_run_at, id))
        else:
            conn.execute("DELETE FROM monitors WHERE id = ?", (id,))
|
|
515
|
+
|
|
516
|
+
def cancel_monitor(self, id: str) -> bool:
    """Delete the active monitor with the given id.

    Returns True if a row was deleted, False if no active monitor had that id.
    """
    delete_sql = "DELETE FROM monitors WHERE id = ? AND status = 'active'"
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        removed = conn.execute(delete_sql, (id,)).rowcount
        return removed > 0
|
|
523
|
+
|
|
524
|
+
def cancel_all_monitors(self, channel: str = "") -> int:
    """Delete active monitors and return how many rows were removed.

    With a non-empty ``channel`` only that channel's active monitors are
    deleted; with an empty ``channel`` every active monitor is deleted.
    """
    if channel:
        delete_sql = "DELETE FROM monitors WHERE status = 'active' AND channel = ?"
        params = (channel,)
    else:
        delete_sql = "DELETE FROM monitors WHERE status = 'active'"
        params = ()
    with self._conn() as conn:
        self._ensure_monitors_table(conn)
        return conn.execute(delete_sql, params).rowcount
|
|
535
|
+
|
|
436
536
|
# ── Pending bot-message routing questions ─────────────────────────────────
|
|
437
537
|
|
|
438
538
|
def _ensure_pending_routings_table(self, conn):
|