@misterhuydo/sentinel 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.cairn/.hint-lock CHANGED
@@ -1 +1 @@
1
- 2026-03-24T07:49:55.081Z
1
+ 2026-03-24T08:21:39.465Z
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-03-24T08:01:08.778Z",
3
- "checkpoint_at": "2026-03-24T08:01:08.779Z",
2
+ "message": "Auto-checkpoint at 2026-03-24T08:34:58.068Z",
3
+ "checkpoint_at": "2026-03-24T08:34:58.069Z",
4
4
  "active_files": [],
5
5
  "notes": [],
6
6
  "mtime_snapshot": {}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.4.0",
3
+ "version": "1.4.2",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -28,7 +28,7 @@ _DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
28
28
  _DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
29
29
 
30
30
 
31
- def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
31
+ def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None, synced_files: list = None) -> str:
32
32
  if log_file and log_file.exists():
33
33
  ctx = (
34
34
  "LOG FILE: " + str(log_file) + "\n"
@@ -44,6 +44,15 @@ def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers:
44
44
  )
45
45
  step1 = "Use the issue description above as your primary context."
46
46
 
47
+ if synced_files:
48
+ paths = "\n".join(f" {p}" for p in synced_files)
49
+ ctx += (
50
+ "\n\nFULL LOG HISTORY (rsync'd from remote, unfiltered):\n"
51
+ + paths + "\n"
52
+ "These files contain the complete unfiltered log history. "
53
+ "Use them to search for patterns, related errors, or context preceding this issue."
54
+ )
55
+
47
56
  marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
48
57
  marker_instruction = "\n".join([
49
58
  "For EVERY method and constructor you modify, add this as the FIRST executable line:",
@@ -205,7 +214,9 @@ def generate_fix(
205
214
  log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
206
215
  if not log_file.exists():
207
216
  log_file = None
208
- prompt = _build_prompt(event, repo, log_file, marker)
217
+ from .log_syncer import get_synced_files
218
+ synced = get_synced_files(event.source, cfg.workspace_dir)
219
+ prompt = _build_prompt(event, repo, log_file, marker, synced_files=synced or None)
209
220
 
210
221
  # -- Cross-source dedup: skip if fingerprint already fixed in recent git commits ------
211
222
  if repo.local_path:
@@ -196,5 +196,15 @@ def _fetch_cloudflare(src, cfg):
196
196
 
197
197
  rolling_path = workspace / f"{src.name}.log"
198
198
  _rolling_update(rolling_path, "\n".join(lines), cfg.log_retention_hours)
199
- logger.info("Cloudflare fetch %s: %d new lines -> %s", src.name, len(lines), rolling_path)
199
+
200
+ # Accumulate into workspace/synced/ with longer retention so Claude Code
201
+ # (and ask_logs / filter_logs) can inspect full history — same as SSH rsync.
202
+ retention_hours = getattr(cfg, "sync_retention_days", 7) * 24
203
+ synced_dir = Path(cfg.workspace_dir) / "synced" / src.name
204
+ synced_dir.mkdir(parents=True, exist_ok=True)
205
+ synced_path = synced_dir / "cloudflare.log"
206
+ _rolling_update(synced_path, "\n".join(lines), retention_hours)
207
+
208
+ logger.info("Cloudflare fetch %s: %d new lines -> %s (synced: %s)",
209
+ src.name, len(lines), rolling_path, synced_path)
200
210
  return [rolling_path]
@@ -108,7 +108,16 @@ What you can do (tools available):
108
108
  e.g. "what does the 1881 backend do?", "find PIN validation in elprint",
109
109
  "any TODOs in cairn?", "are there security issues in elprint-sales?"
110
110
 
111
- 20. restart_project Stop and restart a specific Sentinel monitoring instance (stop.sh + start.sh).
111
+ 20. ask_logs Ask Claude Code to search and summarize logs for a source.
112
+ Claude Code reads the full log history (synced + rolling) and answers.
113
+ Use when the user asks something that requires reading and reasoning
114
+ over log content — not just a grep match.
115
+ e.g. "what errors happened yesterday in SSOLWA?",
116
+ "summarize the last week of STS logs",
117
+ "what's been causing 400s in the 1881 logs?",
118
+ "any unusual patterns in elprint logs recently?"
119
+
120
+ 21. restart_project — Stop and restart a specific Sentinel monitoring instance (stop.sh + start.sh).
112
121
  This restarts the Sentinel agent for that project, NOT the application itself.
113
122
  e.g. "restart sentinel for 1881", "restart the 1881 monitor", "reload elprint sentinel"
114
123
 
@@ -132,6 +141,7 @@ reply with a short summary grouped by category:
132
141
  • `search_logs` — live SSH grep on production servers — "search logs for illegal PIN in 1881"
133
142
  • `filter_logs` — instant grep on locally-synced logs (no SSH) — "filter logs for TryDig", "show errors from last 24h"
134
143
  • `tail_log` — last N lines of a log source, no filter — "show recent SSOLWA logs"
144
+ • `ask_logs` — ask Claude Code to read and summarize logs — "what happened in SSOLWA yesterday?", "summarize last week of STS logs"
135
145
 
136
146
  *Codebase questions*
137
147
  • `ask_codebase` — any question about a repo's code — "what does 1881 do?", "find PIN validation", "any TODOs?", "security issues?"
@@ -769,6 +779,33 @@ _TOOLS = [
769
779
  "required": ["source"],
770
780
  },
771
781
  },
782
+ {
783
+ "name": "ask_logs",
784
+ "description": (
785
+ "Ask Claude Code to search and summarize log files for a source. "
786
+ "Claude Code reads the full log history (rsync'd synced logs + rolling window) "
787
+ "and answers the question using its file tools — not just a regex match. "
788
+ "Use for analysis questions that require reading and reasoning over log content. "
789
+ "e.g. 'what errors happened yesterday in SSOLWA?', "
790
+ "'summarize last week of STS logs', "
791
+ "'what's been causing 400s in 1881 logs?', "
792
+ "'any unusual patterns in elprint logs recently?'"
793
+ ),
794
+ "input_schema": {
795
+ "type": "object",
796
+ "properties": {
797
+ "source": {
798
+ "type": "string",
799
+ "description": "Log source name (partial match, e.g. 'SSOLWA', 'STS'). Leave blank to query all sources.",
800
+ },
801
+ "question": {
802
+ "type": "string",
803
+ "description": "Natural language question about the logs",
804
+ },
805
+ },
806
+ "required": ["question"],
807
+ },
808
+ },
772
809
  {
773
810
  "name": "post_file",
774
811
  "description": (
@@ -1878,6 +1915,96 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
1878
1915
  results = [_ask_one(rn, r) for rn, r in matched]
1879
1916
  return json.dumps({"project": target, "repos_queried": len(results), "results": results})
1880
1917
 
1918
+ if name == "ask_logs":
1919
+ question = inputs.get("question", "")
1920
+ source_arg = inputs.get("source", "").lower()
1921
+
1922
+ cfg = cfg_loader.sentinel
1923
+ workspace = Path(cfg.workspace_dir)
1924
+ synced_base = workspace / "synced"
1925
+ fetched_base = workspace / "fetched"
1926
+
1927
+ # Collect all relevant log files for the requested source(s)
1928
+ log_files = []
1929
+
1930
+ if source_arg:
1931
+ # Synced history
1932
+ if synced_base.exists():
1933
+ for d in sorted(synced_base.iterdir()):
1934
+ if d.is_dir() and source_arg in d.name.lower():
1935
+ log_files.extend(sorted(d.glob("*")))
1936
+ # Rolling fetched window
1937
+ for f in sorted(fetched_base.glob("*.log")):
1938
+ if source_arg in f.stem.lower() and f not in log_files:
1939
+ log_files.append(f)
1940
+ else:
1941
+ # All sources
1942
+ if synced_base.exists():
1943
+ for d in sorted(synced_base.iterdir()):
1944
+ if d.is_dir():
1945
+ log_files.extend(sorted(d.glob("*")))
1946
+ for f in sorted(fetched_base.glob("*.log")):
1947
+ if f not in log_files:
1948
+ log_files.append(f)
1949
+
1950
+ if not log_files:
1951
+ hint = (
1952
+ f"No log files found for source '{source_arg}'."
1953
+ if source_arg else "No log files found."
1954
+ )
1955
+ available = (
1956
+ [d.name for d in synced_base.iterdir() if d.is_dir()]
1957
+ if synced_base.exists() else []
1958
+ )
1959
+ return json.dumps({
1960
+ "error": hint,
1961
+ "available_sources": available,
1962
+ "hint": "Run fetch_logs first, or wait for the next poll cycle.",
1963
+ })
1964
+
1965
+ file_list = "\n".join(f" {p}" for p in log_files)
1966
+ prompt = (
1967
+ f"You are analyzing production logs.\n\n"
1968
+ f"QUESTION: {question}\n\n"
1969
+ f"LOG FILES (use your Read and Grep tools to search these):\n{file_list}\n\n"
1970
+ f"Search the log files and answer the question. "
1971
+ f"Be concise and direct. Plain text only — no markdown."
1972
+ )
1973
+
1974
+ env = os.environ.copy()
1975
+ if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
1976
+ env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
1977
+
1978
+ try:
1979
+ skip_flag = []
1980
+ try:
1981
+ if os.getuid() != 0:
1982
+ skip_flag = ["--dangerously-skip-permissions"]
1983
+ except AttributeError:
1984
+ skip_flag = ["--dangerously-skip-permissions"]
1985
+
1986
+ r = subprocess.run(
1987
+ [cfg.claude_code_bin] + skip_flag + ["--print", prompt],
1988
+ capture_output=True, text=True, timeout=240, env=env,
1989
+ cwd=str(workspace),
1990
+ )
1991
+ output = (r.stdout or "").strip()
1992
+ logger.info("Boss ask_logs source=%s rc=%d len=%d", source_arg or "all", r.returncode, len(output))
1993
+ if r.returncode != 0 and not output:
1994
+ raw_err = (r.stderr or "")
1995
+ alert_if_rate_limited(cfg.slack_bot_token, cfg.slack_channel,
1996
+ f"ask_logs/{source_arg or 'all'}", raw_err)
1997
+ return json.dumps({"error": f"claude --print failed (rc={r.returncode}): {raw_err[:300]}"})
1998
+ return json.dumps({
1999
+ "source": source_arg or "all",
2000
+ "files_searched": len(log_files),
2001
+ "answer": output[:4000],
2002
+ })
2003
+ except subprocess.TimeoutExpired:
2004
+ return json.dumps({"error": "timed out after 240s"})
2005
+ except Exception as e:
2006
+ return json.dumps({"error": str(e)})
2007
+
1881
2008
  if name == "restart_project":
1882
2009
  if not is_admin:
1883
2010
  return json.dumps({"error": "Admin access required to restart a project."})
@@ -13,8 +13,8 @@ SOURCE_TYPE=ssh
13
13
 
14
14
  # ── SSH source (SOURCE_TYPE=ssh) ──────────────────────────────────────────────
15
15
 
16
- # Path to the SSH private key (.pem) used to connect to the remote hosts
17
- KEY=/home/<user>/.ssh/<key>.pem
16
+ # SSH private key (.pem). Relative path is resolved from the config dir, then ~/.ssh/
17
+ KEY=prod.pem
18
18
 
19
19
  # Comma-separated list of hostnames or user@host entries.
20
20
  # Hosts without a user@ prefix default to ec2-user@<host>
@@ -38,6 +38,13 @@ GREP_FILTER=WARN|ERROR
38
38
  # Drop lines matching this regex (grep -iv)
39
39
  GREP_EXCLUDE=SSLTool|CommandValidate|hystrix
40
40
 
41
+ # ── Routing ───────────────────────────────────────────────────────────────────
42
+
43
+ # Which repo-config to route errors from this log source to.
44
+ # The filename stem is the default match (e.g. "MyService.properties" → "MyService" repo-config).
45
+ # Set TARGET_REPO to override with the exact repo-config filename stem.
46
+ # TARGET_REPO=MyService
47
+
41
48
  # ── Cloudflare source (SOURCE_TYPE=cloudflare) ────────────────────────────────
42
49
 
43
50
  # Full URL of the Cloudflare Worker log endpoint
@@ -19,6 +19,12 @@ BRANCH=main
19
19
  # true → Sentinel pushes directly to BRANCH and triggers CI/CD
20
20
  AUTO_PUBLISH=false
21
21
 
22
+ # ── Health check (optional) ───────────────────────────────────────────────────
23
+
24
+ # HTTP endpoint returning {"Status": "true"} when healthy.
25
+ # Sentinel polls this after each fix to detect 502/503 before the next log cycle.
26
+ # HEALTH_URL=https://myservice.example.com/health
27
+
22
28
  # ── CI/CD (optional) ──────────────────────────────────────────────────────────
23
29
  # Leave blank if this repo has no deploy pipeline (e.g. shared libraries)
24
30
 
@@ -18,6 +18,10 @@ REPORT_INTERVAL_HOURS=1
18
18
  # Uncomment here only if this project needs a different token.
19
19
  # GITHUB_TOKEN=<github-pat>
20
20
 
21
+ # Fix confirmation: hours of silence after a fix marker appears in production logs before
22
+ # the fix is declared confirmed. Increase for services that deploy infrequently.
23
+ # MARKER_CONFIRM_HOURS=24
24
+
21
25
  # State DB and workspace paths (relative to this project dir)
22
26
  STATE_DB=./sentinel.db
23
27
  WORKSPACE_DIR=./workspace
@@ -64,6 +64,17 @@ UPGRADE_CHECK_HOURS=6
64
64
  # Config repo polling: if the project dir is a git repo, pull for config changes every N seconds
65
65
  CONFIG_POLL_INTERVAL=60
66
66
 
67
+ # Fix confirmation: hours of silence after a fix marker appears in production logs before
68
+ # the fix is declared confirmed. Increase for services that deploy infrequently.
69
+ MARKER_CONFIRM_HOURS=24
70
+
71
+ # Log sync: rsync remote logs to workspace/synced/ for full searchable history
72
+ # Set SYNC_ENABLED=false to disable entirely
73
+ SYNC_ENABLED=true
74
+ SYNC_INTERVAL_SECONDS=300
75
+ SYNC_RETENTION_DAYS=30
76
+ SYNC_MAX_FILE_MB=200
77
+
67
78
  # Slack Bot (Sentinel Boss) — shared across all projects
68
79
  # SLACK_BOT_TOKEN=xoxb-...
69
80
  # SLACK_APP_TOKEN=xapp-...