@misterhuydo/sentinel 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-03-23T08:28:23.743Z",
3
- "checkpoint_at": "2026-03-23T08:28:23.744Z",
2
+ "message": "Auto-checkpoint at 2026-03-23T08:33:20.221Z",
3
+ "checkpoint_at": "2026-03-23T08:33:20.223Z",
4
4
  "active_files": [],
5
5
  "notes": [],
6
6
  "mtime_snapshot": {}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.1.2",
3
+ "version": "1.1.4",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -99,42 +99,109 @@ def _validate_patch(patch: str) -> tuple[bool, str]:
99
99
  return True, ""
100
100
 
101
101
 
102
+ _AUTH_ERROR_HINTS = (
103
+ "not logged in", "please run claude login", "authentication failed",
104
+ "api key is not set", "invalid x-api-key", "unauthorized", "please authenticate",
105
+ "unauthenticated", "auth_required", "no auth", "login required",
106
+ )
107
+
108
+
109
+ def _is_auth_error(output: str) -> bool:
110
+ low = output.lower()
111
+ return any(hint in low for hint in _AUTH_ERROR_HINTS)
112
+
113
+
114
+ def _claude_cmd(bin_path: str, prompt: str) -> list[str]:
115
+ import os as _os
116
+ try:
117
+ skip = _os.getuid() != 0
118
+ except AttributeError:
119
+ skip = True # Windows — always pass flag
120
+ if skip:
121
+ return [bin_path, "--dangerously-skip-permissions", "--print", prompt]
122
+ return [bin_path, "--print", prompt]
123
+
124
+
125
+ def _run_claude_attempt(bin_path: str, prompt: str, env: dict) -> tuple[str, bool]:
126
+ """
127
+ Run claude CLI with the given env. Returns (output, timed_out).
128
+ Raises FileNotFoundError if binary is missing.
129
+ """
130
+ try:
131
+ result = subprocess.run(
132
+ _claude_cmd(bin_path, prompt),
133
+ capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
134
+ )
135
+ return (result.stdout or "") + (result.stderr or ""), False
136
+ except subprocess.TimeoutExpired:
137
+ return "", True
138
+
139
+
102
140
  def generate_fix(
103
141
  event: ErrorEvent,
104
142
  repo: RepoConfig,
105
143
  cfg: SentinelConfig,
106
144
  patches_dir: Path,
107
- ) -> tuple[str, Path | None]:
145
+ ) -> tuple[str, Path | None, str]:
108
146
  """
109
147
  Generate a fix for the given error event.
110
148
 
111
149
  Returns:
112
- (status, patch_path)
150
+ (status, patch_path, marker)
113
151
  status: "patch" | "skip" | "error"
152
+
153
+ Auth strategy — API key and Claude Pro (OAuth) are interchangeable:
154
+ Primary : Claude Pro (OAuth) if claude_pro_for_tasks=True, else API key
155
+ Fallback : the other method, if primary fails with an auth error
156
+ On total auth failure: notify Slack admins + email report recipients
114
157
  """
115
- # Issues have source like "issues/filename" — no rolling log file exists
158
+ import os as _os
159
+
160
+ marker = f"sentinel-{event.fingerprint[:8]}"
116
161
  log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
117
162
  if not log_file.exists():
118
163
  log_file = None
119
- prompt = _build_prompt(event, repo, log_file)
164
+ prompt = _build_prompt(event, repo, log_file, marker)
120
165
 
121
166
  logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
122
- import os as _os
123
- env = _os.environ.copy()
124
- # Inject API key only when Claude Pro is NOT preferred for tasks
125
- # (when claude_pro_for_tasks=True and API key is set, let claude CLI use OAuth/Pro)
126
- if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
127
- env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
167
+
168
+ base_env = _os.environ.copy()
169
+ api_env = {**base_env, "ANTHROPIC_API_KEY": cfg.anthropic_api_key} if cfg.anthropic_api_key else None
170
+ oauth_env = base_env # relies on cached `claude login` session — no key injected
171
+
172
+ # Choose primary/fallback order based on config
173
+ if cfg.claude_pro_for_tasks and cfg.anthropic_api_key:
174
+ attempts = [("Claude Pro (OAuth)", oauth_env), ("API key", api_env)]
175
+ elif cfg.claude_pro_for_tasks:
176
+ attempts = [("Claude Pro (OAuth)", oauth_env)]
177
+ elif cfg.anthropic_api_key:
178
+ attempts = [("API key", api_env), ("Claude Pro (OAuth)", oauth_env)]
179
+ else:
180
+ attempts = [("Claude Pro (OAuth)", oauth_env)]
181
+
182
+ output = ""
128
183
  try:
129
- result = subprocess.run(
130
- ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
131
- if os.getuid() != 0 else
132
- [cfg.claude_code_bin, "--print", prompt]),
133
- capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
134
- )
135
- except subprocess.TimeoutExpired:
136
- logger.error("Claude Code timed out for %s", event.fingerprint)
137
- return "error", None, ""
184
+ for label, env in attempts:
185
+ if env is None:
186
+ continue
187
+ logger.info("fix_engine: trying %s for %s", label, event.fingerprint)
188
+ output, timed_out = _run_claude_attempt(cfg.claude_code_bin, prompt, env)
189
+ if timed_out:
190
+ logger.error("Claude Code timed out for %s", event.fingerprint)
191
+ return "error", None, ""
192
+ if not _is_auth_error(output):
193
+ break
194
+ logger.warning("fix_engine: %s auth error for %s — trying next method", label, event.fingerprint)
195
+ else:
196
+ # All attempts failed with auth errors
197
+ msg = (
198
+ ":warning: *Sentinel — Fix Engine auth failure*\n"
199
+ f"Both API key and Claude Pro (OAuth) failed authentication for `{event.fingerprint}`.\n"
200
+ "• Check that `ANTHROPIC_API_KEY` is valid, or run `claude login` to refresh the OAuth session."
201
+ )
202
+ logger.error("fix_engine: all auth methods failed for %s", event.fingerprint)
203
+ slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
204
+ return "error", None, ""
138
205
  except FileNotFoundError:
139
206
  msg = (
140
207
  f":warning: *Sentinel — Claude CLI not found*\n"
@@ -145,9 +212,7 @@ def generate_fix(
145
212
  slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
146
213
  return "error", None, ""
147
214
 
148
- output = (result.stdout or "") + (result.stderr or "")
149
-
150
- # Alert Slack immediately on rate-limit / auth failure — never stay silent
215
+ # Alert Slack immediately on rate-limit — never stay silent
151
216
  alert_if_rate_limited(
152
217
  cfg.slack_bot_token,
153
218
  cfg.slack_channel,
@@ -549,13 +549,14 @@ def _log_auth_status(cfg: SentinelConfig) -> None:
549
549
 
550
550
  if has_api_key and pro_for_tasks:
551
551
  logger.info(
552
- "Claude auth: API key ✓ (Boss) + Claude Pro preferred for Fix Engine/Ask Codebase. "
552
+ "Claude auth: API key ✓ + Claude Pro (OAuth) "
553
+ "Fix Engine will try Claude Pro first, falls back to API key on auth error. "
553
554
  "Run `claude login` if not already authenticated."
554
555
  )
555
556
  elif has_api_key and not pro_for_tasks:
556
557
  logger.info(
557
- "Claude auth: API key ✓ (Boss + Fix Engine). "
558
- "CLAUDE_PRO_FOR_TASKS=false all tasks billed to API quota."
558
+ "Claude auth: API key ✓ Boss + Fix Engine use API key. "
559
+ "CLAUDE_PRO_FOR_TASKS=false; falls back to Claude Pro (OAuth) if key auth fails."
559
560
  )
560
561
  elif not has_api_key and has_claude_bin:
561
562
  logger.warning(
@@ -1722,6 +1722,80 @@ async def _handle_with_cli(
1722
1722
  return reply, is_done
1723
1723
 
1724
1724
 
1725
+ # ── History serialization helpers ────────────────────────────────────────────
1726
+
1727
+ def _serialize_content(content) -> list:
1728
+ """Convert Anthropic SDK response content (Pydantic objects) to plain dicts.
1729
+
1730
+ The SDK returns TextBlock / ToolUseBlock instances. json.dumps(..., default=str)
1731
+ turns them into useless strings like "TextBlock(type='text', text='...')".
1732
+ This converts them to proper dicts so history round-trips through SQLite safely.
1733
+ """
1734
+ if not isinstance(content, list):
1735
+ return content
1736
+ result = []
1737
+ for block in content:
1738
+ if isinstance(block, dict):
1739
+ result.append(block)
1740
+ elif hasattr(block, "model_dump"):
1741
+ result.append(block.model_dump())
1742
+ elif hasattr(block, "dict"):
1743
+ result.append(block.dict())
1744
+ elif hasattr(block, "type"):
1745
+ if block.type == "text":
1746
+ result.append({"type": "text", "text": getattr(block, "text", "")})
1747
+ elif block.type == "tool_use":
1748
+ result.append({
1749
+ "type": "tool_use",
1750
+ "id": getattr(block, "id", ""),
1751
+ "name": getattr(block, "name", ""),
1752
+ "input": getattr(block, "input", {}),
1753
+ })
1754
+ else:
1755
+ result.append({"type": "text", "text": str(block)})
1756
+ return result
1757
+
1758
+
1759
+ def _clean_history(history: list) -> list:
1760
+ """Remove turns that would cause a 400 from the Anthropic API.
1761
+
1762
+ Strips orphaned tool_use blocks (assistant turn with tool_use but no
1763
+ following tool_result turn) and consecutive same-role turns that result
1764
+ from a previous session that crashed mid-tool-loop.
1765
+ """
1766
+ cleaned = []
1767
+ i = 0
1768
+ while i < len(history):
1769
+ turn = history[i]
1770
+ role = turn.get("role", "")
1771
+ content = turn.get("content", [])
1772
+
1773
+ # Drop assistant turns that contain tool_use if the next turn isn't tool_result
1774
+ if role == "assistant" and isinstance(content, list):
1775
+ has_tool_use = any(
1776
+ (isinstance(b, dict) and b.get("type") == "tool_use")
1777
+ for b in content
1778
+ )
1779
+ if has_tool_use:
1780
+ next_turn = history[i + 1] if i + 1 < len(history) else None
1781
+ next_content = (next_turn or {}).get("content", [])
1782
+ has_result = isinstance(next_content, list) and any(
1783
+ (isinstance(b, dict) and b.get("type") == "tool_result")
1784
+ for b in next_content
1785
+ )
1786
+ if not has_result:
1787
+ i += 1 # skip orphaned tool_use turn
1788
+ continue
1789
+
1790
+ # Drop consecutive same-role turns (keep the last one)
1791
+ if cleaned and cleaned[-1].get("role") == role:
1792
+ cleaned[-1] = turn
1793
+ else:
1794
+ cleaned.append(turn)
1795
+ i += 1
1796
+ return cleaned
1797
+
1798
+
1725
1799
  # ── API-key path (structured tools, full agentic loop) ────────────────────────
1726
1800
 
1727
1801
  async def _handle_with_api(
@@ -1769,13 +1843,15 @@ async def _handle_with_api(
1769
1843
  user_content = attach_blocks + [{"type": "text", "text": message}]
1770
1844
  else:
1771
1845
  user_content = message
1772
- history.append({"role": "user", "content": user_content})
1773
- messages = list(history)
1846
+
1847
+ # Work on a local copy — only commit to history on success to prevent
1848
+ # cascading 400s if the API rejects a malformed/corrupted history.
1849
+ messages = list(history) + [{"role": "user", "content": user_content}]
1774
1850
 
1775
1851
  while True:
1776
1852
  response = client.messages.create(
1777
1853
  model="claude-opus-4-6",
1778
- max_tokens=1024,
1854
+ max_tokens=2048,
1779
1855
  system=system,
1780
1856
  tools=_TOOLS,
1781
1857
  messages=messages,
@@ -1799,10 +1875,12 @@ async def _handle_with_api(
1799
1875
  # Heuristic override: if reply ends with a question, Claude is waiting for input
1800
1876
  if is_done and re.search(r'\?\s*$', reply):
1801
1877
  is_done = False
1802
- history.append({"role": "assistant", "content": response.content})
1878
+ # Commit to history only on success — serialize SDK objects to plain dicts
1879
+ history.append({"role": "user", "content": user_content})
1880
+ history.append({"role": "assistant", "content": _serialize_content(response.content)})
1803
1881
  return reply, is_done
1804
1882
 
1805
- messages.append({"role": "assistant", "content": response.content})
1883
+ messages.append({"role": "assistant", "content": _serialize_content(response.content)})
1806
1884
  tool_results = []
1807
1885
  for tc in tool_blocks:
1808
1886
  result = await _run_tool(tc.name, tc.input, cfg_loader, store, slack_client=slack_client, user_id=user_id, channel=channel, is_admin=is_admin)
@@ -23,7 +23,7 @@ from dataclasses import dataclass, field
23
23
  from pathlib import Path
24
24
  from typing import Optional
25
25
 
26
- from .sentinel_boss import handle_message
26
+ from .sentinel_boss import handle_message, _clean_history
27
27
 
28
28
  logger = logging.getLogger(__name__)
29
29
 
@@ -370,9 +370,10 @@ _MAX_HISTORY_TURNS = 20 # keep last 20 exchanges (~40 messages) to stay well w
370
370
  async def _run_turn(session: _Session, message: str, client, cfg_loader, store, attachments: list | None = None, is_admin: bool = False) -> None:
371
371
  channel = session.channel
372
372
 
373
- # Load persisted history from DB on the first turn of a new session
373
+ # Load persisted history from DB on the first turn of a new session.
374
+ # Clean it to strip any orphaned tool_use turns from a previous crashed session.
374
375
  if not session.history_loaded:
375
- session.history = store.load_conversation(session.user_id)
376
+ session.history = _clean_history(store.load_conversation(session.user_id))
376
377
  session.history_loaded = True
377
378
 
378
379
  # Trim history to avoid context overflow on long conversations