npm - @misterhuydo/sentinel - Versions diffs - 1.0.77 → 1.0.83 - Mend

@misterhuydo/sentinel 1.0.77 → 1.0.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/.cairn/.hint-lock +1 -1
package/.cairn/session.json +2 -2
package/lib/generate.js +15 -1
package/lib/init.js +381 -319
package/lib/upgrade.js +40 -0
package/package.json +21 -21
package/python/scripts/patch_notify.js +200 -0
package/python/sentinel/config_loader.py +6 -0
package/python/sentinel/fix_engine.py +177 -160
package/python/sentinel/main.py +35 -0
package/python/sentinel/notify.py +88 -0
package/python/sentinel/sentinel_boss.py +1605 -1371
package/python/sentinel/slack_bot.py +427 -384
package/python/sentinel/state_store.py +423 -341
package/templates/sentinel.properties +3 -1
package/templates/workspace-sentinel.properties +33 -0
package/.cairn/views/2a85cc_init.js +0 -273

package/python/sentinel/fix_engine.py CHANGED Viewed

@@ -1,160 +1,177 @@
-"""
-fix_engine.py — Generate code fixes via Claude Code (headless).
-Invokes: claude --print "<prompt>" 2>&1
-Cairn MCP context is fetched automatically by Claude Code via its MCP tool
-connection — Sentinel does not need to query or inject it explicitly.
-"""
-import logging
-import re
-import subprocess
-import textwrap
-from pathlib import Path
-from .config_loader import RepoConfig, SentinelConfig
-from .log_parser import ErrorEvent
-logger = logging.getLogger(__name__)
-SUBPROCESS_TIMEOUT = 120
-MAX_FILES_IN_PATCH = 5
-MAX_LINES_IN_PATCH = 200
-_DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
-_DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
-def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
-    if log_file and log_file.exists():
-        ctx = (
-            "LOG FILE: " + str(log_file) + "\n"
-            "Read this file first -- it contains the last 48h of logs from "
-            + event.source + ".\n"
-            "Use it to understand frequency, context, and preceding warnings."
-        )
-        step1 = "Read the log file above to understand what led up to this error."
-    else:
-        ctx = (
-            "SOURCE: " + event.source + "\n"
-            "No rolling log file available. The full issue description is below."
-        )
-        step1 = "Use the issue description above as your primary context."
-    marker_label = marker + " sentinel-auto-fix [safe to remove after verification]"
-    marker_instruction = "\n".join([
-        "For EVERY method and constructor you modify, add this as the FIRST executable line:",
-        f'  Java/Kotlin : log.info("{marker_label}");',
-        f'  Python      : logger.info("{marker_label}")',
-        f'  Node.js     : logger.info("{marker_label}")',
-        "Use the logger already present in the file. Do not add new imports.",
-        "This applies to ALL modified methods and constructors without exception.",
-    ])
-    cleanup = ""
-    if stale_markers:
-        marker_list = "\n".join(f"  - {m}" for m in stale_markers)
-        cleanup = (
-            "CLEANUP (do this first, before the fix):\n"
-            "Remove any log lines containing these stale Sentinel markers from the codebase:\n"
-            + marker_list + "\n"
-            "Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'\n"
-        )
-    lines_out = [
-        f"You are fixing a production bug in the repository at {repo.local_path}.",
-        f"Repository: {repo.repo_name}",
-        "",
-    ]
-    if cleanup:
-        lines_out += [cleanup, ""]
-    lines_out += [
-        ctx,
-        "",
-        f"ISSUE TO FIX (from {event.source}):",
-        event.full_text(),
-        "",
-        "Task:",
-        f"1. {step1}",
-        "2. Use your available tools to explore the codebase and identify the root cause.",
-        f"3. {marker_instruction}",
-        "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
-        "5. Do not explain. Output only the patch.",
-        "6. If you cannot determine a safe fix, output: SKIP: <reason>",
-    ]
-    return "\n".join(lines_out)
-def _validate_patch(patch: str) -> tuple[bool, str]:
-    files_changed = len(re.findall(r"^diff --git", patch, re.MULTILINE))
-    lines_changed = len([
-        l for l in patch.splitlines()
-        if l.startswith(("+", "-")) and not l.startswith(("+++", "---"))
-    ])
-    if files_changed > MAX_FILES_IN_PATCH:
-        return False, f"Patch touches {files_changed} files (limit {MAX_FILES_IN_PATCH})"
-    if lines_changed > MAX_LINES_IN_PATCH:
-        return False, f"Patch changes {lines_changed} lines (limit {MAX_LINES_IN_PATCH})"
-    return True, ""
-def generate_fix(
-    event: ErrorEvent,
-    repo: RepoConfig,
-    cfg: SentinelConfig,
-    patches_dir: Path,
-) -> tuple[str, Path | None]:
-    """
-    Generate a fix for the given error event.
-    Returns:
-        (status, patch_path)
-        status: "patch" | "skip" | "error"
-    """
-    # Issues have source like "issues/filename" — no rolling log file exists
-    log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
-    if not log_file.exists():
-        log_file = None
-    prompt = _build_prompt(event, repo, log_file)
-    logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
-    import os as _os
-    env = _os.environ.copy()
-    if cfg.anthropic_api_key:
-        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
-    try:
-        result = subprocess.run(
-            ([cfg.claude_code_bin, "--dangerously-skip-permissions", "--print", prompt]
-            if os.getuid() != 0 else
-            [cfg.claude_code_bin, "--print", prompt]),
-            capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
-        )
-    except subprocess.TimeoutExpired:
-        logger.error("Claude Code timed out for %s", event.fingerprint)
-        return "error", None, ""
-    except FileNotFoundError:
-        logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
-        return "error", None, ""
-    output = (result.stdout or "") + (result.stderr or "")
-    if output.strip().upper().startswith("SKIP:"):
-        reason = output.strip()[5:].strip()
-        logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
-        return "skip", None, ""
-    patch = _extract_patch(output)
-    if not patch:
-        logger.warning("No patch found in Claude output for %s", event.fingerprint)
-        return "error", None, ""
-    ok, reason = _validate_patch(patch)
-    if not ok:
-        logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
-        return "skip", None, ""
-    patches_dir.mkdir(parents=True, exist_ok=True)
-    patch_path = patches_dir / f"{event.fingerprint}.diff"
-    patch_path.write_text(patch, encoding="utf-8")
-    logger.info("Patch written to %s", patch_path)
-    return "patch", patch_path, marker
+"""
+fix_engine.py — Generate code fixes via Claude Code (headless).
+Invokes: claude --print "<prompt>" 2>&1
+Cairn MCP context is fetched automatically by Claude Code via its MCP tool
+connection — Sentinel does not need to query or inject it explicitly.
+"""
+import logging
+import re
+import subprocess
+import textwrap
+from pathlib import Path
+from .config_loader import RepoConfig, SentinelConfig
+from .log_parser import ErrorEvent
+from .notify import alert_if_rate_limited, slack_alert
+logger = logging.getLogger(__name__)
+# Timeout, in seconds, passed to subprocess.run when invoking Claude Code.
+SUBPROCESS_TIMEOUT = 120
+# Size limits enforced by _validate_patch; a patch exceeding either one is rejected.
+MAX_FILES_IN_PATCH = 5
+MAX_LINES_IN_PATCH = 200
+# Matches a fenced code block (optionally tagged "diff" or "patch") and captures its body.
+_DIFF_BLOCK = re.compile(r"```(?:diff|patch)?\n(.*?)```", re.DOTALL)
+# Matches unified-diff header lines ("diff --git", "--- <path>", "+++ <path>") at line start.
+_DIFF_HEADER = re.compile(r"^diff --git|^---\s+\S+|^\+\+\+\s+\S+", re.MULTILINE)
+def _build_prompt(event, repo: RepoConfig, log_file, marker: str, stale_markers: list[str] = None) -> str:
+    """
+    Assemble the text prompt sent to Claude Code for one error event.
+    Args:
+        event: object exposing ``source`` and ``full_text()``.
+        repo: object exposing ``local_path`` and ``repo_name``.
+        log_file: path-like object with ``exists()``, or a falsy value when no log is available.
+        marker: token placed at the start of the log line Claude is told to insert.
+        stale_markers: optional markers whose log lines Claude is asked to remove first.
+    Returns:
+        The complete prompt as one newline-joined string.
+    """
+    # Context section: point Claude at the rolling log when there is one,
+    # otherwise fall back to the issue text alone.
+    if log_file and log_file.exists():
+        context_block = "".join((
+            "LOG FILE: ", str(log_file), "\n",
+            "Read this file first -- it contains the last 48h of logs from ",
+            event.source, ".\n",
+            "Use it to understand frequency, context, and preceding warnings.",
+        ))
+        first_step = "Read the log file above to understand what led up to this error."
+    else:
+        context_block = "".join((
+            "SOURCE: ", event.source, "\n",
+            "No rolling log file available. The full issue description is below.",
+        ))
+        first_step = "Use the issue description above as your primary context."
+    # Marker section: one logging call per supported language, language name
+    # padded to 11 characters so the colons line up.
+    tag = " ".join((marker, "sentinel-auto-fix [safe to remove after verification]"))
+    logger_calls = (
+        ("Java/Kotlin", f'log.info("{tag}");'),
+        ("Python", f'logger.info("{tag}")'),
+        ("Node.js", f'logger.info("{tag}")'),
+    )
+    instruction_lines = [
+        "For EVERY method and constructor you modify, add this as the FIRST executable line:",
+    ]
+    instruction_lines.extend(f"  {lang:<11} : {call}" for lang, call in logger_calls)
+    instruction_lines.append("Use the logger already present in the file. Do not add new imports.")
+    instruction_lines.append("This applies to ALL modified methods and constructors without exception.")
+    marker_instruction = "\n".join(instruction_lines)
+    # Optional cleanup section, emitted only when stale markers were supplied.
+    cleanup = ""
+    if stale_markers:
+        bullets = "\n".join(f"  - {m}" for m in stale_markers)
+        cleanup = "\n".join((
+            "CLEANUP (do this first, before the fix):",
+            "Remove any log lines containing these stale Sentinel markers from the codebase:",
+            bullets,
+            "Commit the cleanup separately with message: 'chore(sentinel): remove stale markers'",
+        )) + "\n"
+    sections = [
+        f"You are fixing a production bug in the repository at {repo.local_path}.",
+        f"Repository: {repo.repo_name}",
+        "",
+        *([cleanup, ""] if cleanup else []),
+        context_block,
+        "",
+        f"ISSUE TO FIX (from {event.source}):",
+        event.full_text(),
+        "",
+        "Task:",
+        f"1. {first_step}",
+        "2. Use your available tools to explore the codebase and identify the root cause.",
+        f"3. {marker_instruction}",
+        "4. Output ONLY a unified diff patch (git diff format) fixing the issue.",
+        "5. Do not explain. Output only the patch.",
+        "6. If you cannot determine a safe fix, output: SKIP: <reason>",
+    ]
+    return "\n".join(sections)
+def _validate_patch(patch: str) -> tuple[bool, str]:
+    """
+    Check a patch against the module's size limits.
+    Returns:
+        (True, "") when the patch is within both limits, otherwise
+        (False, reason) where reason names the limit that was exceeded.
+    """
+    file_count = sum(1 for _ in re.finditer(r"^diff --git", patch, re.MULTILINE))
+    changed_count = 0
+    for line in patch.splitlines():
+        if line.startswith(("+++", "---")):
+            # File header lines are not content changes.
+            continue
+        if line.startswith(("+", "-")):
+            changed_count += 1
+    if file_count > MAX_FILES_IN_PATCH:
+        return False, f"Patch touches {file_count} files (limit {MAX_FILES_IN_PATCH})"
+    if changed_count > MAX_LINES_IN_PATCH:
+        return False, f"Patch changes {changed_count} lines (limit {MAX_LINES_IN_PATCH})"
+    return True, ""
+def generate_fix(
+    event: ErrorEvent,
+    repo: RepoConfig,
+    cfg: SentinelConfig,
+    patches_dir: Path,
+) -> tuple[str, Path | None, str]:
+    """
+    Generate a fix for the given error event by running Claude Code headless.
+    Args:
+        event: the error to fix; its source, fingerprint and full text are used.
+        repo: repository the fix targets (passed to the prompt builder).
+        cfg: Sentinel configuration (workspace dir, Claude binary, API key, Slack settings).
+        patches_dir: directory the patch file is written to; created if missing.
+    Returns:
+        (status, patch_path, marker)
+        status: "patch" | "skip" | "error"
+        patch_path: path of the written ``<fingerprint>.diff`` when status is "patch", else None
+        marker: the log marker embedded in the prompt when status is "patch", else ""
+    """
+    import os as _os
+    # Issues have source like "issues/filename" — no rolling log file exists
+    log_file = Path(cfg.workspace_dir) / "fetched" / f"{event.source}.log"
+    if not log_file.exists():
+        log_file = None
+    # The original neither defined `marker` (NameError at the final return) nor passed it to
+    # _build_prompt (TypeError on the missing required argument).
+    # NOTE(review): the marker format below is an assumption — confirm it against whatever
+    # consumes the third element of the returned tuple.
+    marker = f"SENTINEL-{event.fingerprint}"
+    prompt = _build_prompt(event, repo, log_file, marker)
+    logger.info("Invoking Claude Code for %s (fp=%s)", event.source, event.fingerprint)
+    env = _os.environ.copy()
+    # Inject API key only when Claude Pro is NOT preferred for tasks
+    # (when claude_pro_for_tasks=True and API key is set, let claude CLI use OAuth/Pro)
+    if cfg.anthropic_api_key and not cfg.claude_pro_for_tasks:
+        env["ANTHROPIC_API_KEY"] = cfg.anthropic_api_key
+    # Same rule as before: the permission-skip flag is passed only when NOT running as root.
+    # The original called `os.getuid()` although the module is only bound as `_os`.
+    # Platforms without os.getuid are treated as non-root.
+    running_as_root = hasattr(_os, "getuid") and _os.getuid() == 0
+    cmd = [cfg.claude_code_bin, "--print", prompt]
+    if not running_as_root:
+        cmd.insert(1, "--dangerously-skip-permissions")
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True, text=True, timeout=SUBPROCESS_TIMEOUT, env=env,
+        )
+    except subprocess.TimeoutExpired:
+        logger.error("Claude Code timed out for %s", event.fingerprint)
+        return "error", None, ""
+    except FileNotFoundError:
+        msg = (
+            f":warning: *Sentinel — Claude CLI not found*\n"
+            f"`{cfg.claude_code_bin}` not found. Run: `npm install -g @anthropic-ai/claude-code`\n"
+            f"Fix engine is disabled until this is resolved."
+        )
+        logger.error("Claude Code binary not found at '%s'", cfg.claude_code_bin)
+        slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
+        return "error", None, ""
+    output = (result.stdout or "") + (result.stderr or "")
+    # Alert Slack immediately on rate-limit / auth failure — never stay silent
+    alert_if_rate_limited(
+        cfg.slack_bot_token,
+        cfg.slack_channel,
+        source=f"fix_engine/{event.fingerprint}",
+        output=output,
+    )
+    stripped = output.strip()
+    if stripped.upper().startswith("SKIP:"):
+        reason = stripped[5:].strip()
+        logger.info("Claude skipped fix for %s: %s", event.fingerprint, reason)
+        return "skip", None, ""
+    # NOTE(review): `_extract_patch` is not defined in the visible part of this module —
+    # confirm it exists, otherwise this call raises NameError.
+    patch = _extract_patch(output)
+    if not patch:
+        logger.warning("No patch found in Claude output for %s", event.fingerprint)
+        return "error", None, ""
+    ok, reason = _validate_patch(patch)
+    if not ok:
+        logger.warning("Patch rejected for %s: %s", event.fingerprint, reason)
+        return "skip", None, ""
+    patches_dir.mkdir(parents=True, exist_ok=True)
+    patch_path = patches_dir / f"{event.fingerprint}.diff"
+    patch_path.write_text(patch, encoding="utf-8")
+    logger.info("Patch written to %s", patch_path)
+    return "patch", patch_path, marker

package/python/sentinel/main.py CHANGED Viewed

@@ -541,10 +541,45 @@ async def _upgrade_check_loop(cfg_loader: ConfigLoader):
 # ── Entry point ──────────────────────────────────────────────────────────────────────────────────
+def _log_auth_status(cfg: SentinelConfig) -> None:
+    """
+    Report at startup which Claude credentials are configured.
+    Logs one message per combination of API key, CLI binary and the
+    ``claude_pro_for_tasks`` setting. When there is neither an API key nor a
+    CLI binary that ``shutil.which`` can resolve, logs an error and posts a Slack alert.
+    """
+    api_key_set = bool(cfg.anthropic_api_key)
+    cli_found = bool(shutil.which(cfg.claude_code_bin))
+    prefer_pro = cfg.claude_pro_for_tasks
+    if api_key_set:
+        if prefer_pro:
+            logger.info(
+                "Claude auth: API key ✓ (Boss) + Claude Pro preferred for Fix Engine/Ask Codebase. "
+                "Run `claude login` if not already authenticated."
+            )
+        else:
+            logger.info(
+                "Claude auth: API key ✓ (Boss + Fix Engine). "
+                "CLAUDE_PRO_FOR_TASKS=false — all tasks billed to API quota."
+            )
+        return
+    if cli_found:
+        logger.warning(
+            "Claude auth: no ANTHROPIC_API_KEY — Boss will use CLI fallback (limited tools). "
+            "Fix Engine uses Claude Pro via `claude` CLI."
+        )
+        return
+    # Neither an API key nor the CLI binary: tell the operators on Slack as well.
+    msg = "\n".join((
+        ":warning: *Sentinel — no Claude authentication configured*",
+        "Sentinel needs at least one of:",
+        "• `ANTHROPIC_API_KEY` in `sentinel.properties` — full Boss tools, API billing",
+        "• Claude Pro OAuth: run `claude login` on the server — required for Fix Engine",
+        "See the auth section in your workspace `sentinel.properties` for guidance.",
+    ))
+    logger.error("Claude auth: NOTHING configured — Boss and Fix Engine will fail!")
+    from .notify import slack_alert
+    slack_alert(cfg.slack_bot_token, cfg.slack_channel, msg)
 async def run_loop(cfg_loader: ConfigLoader, store: StateStore):
     interval = cfg_loader.sentinel.poll_interval_seconds
     logger.info("Sentinel starting — poll interval: %ds, repos: %s",
                 interval, list(cfg_loader.repos.keys()))
+    _log_auth_status(cfg_loader.sentinel)
     results = await _startup_checks(cfg_loader)

package/python/sentinel/notify.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""
+notify.py — Best-effort Slack alerts from any Sentinel module.
+Uses the Slack Web API directly (no Bolt / Socket Mode required).
+Calls never raise — failures are logged and silently dropped.
+"""
+import logging
+import re
+import requests
+logger = logging.getLogger(__name__)
+# ── Rate-limit / auth-failure detector ────────────────────────────────────────
+# Case-insensitive alternation of phrases treated as a rate-limit, quota,
+# billing or authentication signal.
+_RATE_LIMIT_RE = re.compile(
+    r"rate.?limit|usage.?limit|too many requests|quota.?exceeded"
+    r"|overloaded|credit.?balance|billing|529"
+    r"|not.?authenticated|invalid.?api.?key|authentication.?fail"
+    r"|claude\.ai subscription|pro.?plan|login required",
+    re.IGNORECASE,
+)
+def is_rate_limited(text: str) -> bool:
+    """Tell whether ``text`` contains any rate-limit or auth-failure phrase."""
+    return _RATE_LIMIT_RE.search(text) is not None
+def rate_limit_message(source: str, raw: str) -> str:
+    """
+    Produce a human-readable Slack alert for a rate-limit event.
+    Args:
+        source: label of the component that saw the failure, shown in the headline.
+        raw: the raw output; its first 300 characters (after stripping) are quoted.
+    Returns:
+        The alert text, with the quoted snippet inside a triple-backtick code block.
+    """
+    # Newlines are flattened so the snippet stays on one line. Backticks are replaced
+    # because any ``` inside the snippet would close the surrounding code block early
+    # and break the formatting of the rest of the alert.
+    snippet = raw.strip()[:300].replace("\n", " ").replace("`", "'")
+    return (
+        f":warning: *Sentinel — Claude usage/auth problem ({source})*\n"
+        f"Claude returned an error that requires admin attention:\n"
+        f"```{snippet}```\n"
+        f"*What to check:*\n"
+        f"• API key: verify `ANTHROPIC_API_KEY` in `sentinel.properties` is valid and has credit\n"
+        f"• Claude Pro: run `claude login` on the server to refresh OAuth\n"
+        f"• Both: at least one auth method must be working\n"
+        f"Sentinel will retry on the next poll cycle."
+    )
+# ── Alert dispatcher ──────────────────────────────────────────────────────────
+def slack_alert(bot_token: str, channel: str, text: str) -> None:
+    """
+    Send ``text`` to a Slack channel through the chat.postMessage Web API.
+    When the token or channel is empty, only a debug line is logged. Any
+    exception raised while posting or decoding the reply is logged as a
+    warning and swallowed, so this function never raises to its caller.
+    """
+    if not (bot_token and channel):
+        logger.debug("slack_alert: no token/channel configured — logging only: %s", text[:120])
+        return
+    try:
+        request_headers = {
+            "Authorization": f"Bearer {bot_token}",
+            "Content-Type": "application/json",
+        }
+        payload = {"channel": channel, "text": text}
+        reply = requests.post(
+            "https://slack.com/api/chat.postMessage",
+            headers=request_headers,
+            json=payload,
+            timeout=10,
+        ).json()
+        # Slack reports failures through an "ok" flag in the reply body.
+        if not reply.get("ok"):
+            logger.warning("slack_alert: Slack API error: %s", reply.get("error"))
+    except Exception as exc:
+        logger.warning("slack_alert: failed to post: %s", exc)
+def alert_if_rate_limited(
+    bot_token: str,
+    channel: str,
+    source: str,
+    output: str,
+) -> bool:
+    """
+    Scan ``output`` for rate-limit / auth signals and alert when one is found.
+    Returns:
+        True when a signal was found (an error is logged and a Slack alert
+        is attempted), False when the output looks clean.
+    """
+    if is_rate_limited(output):
+        alert_text = rate_limit_message(source, output)
+        logger.error("Claude rate-limit/auth failure in %s: %s", source, output[:200])
+        slack_alert(bot_token, channel, alert_text)
+        return True
+    return False