npm - @misterhuydo/sentinel - Versions diffs - 1.6.14 → 1.6.16 - Mend

@misterhuydo/sentinel 1.6.14 → 1.6.16

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.cairn/session.json +2 -2
package/lib/generate.js +15 -3
package/package.json +1 -1
package/python/sentinel/__init__.py +1 -1
package/python/sentinel/main.py +65 -6
package/python/sentinel/sentinel_boss.py +22 -10

package/.cairn/session.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "message": "Auto-checkpoint at 2026-04-28T09:29:24.499Z",
-  "checkpoint_at": "2026-04-28T09:29:24.501Z",
+  "message": "Auto-checkpoint at 2026-04-28T09:57:09.547Z",
+  "checkpoint_at": "2026-04-28T09:57:09.549Z",
   "active_files": [
     "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
     "J:\\Projects\\Sentinel\\cli\\lib\\test.js"

package/lib/generate.js CHANGED Viewed

@@ -45,8 +45,15 @@ if [[ -f "$PID_FILE" ]] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
   exit 0
 fi
-# Kill any orphaned sentinel processes for this project (stale PIDs not in PID file)
-pkill -f "sentinel.main --config $DIR/config" 2>/dev/null || true
+# Kill any orphaned sentinel.main processes whose cwd is this project dir.
+# Match by /proc/PID/cwd (immune to relative-vs-absolute --config arg) so we
+# don't leak duplicate workers when watchdog spawns over a still-alive worker.
+for _pid in $(pgrep -f 'sentinel\\.main' 2>/dev/null); do
+  _pcwd=$(readlink -f "/proc/$_pid/cwd" 2>/dev/null) || continue
+  if [[ "$_pcwd" == "$DIR" ]]; then
+    kill "$_pid" 2>/dev/null && echo "[sentinel] killed orphaned sentinel-main PID $_pid (cwd=$DIR)"
+  fi
+done
 rm -f "$PID_FILE"
 WORKSPACE="$(dirname "$DIR")"
@@ -200,7 +207,12 @@ if [[ -f "$PID_FILE" ]] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
   echo "[sentinel] __NAME__ already running (PID $(cat "$PID_FILE"))"
   exit 0
 fi
-pkill -f "sentinel.main --config $DIR/config" 2>/dev/null || true
+for _pid in $(pgrep -f 'sentinel\.main' 2>/dev/null); do
+  _pcwd=$(readlink -f "/proc/$_pid/cwd" 2>/dev/null) || continue
+  if [[ "$_pcwd" == "$DIR" ]]; then
+    kill "$_pid" 2>/dev/null && echo "[sentinel] killed orphaned sentinel-main PID $_pid (cwd=$DIR)"
+  fi
+done
 rm -f "$PID_FILE"
 WORKSPACE="$(dirname "$DIR")"
 _claude_pro=true

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@misterhuydo/sentinel",
-  "version": "1.6.14",
+  "version": "1.6.16",
   "description": "Sentinel — Autonomous DevOps Agent installer and manager",
   "bin": {
     "sentinel": "./bin/sentinel.js"

package/python/sentinel/__init__.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "1.6.14"
+__version__ = "1.6.16"

package/python/sentinel/main.py CHANGED Viewed

@@ -61,6 +61,41 @@ def _project_lock(project_name: str) -> "asyncio.Lock":
     return _project_locks[project_name]
+def _notify_skip(sentinel: SentinelConfig, event: "IssueEvent", reason: str) -> None:
+    """Tell the submitter (and channel) when an issue is skipped via dedupe.
+    Without this, Boss's "I'll @-mention you when it completes" promise breaks —
+    the user is left waiting indefinitely after a silent skip.
+    """
+    from .notify import slack_alert as _alert
+    submitter = getattr(event, "submitter_user_id", "")
+    channel = getattr(event, "origin_channel", "") or sentinel.slack_channel
+    if not channel or not sentinel.slack_bot_token:
+        return
+    mention = f"<@{submitter}> " if submitter else ""
+    _alert(
+        sentinel.slack_bot_token, channel,
+        f":fast_forward: {mention}*Issue skipped* — fingerprint `{event.fingerprint[:8]}`. "
+        f"{reason}. To force a new attempt, use `retry_issue` (it clears prior `failed` rows). "
+        f"If a successful fix already shipped for this fingerprint, the retry will still skip "
+        f"unless an admin clears the `applied` row.",
+    )
+def _restart_via_execv() -> None:
+    """Re-exec the current process, preserving the original `python -m sentinel.main` invocation.
+    sys.argv[0] under -m mode is the absolute path to main.py — running that path
+    directly makes Python treat the file as a script, which breaks the relative
+    `from .cairn_client import ...` imports. Detect -m mode and re-launch with
+    `[python, '-m', 'sentinel.main', *original_args]` instead.
+    """
+    if __package__ and sys.argv and sys.argv[0].endswith(("main.py", "main")):
+        os.execv(sys.executable, [sys.executable, "-m", f"{__package__}.main", *sys.argv[1:]])
+    else:
+        os.execv(sys.executable, [sys.executable, *sys.argv])
 def _on_sigusr1(*_):
     global _report_requested
     _report_requested = True
@@ -568,6 +603,7 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
             "(use Boss `retry_issue` to clear the prior row and re-attempt)",
             event.source, event.fingerprint,
         )
+        _notify_skip(sentinel, event, "An attempt was already made in the last 24h")
         mark_done(event.issue_file)
         return
@@ -619,6 +655,20 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
 async def _handle_issue_locked(event, repo, cfg_loader, store):
     """Heavy work portion of _handle_issue — serialised per project via _project_lock."""
     sentinel = cfg_loader.sentinel
+    # Re-check dedupe AFTER acquiring the lock — the pre-lock check in
+    # _handle_issue is racy when two poll cycles (or two worker processes
+    # sharing the same sentinel.db) both see the issue file before either has
+    # recorded an attempt. The lock then merely serialises duplicate work.
+    if store.fix_attempted_recently(event.fingerprint, hours=24):
+        logger.info(
+            "Issue %s skipped (post-lock recheck) — fingerprint %s already attempted",
+            event.source, event.fingerprint,
+        )
+        _notify_skip(sentinel, event, "Another worker already attempted this in the last 24h")
+        mark_done(event.issue_file)
+        return None
     auto_commit  = resolve_auto_commit(repo, sentinel)
     auto_release = resolve_auto_release(repo, sentinel)
@@ -945,7 +995,17 @@ async def _handle_issue_locked(event, repo, cfg_loader, store):
     except Exception:
         logger.exception("Unexpected error processing issue %s — archiving to prevent retry loop", event.source)
-        store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
+        # Don't insert a 'failed' row if the apply/publish flow already recorded
+        # an outcome (success or otherwise) for this fingerprint — otherwise a
+        # post-success exception (e.g. mark_done TOCTOU) would overwrite the
+        # success in Boss DM aggregation that picks the most recent row.
+        if not store.fix_attempted_recently(event.fingerprint, hours=24):
+            store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
+        else:
+            logger.info(
+                "Catch-all skipped recording 'failed' for %s — fixes table already has a recent row for this fingerprint",
+                event.fingerprint,
+            )
         mark_done(event.issue_file)
         return {"submitter": getattr(event, "submitter_user_id", ""),
                 "repo_name": repo.repo_name if repo else event.target_repo,
@@ -1481,7 +1541,7 @@ def _check_and_upgrade(cfg: SentinelConfig) -> bool:
     except Exception:
         pass
-    os.execv(sys.executable, [sys.executable] + sys.argv)
+    _restart_via_execv()
     return True  # unreachable after execv
@@ -1603,7 +1663,7 @@ async def _handle_dev_task(task, cfg_loader: ConfigLoader, store: StateStore):
             f"{mentions}:white_check_mark: *Patch finished* — running tests before restart...",
         )
         # Run test suite before restarting — revert if tests fail.
-        import os as _os, sys as _sys, subprocess as _sp
+        import subprocess as _sp
         _loop2 = asyncio.get_event_loop()
         _code_dir = Path(sentinel.sentinel_dev_repo_path or ".")
         _venv_pytest = _code_dir / ".venv" / "bin" / "pytest"
@@ -1666,7 +1726,7 @@ async def _handle_dev_task(task, cfg_loader: ConfigLoader, store: StateStore):
                 f"SOAK_MINUTES={_soak_mins}\n"
             )
             await asyncio.sleep(1)  # let the Slack message flush
-            _os.execv(_sys.executable, [_sys.executable] + _sys.argv)
+            _restart_via_execv()
     elif status == "needs_human":
         # Boss qualifies the raw Patch explanation before surfacing to users
         qualified = _boss_qualify_dev_reason(detail, sentinel)
@@ -2006,9 +2066,8 @@ async def _patch_soak_monitor(cfg_loader: ConfigLoader) -> None:
                      f":x: *Patch soak failed* — new errors detected after patch `{patch_hash[:8]}`. "
                      f"Reverted {'✓' if revert_ok else '(failed — check manually)'}. Restarting...")
         if revert_ok:
-            import os as _os, sys as _sys
             await asyncio.sleep(2)
-            _os.execv(_sys.executable, [_sys.executable] + _sys.argv)
+            _restart_via_execv()
         return
     # Clean soak — notify

package/python/sentinel/sentinel_boss.py CHANGED Viewed

@@ -2603,25 +2603,37 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
         if store:
             try:
                 with store._conn() as _c:
-                    _row = _c.execute(
-                        "SELECT status, pr_url, commit_hash FROM fixes "
-                        "WHERE fingerprint=? ORDER BY timestamp DESC LIMIT 1",
+                    # Check for a successful fix in the last 24h FIRST. We can't
+                    # use ORDER BY timestamp DESC LIMIT 1 because a spurious
+                    # 'failed' row inserted by a post-success exception (see
+                    # the catch-all in main.py) sorts ahead of the real
+                    # 'applied' row by milliseconds, hiding the success.
+                    _applied = _c.execute(
+                        "SELECT status, commit_hash FROM fixes "
+                        "WHERE fingerprint=? AND status IN ('applied', 'merged') "
+                        "AND timestamp >= datetime('now', '-24 hours') "
+                        "ORDER BY timestamp DESC LIMIT 1",
                         (_fp,),
                     ).fetchone()
-                if _row:
-                    _status = _row["status"]
-                    if _status in ("merged", "applied"):
-                        _commit = _row["commit_hash"] or ""
+                    if _applied:
+                        _commit = _applied["commit_hash"] or ""
                         return json.dumps({
                             "error": (
                                 f"Already fixed — this issue was resolved "
                                 + (f"in commit `{_commit[:8]}`" if _commit else "successfully")
-                                + f". Status: `{_status}`. "
+                                + f". Status: `{_applied['status']}`. "
                                 f"If the problem recurred, describe it as a new issue."
                             )
                         })
-                    if _status == "pending":
-                        _pr = _row["pr_url"] or ""
+                    _pending = _c.execute(
+                        "SELECT pr_url FROM fixes "
+                        "WHERE fingerprint=? AND status='pending' "
+                        "AND timestamp >= datetime('now', '-24 hours') "
+                        "ORDER BY timestamp DESC LIMIT 1",
+                        (_fp,),
+                    ).fetchone()
+                    if _pending:
+                        _pr = _pending["pr_url"] or ""
                         return json.dumps({
                             "error": (
                                 f"There is already an open PR for this issue"