@misterhuydo/sentinel 1.6.14 → 1.6.16

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-04-28T09:29:24.499Z",
3
- "checkpoint_at": "2026-04-28T09:29:24.501Z",
2
+ "message": "Auto-checkpoint at 2026-04-28T09:57:09.547Z",
3
+ "checkpoint_at": "2026-04-28T09:57:09.549Z",
4
4
  "active_files": [
5
5
  "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
6
6
  "J:\\Projects\\Sentinel\\cli\\lib\\test.js"
package/lib/generate.js CHANGED
@@ -45,8 +45,15 @@ if [[ -f "$PID_FILE" ]] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
45
45
  exit 0
46
46
  fi
47
47
 
48
- # Kill any orphaned sentinel processes for this project (stale PIDs not in PID file)
49
- pkill -f "sentinel.main --config $DIR/config" 2>/dev/null || true
48
+ # Kill any orphaned sentinel.main processes whose cwd is this project dir.
49
+ # Match by /proc/PID/cwd (immune to relative-vs-absolute --config arg) so we
50
+ # don't leak duplicate workers when watchdog spawns over a still-alive worker.
51
+ for _pid in $(pgrep -f 'sentinel\\.main' 2>/dev/null); do
52
+ _pcwd=$(readlink -f "/proc/$_pid/cwd" 2>/dev/null) || continue
53
+ if [[ "$_pcwd" == "$DIR" ]]; then
54
+ kill "$_pid" 2>/dev/null && echo "[sentinel] killed orphaned sentinel-main PID $_pid (cwd=$DIR)"
55
+ fi
56
+ done
50
57
  rm -f "$PID_FILE"
51
58
 
52
59
  WORKSPACE="$(dirname "$DIR")"
@@ -200,7 +207,12 @@ if [[ -f "$PID_FILE" ]] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
200
207
  echo "[sentinel] __NAME__ already running (PID $(cat "$PID_FILE"))"
201
208
  exit 0
202
209
  fi
203
- pkill -f "sentinel.main --config $DIR/config" 2>/dev/null || true
210
+ for _pid in $(pgrep -f 'sentinel\.main' 2>/dev/null); do
211
+ _pcwd=$(readlink -f "/proc/$_pid/cwd" 2>/dev/null) || continue
212
+ if [[ "$_pcwd" == "$DIR" ]]; then
213
+ kill "$_pid" 2>/dev/null && echo "[sentinel] killed orphaned sentinel-main PID $_pid (cwd=$DIR)"
214
+ fi
215
+ done
204
216
  rm -f "$PID_FILE"
205
217
  WORKSPACE="$(dirname "$DIR")"
206
218
  _claude_pro=true
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.6.14",
3
+ "version": "1.6.16",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -1 +1 @@
1
- __version__ = "1.6.14"
1
+ __version__ = "1.6.16"
@@ -61,6 +61,41 @@ def _project_lock(project_name: str) -> "asyncio.Lock":
61
61
  return _project_locks[project_name]
62
62
 
63
63
 
64
+ def _notify_skip(sentinel: SentinelConfig, event: "IssueEvent", reason: str) -> None:
65
+ """Tell the submitter (and channel) when an issue is skipped via dedupe.
66
+
67
+ Without this, Boss's "I'll @-mention you when it completes" promise breaks —
68
+ the user is left waiting indefinitely after a silent skip.
69
+ """
70
+ from .notify import slack_alert as _alert
71
+ submitter = getattr(event, "submitter_user_id", "")
72
+ channel = getattr(event, "origin_channel", "") or sentinel.slack_channel
73
+ if not channel or not sentinel.slack_bot_token:
74
+ return
75
+ mention = f"<@{submitter}> " if submitter else ""
76
+ _alert(
77
+ sentinel.slack_bot_token, channel,
78
+ f":fast_forward: {mention}*Issue skipped* — fingerprint `{event.fingerprint[:8]}`. "
79
+ f"{reason}. To force a new attempt, use `retry_issue` (it clears prior `failed` rows). "
80
+ f"If a successful fix already shipped for this fingerprint, the retry will still skip "
81
+ f"unless an admin clears the `applied` row.",
82
+ )
83
+
84
+
85
+ def _restart_via_execv() -> None:
86
+ """Re-exec the current process, preserving the original `python -m sentinel.main` invocation.
87
+
88
+ sys.argv[0] under -m mode is the absolute path to main.py — running that path
89
+ directly makes Python treat the file as a script, which breaks the relative
90
+ `from .cairn_client import ...` imports. Detect -m mode and re-launch with
91
+ `[python, '-m', 'sentinel.main', *original_args]` instead.
92
+ """
93
+ if __package__ and sys.argv and sys.argv[0].endswith(("main.py", "main")):
94
+ os.execv(sys.executable, [sys.executable, "-m", f"{__package__}.main", *sys.argv[1:]])
95
+ else:
96
+ os.execv(sys.executable, [sys.executable, *sys.argv])
97
+
98
+
64
99
  def _on_sigusr1(*_):
65
100
  global _report_requested
66
101
  _report_requested = True
@@ -568,6 +603,7 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
568
603
  "(use Boss `retry_issue` to clear the prior row and re-attempt)",
569
604
  event.source, event.fingerprint,
570
605
  )
606
+ _notify_skip(sentinel, event, "An attempt was already made in the last 24h")
571
607
  mark_done(event.issue_file)
572
608
  return
573
609
 
@@ -619,6 +655,20 @@ async def _handle_issue(event: IssueEvent, cfg_loader: ConfigLoader, store: Stat
619
655
  async def _handle_issue_locked(event, repo, cfg_loader, store):
620
656
  """Heavy work portion of _handle_issue — serialised per project via _project_lock."""
621
657
  sentinel = cfg_loader.sentinel
658
+
659
+ # Re-check dedupe AFTER acquiring the lock — the pre-lock check in
660
+ # _handle_issue is racy when two poll cycles (or two worker processes
661
+ # sharing the same sentinel.db) both see the issue file before either has
662
+ # recorded an attempt. The lock then merely serialises duplicate work.
663
+ if store.fix_attempted_recently(event.fingerprint, hours=24):
664
+ logger.info(
665
+ "Issue %s skipped (post-lock recheck) — fingerprint %s already attempted",
666
+ event.source, event.fingerprint,
667
+ )
668
+ _notify_skip(sentinel, event, "Another worker already attempted this in the last 24h")
669
+ mark_done(event.issue_file)
670
+ return None
671
+
622
672
  auto_commit = resolve_auto_commit(repo, sentinel)
623
673
  auto_release = resolve_auto_release(repo, sentinel)
624
674
 
@@ -945,7 +995,17 @@ async def _handle_issue_locked(event, repo, cfg_loader, store):
945
995
 
946
996
  except Exception:
947
997
  logger.exception("Unexpected error processing issue %s — archiving to prevent retry loop", event.source)
948
- store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
998
+ # Don't insert a 'failed' row if the apply/publish flow already recorded
999
+ # an outcome (success or otherwise) for this fingerprint — otherwise a
1000
+ # post-success exception (e.g. mark_done TOCTOU) would overwrite the
1001
+ # success in Boss DM aggregation that picks the most recent row.
1002
+ if not store.fix_attempted_recently(event.fingerprint, hours=24):
1003
+ store.record_fix(event.fingerprint, "failed", repo_name=repo.repo_name)
1004
+ else:
1005
+ logger.info(
1006
+ "Catch-all skipped recording 'failed' for %s — fixes table already has a recent row for this fingerprint",
1007
+ event.fingerprint,
1008
+ )
949
1009
  mark_done(event.issue_file)
950
1010
  return {"submitter": getattr(event, "submitter_user_id", ""),
951
1011
  "repo_name": repo.repo_name if repo else event.target_repo,
@@ -1481,7 +1541,7 @@ def _check_and_upgrade(cfg: SentinelConfig) -> bool:
1481
1541
  except Exception:
1482
1542
  pass
1483
1543
 
1484
- os.execv(sys.executable, [sys.executable] + sys.argv)
1544
+ _restart_via_execv()
1485
1545
  return True # unreachable after execv
1486
1546
 
1487
1547
 
@@ -1603,7 +1663,7 @@ async def _handle_dev_task(task, cfg_loader: ConfigLoader, store: StateStore):
1603
1663
  f"{mentions}:white_check_mark: *Patch finished* — running tests before restart...",
1604
1664
  )
1605
1665
  # Run test suite before restarting — revert if tests fail.
1606
- import os as _os, sys as _sys, subprocess as _sp
1666
+ import subprocess as _sp
1607
1667
  _loop2 = asyncio.get_event_loop()
1608
1668
  _code_dir = Path(sentinel.sentinel_dev_repo_path or ".")
1609
1669
  _venv_pytest = _code_dir / ".venv" / "bin" / "pytest"
@@ -1666,7 +1726,7 @@ async def _handle_dev_task(task, cfg_loader: ConfigLoader, store: StateStore):
1666
1726
  f"SOAK_MINUTES={_soak_mins}\n"
1667
1727
  )
1668
1728
  await asyncio.sleep(1) # let the Slack message flush
1669
- _os.execv(_sys.executable, [_sys.executable] + _sys.argv)
1729
+ _restart_via_execv()
1670
1730
  elif status == "needs_human":
1671
1731
  # Boss qualifies the raw Patch explanation before surfacing to users
1672
1732
  qualified = _boss_qualify_dev_reason(detail, sentinel)
@@ -2006,9 +2066,8 @@ async def _patch_soak_monitor(cfg_loader: ConfigLoader) -> None:
2006
2066
  f":x: *Patch soak failed* — new errors detected after patch `{patch_hash[:8]}`. "
2007
2067
  f"Reverted {'✓' if revert_ok else '(failed — check manually)'}. Restarting...")
2008
2068
  if revert_ok:
2009
- import os as _os, sys as _sys
2010
2069
  await asyncio.sleep(2)
2011
- _os.execv(_sys.executable, [_sys.executable] + _sys.argv)
2070
+ _restart_via_execv()
2012
2071
  return
2013
2072
 
2014
2073
  # Clean soak — notify
@@ -2603,25 +2603,37 @@ async def _run_tool(name: str, inputs: dict, cfg_loader, store, slack_client=Non
2603
2603
  if store:
2604
2604
  try:
2605
2605
  with store._conn() as _c:
2606
- _row = _c.execute(
2607
- "SELECT status, pr_url, commit_hash FROM fixes "
2608
- "WHERE fingerprint=? ORDER BY timestamp DESC LIMIT 1",
2606
+ # Check for a successful fix in the last 24h FIRST. We can't
2607
+ # use ORDER BY timestamp DESC LIMIT 1 because a spurious
2608
+ # 'failed' row inserted by a post-success exception (see
2609
+ # the catch-all in main.py) sorts ahead of the real
2610
+ # 'applied' row by milliseconds, hiding the success.
2611
+ _applied = _c.execute(
2612
+ "SELECT status, commit_hash FROM fixes "
2613
+ "WHERE fingerprint=? AND status IN ('applied', 'merged') "
2614
+ "AND timestamp >= datetime('now', '-24 hours') "
2615
+ "ORDER BY timestamp DESC LIMIT 1",
2609
2616
  (_fp,),
2610
2617
  ).fetchone()
2611
- if _row:
2612
- _status = _row["status"]
2613
- if _status in ("merged", "applied"):
2614
- _commit = _row["commit_hash"] or ""
2618
+ if _applied:
2619
+ _commit = _applied["commit_hash"] or ""
2615
2620
  return json.dumps({
2616
2621
  "error": (
2617
2622
  f"Already fixed — this issue was resolved "
2618
2623
  + (f"in commit `{_commit[:8]}`" if _commit else "successfully")
2619
- + f". Status: `{_status}`. "
2624
+ + f". Status: `{_applied['status']}`. "
2620
2625
  f"If the problem recurred, describe it as a new issue."
2621
2626
  )
2622
2627
  })
2623
- if _status == "pending":
2624
- _pr = _row["pr_url"] or ""
2628
+ _pending = _c.execute(
2629
+ "SELECT pr_url FROM fixes "
2630
+ "WHERE fingerprint=? AND status='pending' "
2631
+ "AND timestamp >= datetime('now', '-24 hours') "
2632
+ "ORDER BY timestamp DESC LIMIT 1",
2633
+ (_fp,),
2634
+ ).fetchone()
2635
+ if _pending:
2636
+ _pr = _pending["pr_url"] or ""
2625
2637
  return json.dumps({
2626
2638
  "error": (
2627
2639
  f"There is already an open PR for this issue"