ctrlrelay 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CHANGELOG.md +71 -0
  2. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/PKG-INFO +1 -1
  3. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/pyproject.toml +1 -1
  4. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/__main__.py +20 -0
  5. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/server.py +136 -0
  6. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/cli.py +166 -1
  7. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/poller.py +46 -0
  8. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/state.py +112 -0
  9. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/secops.py +189 -0
  10. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_bridge_server.py +164 -1
  11. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_poller.py +53 -0
  12. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_state.py +65 -0
  13. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  14. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  15. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  16. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/dependabot.yml +0 -0
  17. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/build.yml +0 -0
  18. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/cla.yml +0 -0
  19. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/pages.yml +0 -0
  20. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/publish.yml +0 -0
  21. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/test.yml +0 -0
  22. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.gitignore +0 -0
  23. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CODE_OF_CONDUCT.md +0 -0
  24. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CONTRIBUTING.md +0 -0
  25. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/LICENSE +0 -0
  26. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/README.md +0 -0
  27. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/SECURITY.md +0 -0
  28. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/config/orchestrator.yaml.example +0 -0
  29. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/Gemfile +0 -0
  30. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/_config.yml +0 -0
  31. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/architecture.md +0 -0
  32. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/bridge.md +0 -0
  33. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/cli.md +0 -0
  34. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/configuration.md +0 -0
  35. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/development.md +0 -0
  36. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/feedback-loop.md +0 -0
  37. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/getting-started.md +0 -0
  38. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/index.md +0 -0
  39. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/operations.md +0 -0
  40. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/__init__.py +0 -0
  41. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/__init__.py +0 -0
  42. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/protocol.py +0 -0
  43. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/telegram_handler.py +0 -0
  44. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/__init__.py +0 -0
  45. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/audit.py +0 -0
  46. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/checkpoint.py +0 -0
  47. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/config.py +0 -0
  48. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/dispatcher.py +0 -0
  49. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/github.py +0 -0
  50. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/obs.py +0 -0
  51. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/pr_verifier.py +0 -0
  52. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/pr_watcher.py +0 -0
  53. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/scheduler.py +0 -0
  54. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/worktree.py +0 -0
  55. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/dashboard/__init__.py +0 -0
  56. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/dashboard/client.py +0 -0
  57. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/__init__.py +0 -0
  58. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/base.py +0 -0
  59. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/dev.py +0 -0
  60. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/post_merge.py +0 -0
  61. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/__init__.py +0 -0
  62. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/base.py +0 -0
  63. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/file_mock.py +0 -0
  64. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/socket_client.py +0 -0
  65. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/__init__.py +0 -0
  66. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/conftest.py +0 -0
  67. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_audit.py +0 -0
  68. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_bridge_protocol.py +0 -0
  69. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_checkpoint.py +0 -0
  70. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_ci_wait.py +0 -0
  71. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_dev.py +0 -0
  72. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_secops.py +0 -0
  73. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_start.py +0 -0
  74. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_version.py +0 -0
  75. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_config.py +0 -0
  76. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dashboard_client.py +0 -0
  77. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dev_integration.py +0 -0
  78. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dev_pipeline.py +0 -0
  79. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dispatcher.py +0 -0
  80. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_docs_site.py +0 -0
  81. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_github.py +0 -0
  82. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_obs.py +0 -0
  83. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pipeline_base.py +0 -0
  84. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_post_merge.py +0 -0
  85. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pr_verifier.py +0 -0
  86. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pr_watcher.py +0 -0
  87. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_scheduler.py +0 -0
  88. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_secops_integration.py +0 -0
  89. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_secops_pipeline.py +0 -0
  90. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_telegram_handler.py +0 -0
  91. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_transport.py +0 -0
  92. {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_worktree.py +0 -0
@@ -7,6 +7,77 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.8] - 2026-04-21
11
+
12
+ The "reply to BLOCKED in Telegram and it actually resumes" release.
13
+ Two operator-visibility fixes surfaced from running a 79-repo secops
14
+ sweep: a noisy log-spam issue and a silently-dropped-reply issue. The
15
+ latter turned into a proper resume flow.
16
+
17
+ ### Added
18
+
19
+ - **Resume BLOCKED secops via Telegram reply.** When a scheduled
20
+ secops sweep escalates BLOCKED and exits, the question is now
21
+ persisted in a new `pending_resumes` table. Replying in Telegram
22
+ matches against that table and queues the answer; a new per-minute
23
+ `pending_resume_sweeper` scheduler job inside the poller drains
24
+ answered rows — re-acquires the repo lock, re-creates the worktree,
25
+ calls `SecopsPipeline.resume(ctx, answer)`, and Telegrams the
26
+ result (success / re-blocked / failed). First reply-to-resume
27
+ round-trip is ≤60s.
28
+ - **Disambiguation when multiple BLOCKED sessions exist.** Replying
29
+ "merge it" when both `repoA` and `repoB` are blocked used to route
30
+ to FIFO (wrong repo, possibly destructive). The bridge now refuses
31
+ to guess: with >1 unanswered BLOCKED sessions it replies in Telegram
32
+ with the pending session_ids so the operator can reply again with one
33
+ included. The single-BLOCKED case stays unambiguous.
34
+
35
+ ### Fixed
36
+
37
+ - **Poller log spam on issues-disabled repos.** Repos with GitHub's
38
+ Issues feature disabled (template repos, signature repos, GitHub
39
+ Pages sites) returned a permanent `GitHubError(... has disabled
40
+ issues)` that the poller classified as transient, retrying every
41
+ 120s and escalating to WARNING after 3 cycles. `poll()` and
42
+ `seed_current()` now detect the specific error, mark the repo in
43
+ an in-memory permanent-skip set, log once at INFO as
44
+ `poll.repo.issues_disabled`, and skip the `gh` call on subsequent
45
+ cycles. The skip set resets on daemon restart.
46
+ - **Orphan Telegram replies silently dropped.** When a BLOCKED
47
+ session had already torn down (scheduled secops), the bridge's
48
+ in-memory `_pending_questions` entry died with the ASK socket and
49
+ the operator's reply disappeared with just an `info` log line. The
50
+ bridge now replies via Telegram so the failure is visible (and,
51
+ with the resume flow above, actually lands as an answer).
52
+ - **`pending_resumes` rows no longer dropped on sweeper lock
53
+ contention.** When the per-minute sweeper raced the 6am scheduled
54
+ secops on the same repo, it used to `mark_pending_resume_resumed`
55
+ unconditionally and lose the queued answer. The sweeper now detects
56
+ the specific `"Repository locked by another session"` error and
57
+ leaves the row pending for the next tick.
58
+
59
+ ### Schema migration
60
+
61
+ State DB gains a `pending_resumes` table (session_id PK, pipeline,
62
+ repo, question, created_at, answer, answered_at, resumed_at). Two
63
+ partial indexes: `idx_pending_resumes_unanswered` (for orphan-reply
64
+ lookup) and `idx_pending_resumes_answered_unresumed` (for sweeper
65
+ load). Created automatically on daemon start; no backfill needed.
66
+
67
+ ### Operator notes
68
+
69
+ - Upgrade via `uv tool upgrade ctrlrelay` (or
70
+ `uv tool install ctrlrelay@latest --force` if pinned), then restart
71
+ the poller and bridge so the new sweeper is scheduled and the bridge
72
+ sees the new schema.
73
+ - To exercise the resume-via-Telegram path: let a scheduled secops
74
+ escalate BLOCKED, reply to the Telegram notification with your
75
+ decision (or a fresh message that mentions the session_id if
76
+ multiple repos are BLOCKED). Expect a `✅ Answer queued` ack within
77
+ seconds and a result message within ~1 minute.
78
+ - Dev pipeline resume-via-Telegram is not yet wired; the sweeper
79
+ skips non-secops rows.
80
+
10
81
  ## [0.1.7] - 2026-04-20
11
82
 
12
83
  Patch release. Fixes one drift bug in how the package reports its own
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ctrlrelay
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Local-first orchestrator for headless coding agents across multiple GitHub repos
5
5
  Project-URL: Homepage, https://github.com/AInvirion/ctrlrelay
6
6
  Project-URL: Documentation, https://ainvirion.github.io/ctrlrelay/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ctrlrelay"
3
- version = "0.1.7"
3
+ version = "0.1.8"
4
4
  description = "Local-first orchestrator for headless coding agents across multiple GitHub repos"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -21,6 +21,15 @@ def main() -> None:
21
21
  help="Environment variable holding the Telegram bot token",
22
22
  )
23
23
  parser.add_argument("--chat-id", type=int, required=True, help="Telegram chat ID")
24
+ parser.add_argument(
25
+ "--state-db",
26
+ default=None,
27
+ help=(
28
+ "Path to the orchestrator state.db. When provided, orphan "
29
+ "Telegram replies route to persisted BLOCKED sessions in "
30
+ "pending_resumes. Required for the resume-via-Telegram flow."
31
+ ),
32
+ )
24
33
  args = parser.parse_args()
25
34
 
26
35
  bot_token = os.environ.get(args.bot_token_env)
@@ -31,11 +40,17 @@ def main() -> None:
31
40
  )
32
41
  sys.exit(2)
33
42
 
43
+ state_db = None
44
+ if args.state_db:
45
+ from ctrlrelay.core.state import StateDB
46
+ state_db = StateDB(Path(args.state_db))
47
+
34
48
  socket_path = Path(args.socket_path)
35
49
  server = BridgeServer(
36
50
  socket_path=socket_path,
37
51
  bot_token=bot_token,
38
52
  chat_id=args.chat_id,
53
+ state_db=state_db,
39
54
  )
40
55
 
41
56
  loop = asyncio.new_event_loop()
@@ -63,6 +78,11 @@ def main() -> None:
63
78
  pass
64
79
  finally:
65
80
  loop.close()
81
+ if state_db is not None:
82
+ try:
83
+ state_db.close()
84
+ except Exception:
85
+ pass
66
86
 
67
87
 
68
88
  if __name__ == "__main__":
@@ -9,6 +9,7 @@ import stat
9
9
  from collections import OrderedDict
10
10
  from datetime import datetime, timezone
11
11
  from pathlib import Path
12
+ from typing import TYPE_CHECKING
12
13
 
13
14
  from ctrlrelay.bridge.protocol import (
14
15
  BridgeMessage,
@@ -20,6 +21,9 @@ from ctrlrelay.bridge.protocol import (
20
21
  from ctrlrelay.bridge.telegram_handler import TelegramHandler
21
22
  from ctrlrelay.core.obs import get_logger, hash_text, log_event
22
23
 
24
+ if TYPE_CHECKING:
25
+ from ctrlrelay.core.state import StateDB
26
+
23
27
  _logger = get_logger("bridge.server")
24
28
  _log = logging.getLogger(__name__)
25
29
 
@@ -53,10 +57,18 @@ class BridgeServer:
53
57
  socket_path: Path,
54
58
  bot_token: str,
55
59
  chat_id: int,
60
+ state_db: "StateDB | None" = None,
56
61
  ) -> None:
57
62
  self.socket_path = socket_path
58
63
  self.bot_token = bot_token
59
64
  self.chat_id = chat_id
65
+ # Optional: when provided, an orphan Telegram reply (no live
66
+ # _pending_questions entry to match) is queued on the pending_resumes
67
+ # row it names by session_id, or on the sole unanswered row; with
68
+ # several candidates the bridge refuses to guess. The poller's
69
+ # pending-resume sweeper then drives the actual pipeline resume.
70
+ # Without state_db, orphan replies still get a "didn't land" Telegram notice but nothing gets queued.
71
+ self.state_db = state_db
60
72
  self._server: asyncio.Server | None = None
61
73
  self._running = False
62
74
  self._telegram: TelegramHandler | None = None
@@ -250,6 +262,55 @@ class BridgeServer:
250
262
  "bridge: incoming telegram msg with no pending question; "
251
263
  "text=%r", text[:80],
252
264
  )
265
+ # Try to route to a persisted BLOCKED session in state_db
266
+ # so the operator's reply actually drives a resume. Without
267
+ # this, the reply disappears the instant the session's ASK
268
+ # socket closes — which is exactly what happens when a
269
+ # scheduled secops sweep escalates BLOCKED and exits.
270
+ outcome = await self._queue_orphan_reply_as_resume_answer(text)
271
+ if self._telegram is not None:
272
+ try:
273
+ if outcome["status"] == "queued":
274
+ row = outcome["row"]
275
+ await self._telegram.send(
276
+ "✅ Answer queued for BLOCKED session "
277
+ f"`{row['session_id']}` "
278
+ f"(pipeline={row['pipeline']}, "
279
+ f"repo={row['repo']}).\n"
280
+ "The pending-resume sweeper will drive it "
281
+ "on the next tick — you'll get another "
282
+ "message with the result."
283
+ )
284
+ elif outcome["status"] == "ambiguous":
285
+ pending_list = "\n".join(
286
+ f" • `{r['session_id']}` ({r['repo']}): "
287
+ f"{(r['question'] or '')[:80]}"
288
+ for r in outcome["rows"]
289
+ )
290
+ await self._telegram.send(
291
+ "⚠️ Your reply wasn't routed — multiple "
292
+ "BLOCKED sessions are unanswered and your "
293
+ "message didn't include a session_id to "
294
+ "disambiguate.\n\n"
295
+ "Pending:\n"
296
+ f"{pending_list}\n\n"
297
+ "Reply again with the session_id included "
298
+ "(just paste it anywhere in your message)."
299
+ )
300
+ else:
301
+ await self._telegram.send(
302
+ "⚠️ Your reply wasn't routed — no active "
303
+ "session is waiting on input and no "
304
+ "persisted BLOCKED session is unanswered. "
305
+ "To act manually, re-run the pipeline, "
306
+ "e.g. `ctrlrelay run secops --repo "
307
+ "<owner>/<repo>`."
308
+ )
309
+ except Exception as e:
310
+ _log.warning(
311
+ "bridge: failed to notify orphan-reply sender: %s",
312
+ e,
313
+ )
253
314
  return
254
315
  self._pending_questions.pop(match.request_id, None)
255
316
 
@@ -283,3 +344,78 @@ class BridgeServer:
283
344
  "bridge: failed to deliver ANSWER request_id=%s err=%s",
284
345
  match.request_id, e,
285
346
  )
347
+
348
+ async def _queue_orphan_reply_as_resume_answer(
349
+ self, text: str
350
+ ) -> dict:
351
+ """Try to route an orphan Telegram reply to a persisted BLOCKED
352
+ session so the pending-resume sweeper can pick it up and drive a
353
+ pipeline resume.
354
+
355
+ Returns a dict with ``status`` set to one of:
356
+ - ``"queued"`` with ``row`` (dict) — answer was attached.
357
+ - ``"ambiguous"`` with ``rows`` (list[dict]) — multiple BLOCKED
358
+ sessions are unanswered and the reply names none of them, or it
359
+ names more than one, so we refuse to guess. The sender is told
360
+ which session_ids are candidates so they can retry with one.
361
+ - ``"none"`` — no state_db, no unanswered rows, or DB error.
362
+
363
+ Disambiguation rule: if the reply text contains exactly one of
364
+ the unanswered session_ids as a substring, route to that row.
365
+ Otherwise, with >1 unanswered rows and no substring match,
366
+ return ambiguous. With exactly one unanswered row and no
367
+ substring match, route anyway (single-repo case is unambiguous).
368
+ """
369
+ if self.state_db is None:
370
+ return {"status": "none"}
371
+ try:
372
+ rows = self.state_db.list_unanswered_pending_resumes()
373
+ except Exception as e:
374
+ log_event(
375
+ _logger,
376
+ "bridge.pending_resume.list_failed",
377
+ reason=type(e).__name__,
378
+ error=str(e)[:200],
379
+ )
380
+ return {"status": "none"}
381
+
382
+ if not rows:
383
+ return {"status": "none"}
384
+
385
+ matched_by_id = [r for r in rows if r["session_id"] in text]
386
+ if len(matched_by_id) == 1:
387
+ target = matched_by_id[0]
388
+ elif len(matched_by_id) > 1:
389
+ # Multiple session_ids named in the same reply — refuse to
390
+ # pick one. Let the operator send a single-session reply.
391
+ return {"status": "ambiguous", "rows": matched_by_id}
392
+ elif len(rows) == 1:
393
+ target = rows[0]
394
+ else:
395
+ # Multiple unanswered, no session_id hint — can't route safely.
396
+ return {"status": "ambiguous", "rows": rows}
397
+
398
+ try:
399
+ if not self.state_db.answer_pending_resume(
400
+ target["session_id"], text
401
+ ):
402
+ return {"status": "none"}
403
+ except Exception as e:
404
+ log_event(
405
+ _logger,
406
+ "bridge.pending_resume.update_failed",
407
+ reason=type(e).__name__,
408
+ error=str(e)[:200],
409
+ )
410
+ return {"status": "none"}
411
+
412
+ log_event(
413
+ _logger,
414
+ "bridge.pending_resume.queued",
415
+ session_id=target["session_id"],
416
+ pipeline=target["pipeline"],
417
+ repo=target["repo"],
418
+ answer_length=len(text),
419
+ answer_hash=hash_text(text),
420
+ )
421
+ return {"status": "queued", "row": target}
@@ -306,10 +306,18 @@ def bridge_start(
306
306
  console.print(f"Starting bridge on {socket_path}")
307
307
  console.print("Press Ctrl+C to stop")
308
308
 
309
+ # Open the state DB so the bridge can route orphan Telegram replies
310
+ # to persisted BLOCKED sessions in pending_resumes. Both daemons
311
+ # share ~/.ctrlrelay/state.db — SQLite's WAL mode handles concurrent
312
+ # readers/writers for the low contention we see here.
313
+ from ctrlrelay.core.state import StateDB
314
+ state_db = StateDB(config.paths.state_db)
315
+
309
316
  server = BridgeServer(
310
317
  socket_path=socket_path,
311
318
  bot_token=bot_token,
312
319
  chat_id=telegram_config.chat_id,
320
+ state_db=state_db,
313
321
  )
314
322
 
315
323
  loop = asyncio.new_event_loop()
@@ -340,6 +348,10 @@ def bridge_start(
340
348
  pass
341
349
  finally:
342
350
  loop.close()
351
+ try:
352
+ state_db.close()
353
+ except Exception:
354
+ pass
343
355
  pid_file.unlink(missing_ok=True)
344
356
  else:
345
357
  # Pass the token via environment, never argv. Putting it on the command
@@ -354,6 +366,8 @@ def bridge_start(
354
366
  telegram_config.bot_token_env,
355
367
  "--chat-id",
356
368
  str(telegram_config.chat_id),
369
+ "--state-db",
370
+ str(config.paths.state_db),
357
371
  ]
358
372
  proc = subprocess.Popen(
359
373
  cmd,
@@ -1321,6 +1335,147 @@ def poller_start(
1321
1335
  except Exception:
1322
1336
  pass
1323
1337
 
1338
+ async def _run_pending_resume_sweeper() -> None:
1339
+ """Drain pending_resumes rows that an operator answered via
1340
+ Telegram while the original BLOCKED session had already torn
1341
+ down.
1342
+
1343
+ Runs every minute inside the poller. For each answered row
1344
+ it acquires the repo lock, re-creates a worktree, calls
1345
+ ``SecopsPipeline.resume(ctx, answer)`` via the shared
1346
+ ``resume_secops_from_pending`` helper, then marks the row
1347
+ resumed and fans out a Telegram notification with the result
1348
+ so the operator knows whether the resume landed.
1349
+ """
1350
+ try:
1351
+ pending = state_db.list_pending_resumes_to_execute()
1352
+ except Exception as e:
1353
+ console.print(
1354
+ f"[yellow]pending_resume_sweeper: list failed ({e})"
1355
+ f"[/yellow]"
1356
+ )
1357
+ return
1358
+ if not pending:
1359
+ return
1360
+
1361
+ from ctrlrelay.pipelines.secops import resume_secops_from_pending
1362
+
1363
+ sweeper_transport = None
1364
+ if config.transport.type.value == "telegram" and config.transport.telegram:
1365
+ from ctrlrelay.transports import SocketTransport
1366
+ sock = config.transport.telegram.socket_path.expanduser().resolve()
1367
+ if sock.exists():
1368
+ try:
1369
+ candidate = SocketTransport(sock)
1370
+ await candidate.connect()
1371
+ sweeper_transport = candidate
1372
+ except Exception:
1373
+ sweeper_transport = None
1374
+
1375
+ try:
1376
+ for row in pending:
1377
+ session_id = row["session_id"]
1378
+ repo = row["repo"]
1379
+ pipeline_name = row["pipeline"]
1380
+ answer = row["answer"] or ""
1381
+ if pipeline_name != "secops":
1382
+ # dev pipeline resume-from-pending not wired yet —
1383
+ # leave the row marked-answered so a later sweep
1384
+ # picks it up once that path lands.
1385
+ continue
1386
+
1387
+ if sweeper_transport:
1388
+ try:
1389
+ await sweeper_transport.send(
1390
+ f"🔁 Resuming BLOCKED session "
1391
+ f"`{session_id}` on {repo} with your "
1392
+ f"answer..."
1393
+ )
1394
+ except Exception:
1395
+ pass
1396
+
1397
+ try:
1398
+ result = await resume_secops_from_pending(
1399
+ session_id=session_id,
1400
+ repo=repo,
1401
+ answer=answer,
1402
+ dispatcher=dispatcher,
1403
+ github=github,
1404
+ worktree=worktree,
1405
+ dashboard=scheduled_dashboard,
1406
+ state_db=state_db,
1407
+ transport=sweeper_transport,
1408
+ contexts_dir=config.paths.contexts,
1409
+ )
1410
+ except Exception as e:
1411
+ if sweeper_transport:
1412
+ try:
1413
+ await sweeper_transport.send(
1414
+ f"❌ Resume of `{session_id}` on "
1415
+ f"{repo} crashed: {e}"
1416
+ )
1417
+ except Exception:
1418
+ pass
1419
+ # Mark resumed so the sweeper doesn't hot-loop the
1420
+ # same broken row. Operator can inspect via the
1421
+ # sessions table.
1422
+ try:
1423
+ state_db.mark_pending_resume_resumed(session_id)
1424
+ except Exception:
1425
+ pass
1426
+ continue
1427
+
1428
+ # Lock-contention is retryable: the 6am secops cron or
1429
+ # an in-flight dev session holds the repo lock. Leave
1430
+ # the pending_resumes row as-is so the next sweeper
1431
+ # tick tries again. Without this guard the operator's
1432
+ # queued answer is silently dropped.
1433
+ if not result.success and result.error == (
1434
+ "Repository locked by another session"
1435
+ ):
1436
+ console.print(
1437
+ "[dim]pending_resume_sweeper: "
1438
+ f"lock contention on {repo}, will retry "
1439
+ f"next tick (session={session_id})[/dim]"
1440
+ )
1441
+ continue
1442
+
1443
+ try:
1444
+ state_db.mark_pending_resume_resumed(session_id)
1445
+ except Exception:
1446
+ pass
1447
+
1448
+ if sweeper_transport:
1449
+ try:
1450
+ if result.success:
1451
+ await sweeper_transport.send(
1452
+ f"✅ Resume succeeded on {repo}\n"
1453
+ f"Session: `{session_id}`\n"
1454
+ f"\n{result.summary}"
1455
+ )
1456
+ elif result.blocked:
1457
+ q = result.question or "(no question text)"
1458
+ await sweeper_transport.send(
1459
+ f"⏸️ Resume re-blocked on {repo}\n"
1460
+ f"Session: `{session_id}`\n"
1461
+ f"\n{q}"
1462
+ )
1463
+ else:
1464
+ err = result.error or result.summary
1465
+ await sweeper_transport.send(
1466
+ f"❌ Resume failed on {repo}\n"
1467
+ f"Session: `{session_id}`\n"
1468
+ f"\n{err}"
1469
+ )
1470
+ except Exception:
1471
+ pass
1472
+ finally:
1473
+ if sweeper_transport:
1474
+ try:
1475
+ await sweeper_transport.close()
1476
+ except Exception:
1477
+ pass
1478
+
1324
1479
  async def _main() -> None:
1325
1480
  # Register + start the scheduler FIRST, before any potentially
1326
1481
  # slow startup work. Otherwise a 6am fire that lands during
@@ -1333,10 +1488,20 @@ def poller_start(
1333
1488
  cron_expr=config.schedules.secops_cron,
1334
1489
  func=_run_scheduled_secops,
1335
1490
  )
1491
+ # Drain answered pending_resumes every minute so a Telegram
1492
+ # reply to a BLOCKED session turns into an actual pipeline
1493
+ # resume within ~60s, not 24h (the next scheduled secops
1494
+ # cron). Cheap: no-ops when the pending_resumes table is empty.
1495
+ scheduler.add_cron_job(
1496
+ name="pending_resume_sweeper",
1497
+ cron_expr="* * * * *",
1498
+ func=_run_pending_resume_sweeper,
1499
+ )
1336
1500
  scheduler.start()
1337
1501
  console.print(
1338
1502
  f"[dim]Scheduler: secops cron={config.schedules.secops_cron} "
1339
- f"tz={config.timezone}[/dim]"
1503
+ f"tz={config.timezone} | "
1504
+ f"pending_resume_sweeper=every 1m[/dim]"
1340
1505
  )
1341
1506
 
1342
1507
  # Now the slow startup: first-run seeding (one gh call per
@@ -31,6 +31,16 @@ _TRANSIENT_POLL_ERRORS = (TimeoutError, GitHubError, OSError)
31
31
  _REPO_FAILURE_WARN_THRESHOLD = 3
32
32
 
33
33
 
34
+ def _is_issues_disabled_error(exc: Exception) -> bool:
35
+ """Detect the specific GitHubError raised when a repo has its Issues
36
+ feature disabled. This is a permanent state (not a transient API
37
+ failure), so callers should skip the repo rather than retry it on every
38
+ poll cycle."""
39
+ if not isinstance(exc, GitHubError):
40
+ return False
41
+ return "has disabled issues" in str(exc).lower()
42
+
43
+
34
44
  @dataclass
35
45
  class IssuePoller:
36
46
  """Polls GitHub repos for newly assigned issues.
@@ -63,6 +73,12 @@ class IssuePoller:
63
73
  # seed_current(). Not persisted — intentionally resets on daemon
64
74
  # restart so an operator fix is exercised before we re-escalate.
65
75
  _repo_failure_counts: dict[str, int] = field(default_factory=dict, repr=False)
76
+ # Repos with GitHub Issues feature disabled — a permanent state, not a
77
+ # transient fetch error. Populated on first encounter and kept for the
78
+ # daemon lifetime so we don't spam WARNING logs every 120s cycle.
79
+ # Resets on daemon restart so a fresh detection still runs if the repo
80
+ # re-enables issues in the meantime.
81
+ _issues_disabled_repos: set[str] = field(default_factory=set, repr=False)
66
82
 
67
83
  def __post_init__(self) -> None:
68
84
  self._load_state()
@@ -142,6 +158,24 @@ class IssuePoller:
142
158
  """Reset the failure counter after a successful repo lookup."""
143
159
  self._repo_failure_counts.pop(repo, None)
144
160
 
161
+ def _mark_issues_disabled(self, repo: str) -> None:
162
+ """Mark a repo as having GitHub Issues disabled. Logged once at INFO
163
+ level so the operator can see which repos won't be polled; future
164
+ cycles skip the `gh` call entirely until daemon restart."""
165
+ if repo in self._issues_disabled_repos:
166
+ return
167
+ self._issues_disabled_repos.add(repo)
168
+ # Any accumulated transient-failure count is meaningless once we've
169
+ # identified the error as permanent — clear it so the failure counter
170
+ # starts fresh if the repo ever re-enables issues.
171
+ self._repo_failure_counts.pop(repo, None)
172
+ log_event(
173
+ _logger,
174
+ "poll.repo.issues_disabled",
175
+ repo=repo,
176
+ action="skipping permanently until daemon restart",
177
+ )
178
+
145
179
  # ------------------------------------------------------------------
146
180
  # Public API
147
181
  # ------------------------------------------------------------------
@@ -167,6 +201,10 @@ class IssuePoller:
167
201
  new_issues: list[dict[str, Any]] = []
168
202
 
169
203
  for repo in self.repos:
204
+ # Repos with GitHub Issues disabled will never return issues; skip
205
+ # before the `gh` call so we don't log the same error every cycle.
206
+ if repo in self._issues_disabled_repos:
207
+ continue
170
208
  try:
171
209
  issues = await self.github.list_assigned_issues(
172
210
  repo, assignee=self.username
@@ -174,6 +212,9 @@ class IssuePoller:
174
212
  except asyncio.CancelledError:
175
213
  raise
176
214
  except Exception as e:
215
+ if _is_issues_disabled_error(e):
216
+ self._mark_issues_disabled(repo)
217
+ continue
177
218
  # Transient-ish (TimeoutError/GitHubError/OSError) goes through
178
219
  # the failure counter so persistent misconfig escalates; any
179
220
  # other unexpected exception is logged as a skip too so the
@@ -371,6 +412,8 @@ class IssuePoller:
371
412
  treated as new and picked up — that's safer than crashing first-run.
372
413
  """
373
414
  for repo in self.repos:
415
+ if repo in self._issues_disabled_repos:
416
+ continue
374
417
  try:
375
418
  issues = await self.github.list_assigned_issues(
376
419
  repo, assignee=self.username
@@ -378,6 +421,9 @@ class IssuePoller:
378
421
  except asyncio.CancelledError:
379
422
  raise
380
423
  except _TRANSIENT_POLL_ERRORS as e:
424
+ if _is_issues_disabled_error(e):
425
+ self._mark_issues_disabled(repo)
426
+ continue
381
427
  self._record_repo_failure(repo, e, phase="seed")
382
428
  continue
383
429
  self._clear_repo_failure(repo)