ctrlrelay 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CHANGELOG.md +71 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/PKG-INFO +1 -1
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/pyproject.toml +1 -1
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/__main__.py +20 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/server.py +136 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/cli.py +166 -1
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/poller.py +46 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/state.py +112 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/secops.py +189 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_bridge_server.py +164 -1
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_poller.py +53 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_state.py +65 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/dependabot.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/build.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/cla.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/pages.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/publish.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.github/workflows/test.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/.gitignore +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CODE_OF_CONDUCT.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/CONTRIBUTING.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/LICENSE +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/README.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/SECURITY.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/config/orchestrator.yaml.example +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/Gemfile +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/_config.yml +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/architecture.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/bridge.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/cli.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/configuration.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/development.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/feedback-loop.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/getting-started.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/index.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/docs/operations.md +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/protocol.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/bridge/telegram_handler.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/audit.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/checkpoint.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/config.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/dispatcher.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/github.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/obs.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/pr_verifier.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/pr_watcher.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/scheduler.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/core/worktree.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/dashboard/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/dashboard/client.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/base.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/dev.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/pipelines/post_merge.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/base.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/file_mock.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/src/ctrlrelay/transports/socket_client.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/__init__.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/conftest.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_audit.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_bridge_protocol.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_checkpoint.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_ci_wait.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_dev.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_secops.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_start.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_cli_version.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_config.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dashboard_client.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dev_integration.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dev_pipeline.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_dispatcher.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_docs_site.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_github.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_obs.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pipeline_base.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_post_merge.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pr_verifier.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_pr_watcher.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_scheduler.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_secops_integration.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_secops_pipeline.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_telegram_handler.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_transport.py +0 -0
- {ctrlrelay-0.1.7 → ctrlrelay-0.1.8}/tests/test_worktree.py +0 -0
|
@@ -7,6 +7,77 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.8] - 2026-04-21
|
|
11
|
+
|
|
12
|
+
The "reply to BLOCKED in Telegram and it actually resumes" release.
|
|
13
|
+
Two operator-visibility fixes surfaced from running a 79-repo secops
|
|
14
|
+
sweep: a noisy log-spam issue and a silently-dropped-reply issue. The
|
|
15
|
+
latter turned into a proper resume flow.
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- **Resume BLOCKED secops via Telegram reply.** When a scheduled
|
|
20
|
+
secops sweep escalates BLOCKED and exits, the question is now
|
|
21
|
+
persisted in a new `pending_resumes` table. Replying in Telegram
|
|
22
|
+
matches against that table and queues the answer; a new per-minute
|
|
23
|
+
`pending_resume_sweeper` scheduler job inside the poller drains
|
|
24
|
+
answered rows — re-acquires the repo lock, re-creates the worktree,
|
|
25
|
+
calls `SecopsPipeline.resume(ctx, answer)`, and Telegrams the
|
|
26
|
+
result (success / re-blocked / failed). First reply-to-resume
|
|
27
|
+
round-trip is ≤60s.
|
|
28
|
+
- **Disambiguation when multiple BLOCKED sessions exist.** Replying
|
|
29
|
+
"merge it" when both `repoA` and `repoB` are blocked used to route
|
|
30
|
+
to FIFO (wrong repo, possibly destructive). The bridge now refuses
|
|
31
|
+
to guess: with >1 unanswered BLOCKED sessions it returns a Telegram
|
|
32
|
+
list of pending session_ids so the operator can reply with one
|
|
33
|
+
included. Single-BLOCKED case stays unambiguous.
|
|
34
|
+
|
|
35
|
+
### Fixed
|
|
36
|
+
|
|
37
|
+
- **Poller log spam on issues-disabled repos.** Repos with GitHub's
|
|
38
|
+
Issues feature disabled (template repos, signature repos, GitHub
|
|
39
|
+
Pages sites) returned a permanent `GitHubError(... has disabled
|
|
40
|
+
issues)` that the poller classified as transient, retrying every
|
|
41
|
+
120s and escalating to WARNING after 3 cycles. `poll()` and
|
|
42
|
+
`seed_current()` now detect the specific error, mark the repo in
|
|
43
|
+
an in-memory permanent-skip set, log once at INFO as
|
|
44
|
+
`poll.repo.issues_disabled`, and skip the `gh` call on subsequent
|
|
45
|
+
cycles. Resets on daemon restart.
|
|
46
|
+
- **Orphan Telegram replies silently dropped.** When a BLOCKED
|
|
47
|
+
session had already torn down (scheduled secops), the bridge's
|
|
48
|
+
in-memory `_pending_questions` entry died with the ASK socket and
|
|
49
|
+
the operator's reply disappeared with just an `info` log line. The
|
|
50
|
+
bridge now replies via Telegram so the failure is visible (and,
|
|
51
|
+
with the resume flow above, actually lands as an answer).
|
|
52
|
+
- **Pending_resumes rows no longer dropped on sweeper lock
|
|
53
|
+
contention.** When the per-minute sweeper raced the 6am scheduled
|
|
54
|
+
secops on the same repo, it used to `mark_pending_resume_resumed`
|
|
55
|
+
unconditionally and lose the queued answer. The sweeper now detects
|
|
56
|
+
the specific `"Repository locked by another session"` error and
|
|
57
|
+
leaves the row pending for the next tick.
|
|
58
|
+
|
|
59
|
+
### Schema migration
|
|
60
|
+
|
|
61
|
+
State DB gains a `pending_resumes` table (session_id PK, pipeline,
|
|
62
|
+
repo, question, created_at, answer, answered_at, resumed_at). Two
|
|
63
|
+
partial indexes: `idx_pending_resumes_unanswered` (for orphan-reply
|
|
64
|
+
lookup) and `idx_pending_resumes_answered_unresumed` (for sweeper
|
|
65
|
+
load). Created automatically on daemon start; no backfill needed.
|
|
66
|
+
|
|
67
|
+
### Operator notes
|
|
68
|
+
|
|
69
|
+
- Upgrade via `uv tool upgrade ctrlrelay` (or
|
|
70
|
+
`uv tool install ctrlrelay@latest --force` if pinned), restart
|
|
71
|
+
poller and bridge so the new sweeper schedules and the bridge
|
|
72
|
+
sees the new schema.
|
|
73
|
+
- To exercise the resume-via-Telegram path: let a scheduled secops
|
|
74
|
+
escalate BLOCKED, reply to the Telegram notification with your
|
|
75
|
+
decision (or a fresh message that mentions the session_id if
|
|
76
|
+
multiple repos are BLOCKED). Expect a `✅ Answer queued` ack within
|
|
77
|
+
seconds and a result message within ~1 minute.
|
|
78
|
+
- Dev pipeline resume-via-Telegram is not yet wired; the sweeper
|
|
79
|
+
skips non-secops rows.
|
|
80
|
+
|
|
10
81
|
## [0.1.7] - 2026-04-20
|
|
11
82
|
|
|
12
83
|
Patch release. Fixes one drift bug in how the package reports its own
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ctrlrelay
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Local-first orchestrator for headless coding agents across multiple GitHub repos
|
|
5
5
|
Project-URL: Homepage, https://github.com/AInvirion/ctrlrelay
|
|
6
6
|
Project-URL: Documentation, https://ainvirion.github.io/ctrlrelay/
|
|
@@ -21,6 +21,15 @@ def main() -> None:
|
|
|
21
21
|
help="Environment variable holding the Telegram bot token",
|
|
22
22
|
)
|
|
23
23
|
parser.add_argument("--chat-id", type=int, required=True, help="Telegram chat ID")
|
|
24
|
+
parser.add_argument(
|
|
25
|
+
"--state-db",
|
|
26
|
+
default=None,
|
|
27
|
+
help=(
|
|
28
|
+
"Path to the orchestrator state.db. When provided, orphan "
|
|
29
|
+
"Telegram replies route to persisted BLOCKED sessions in "
|
|
30
|
+
"pending_resumes. Required for the resume-via-Telegram flow."
|
|
31
|
+
),
|
|
32
|
+
)
|
|
24
33
|
args = parser.parse_args()
|
|
25
34
|
|
|
26
35
|
bot_token = os.environ.get(args.bot_token_env)
|
|
@@ -31,11 +40,17 @@ def main() -> None:
|
|
|
31
40
|
)
|
|
32
41
|
sys.exit(2)
|
|
33
42
|
|
|
43
|
+
state_db = None
|
|
44
|
+
if args.state_db:
|
|
45
|
+
from ctrlrelay.core.state import StateDB
|
|
46
|
+
state_db = StateDB(Path(args.state_db))
|
|
47
|
+
|
|
34
48
|
socket_path = Path(args.socket_path)
|
|
35
49
|
server = BridgeServer(
|
|
36
50
|
socket_path=socket_path,
|
|
37
51
|
bot_token=bot_token,
|
|
38
52
|
chat_id=args.chat_id,
|
|
53
|
+
state_db=state_db,
|
|
39
54
|
)
|
|
40
55
|
|
|
41
56
|
loop = asyncio.new_event_loop()
|
|
@@ -63,6 +78,11 @@ def main() -> None:
|
|
|
63
78
|
pass
|
|
64
79
|
finally:
|
|
65
80
|
loop.close()
|
|
81
|
+
if state_db is not None:
|
|
82
|
+
try:
|
|
83
|
+
state_db.close()
|
|
84
|
+
except Exception:
|
|
85
|
+
pass
|
|
66
86
|
|
|
67
87
|
|
|
68
88
|
if __name__ == "__main__":
|
|
@@ -9,6 +9,7 @@ import stat
|
|
|
9
9
|
from collections import OrderedDict
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
12
13
|
|
|
13
14
|
from ctrlrelay.bridge.protocol import (
|
|
14
15
|
BridgeMessage,
|
|
@@ -20,6 +21,9 @@ from ctrlrelay.bridge.protocol import (
|
|
|
20
21
|
from ctrlrelay.bridge.telegram_handler import TelegramHandler
|
|
21
22
|
from ctrlrelay.core.obs import get_logger, hash_text, log_event
|
|
22
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from ctrlrelay.core.state import StateDB
|
|
26
|
+
|
|
23
27
|
_logger = get_logger("bridge.server")
|
|
24
28
|
_log = logging.getLogger(__name__)
|
|
25
29
|
|
|
@@ -53,10 +57,18 @@ class BridgeServer:
|
|
|
53
57
|
socket_path: Path,
|
|
54
58
|
bot_token: str,
|
|
55
59
|
chat_id: int,
|
|
60
|
+
state_db: "StateDB | None" = None,
|
|
56
61
|
) -> None:
|
|
57
62
|
self.socket_path = socket_path
|
|
58
63
|
self.bot_token = bot_token
|
|
59
64
|
self.chat_id = chat_id
|
|
65
|
+
# Optional: when provided, orphan Telegram replies (no live
|
|
66
|
+
# _pending_question to match) are routed to the matching unanswered
|
|
67
|
+
# BLOCKED session in state_db's pending_resumes table. The poller's
|
|
68
|
+
# pending-resume sweeper then picks up the answer and drives the
|
|
69
|
+
# actual pipeline resume. Without state_db, orphan replies still
|
|
70
|
+
# get a "didn't land" Telegram notice but nothing gets queued.
|
|
71
|
+
self.state_db = state_db
|
|
60
72
|
self._server: asyncio.Server | None = None
|
|
61
73
|
self._running = False
|
|
62
74
|
self._telegram: TelegramHandler | None = None
|
|
@@ -250,6 +262,55 @@ class BridgeServer:
|
|
|
250
262
|
"bridge: incoming telegram msg with no pending question; "
|
|
251
263
|
"text=%r", text[:80],
|
|
252
264
|
)
|
|
265
|
+
# Try to route to a persisted BLOCKED session in state_db
|
|
266
|
+
# so the operator's reply actually drives a resume. Without
|
|
267
|
+
# this, the reply disappears the instant the session's ASK
|
|
268
|
+
# socket closes — which is exactly what happens when a
|
|
269
|
+
# scheduled secops sweep escalates BLOCKED and exits.
|
|
270
|
+
outcome = await self._queue_orphan_reply_as_resume_answer(text)
|
|
271
|
+
if self._telegram is not None:
|
|
272
|
+
try:
|
|
273
|
+
if outcome["status"] == "queued":
|
|
274
|
+
row = outcome["row"]
|
|
275
|
+
await self._telegram.send(
|
|
276
|
+
"✅ Answer queued for BLOCKED session "
|
|
277
|
+
f"`{row['session_id']}` "
|
|
278
|
+
f"(pipeline={row['pipeline']}, "
|
|
279
|
+
f"repo={row['repo']}).\n"
|
|
280
|
+
"The pending-resume sweeper will drive it "
|
|
281
|
+
"on the next tick — you'll get another "
|
|
282
|
+
"message with the result."
|
|
283
|
+
)
|
|
284
|
+
elif outcome["status"] == "ambiguous":
|
|
285
|
+
pending_list = "\n".join(
|
|
286
|
+
f" • `{r['session_id']}` ({r['repo']}): "
|
|
287
|
+
f"{(r['question'] or '')[:80]}"
|
|
288
|
+
for r in outcome["rows"]
|
|
289
|
+
)
|
|
290
|
+
await self._telegram.send(
|
|
291
|
+
"⚠️ Your reply wasn't routed — multiple "
|
|
292
|
+
"BLOCKED sessions are unanswered and your "
|
|
293
|
+
"message didn't include a session_id to "
|
|
294
|
+
"disambiguate.\n\n"
|
|
295
|
+
"Pending:\n"
|
|
296
|
+
f"{pending_list}\n\n"
|
|
297
|
+
"Reply again with the session_id included "
|
|
298
|
+
"(just paste it anywhere in your message)."
|
|
299
|
+
)
|
|
300
|
+
else:
|
|
301
|
+
await self._telegram.send(
|
|
302
|
+
"⚠️ Your reply wasn't routed — no active "
|
|
303
|
+
"session is waiting on input and no "
|
|
304
|
+
"persisted BLOCKED session is unanswered. "
|
|
305
|
+
"To act manually, re-run the pipeline, "
|
|
306
|
+
"e.g. `ctrlrelay run secops --repo "
|
|
307
|
+
"<owner>/<repo>`."
|
|
308
|
+
)
|
|
309
|
+
except Exception as e:
|
|
310
|
+
_log.warning(
|
|
311
|
+
"bridge: failed to notify orphan-reply sender: %s",
|
|
312
|
+
e,
|
|
313
|
+
)
|
|
253
314
|
return
|
|
254
315
|
self._pending_questions.pop(match.request_id, None)
|
|
255
316
|
|
|
@@ -283,3 +344,78 @@ class BridgeServer:
|
|
|
283
344
|
"bridge: failed to deliver ANSWER request_id=%s err=%s",
|
|
284
345
|
match.request_id, e,
|
|
285
346
|
)
|
|
347
|
+
|
|
348
|
+
async def _queue_orphan_reply_as_resume_answer(
    self, text: str
) -> dict:
    """Attach an orphan Telegram reply to a persisted BLOCKED session.

    An "orphan" reply is one with no live pending question to match.
    When it can be attributed to exactly one unanswered row, the reply
    text is stored as that row's answer so the pending-resume sweeper
    can later drive the pipeline resume.

    The returned dict always carries a ``status`` key:

    - ``"queued"`` plus ``row`` (dict): the answer was stored.
    - ``"ambiguous"`` plus ``rows`` (list[dict]): more than one session
      could be meant, so nothing is stored. ``rows`` holds the sessions
      named in the reply, or every unanswered session when none was
      named, so the sender can retry with a single session_id.
    - ``"none"``: no state_db, no unanswered rows, the store reported
      that nothing was updated, or a DB call raised.

    Routing rule: session_ids are matched as plain substrings of the
    reply. Exactly one named session wins; with no session named, a
    lone unanswered row is used because that case is unambiguous.
    """
    db = self.state_db
    if db is None:
        return {"status": "none"}

    try:
        unanswered = db.list_unanswered_pending_resumes()
    except Exception as exc:
        log_event(
            _logger,
            "bridge.pending_resume.list_failed",
            reason=type(exc).__name__,
            error=str(exc)[:200],
        )
        return {"status": "none"}
    if not unanswered:
        return {"status": "none"}

    # Sessions explicitly named in the reply take precedence; fall back
    # to the full unanswered list only when the reply names none.
    named = [row for row in unanswered if row["session_id"] in text]
    candidates = named if named else unanswered
    if len(candidates) > 1:
        # Either several ids were named or several rows are waiting and
        # none was named — refuse to guess which one the operator meant.
        return {"status": "ambiguous", "rows": candidates}
    chosen = candidates[0]

    try:
        stored = bool(db.answer_pending_resume(chosen["session_id"], text))
    except Exception as exc:
        log_event(
            _logger,
            "bridge.pending_resume.update_failed",
            reason=type(exc).__name__,
            error=str(exc)[:200],
        )
        return {"status": "none"}
    if not stored:
        return {"status": "none"}

    # Log only the length and hash of the answer, never the text itself.
    log_event(
        _logger,
        "bridge.pending_resume.queued",
        session_id=chosen["session_id"],
        pipeline=chosen["pipeline"],
        repo=chosen["repo"],
        answer_length=len(text),
        answer_hash=hash_text(text),
    )
    return {"status": "queued", "row": chosen}
|
|
@@ -306,10 +306,18 @@ def bridge_start(
|
|
|
306
306
|
console.print(f"Starting bridge on {socket_path}")
|
|
307
307
|
console.print("Press Ctrl+C to stop")
|
|
308
308
|
|
|
309
|
+
# Open the state DB so the bridge can route orphan Telegram replies
|
|
310
|
+
# to persisted BLOCKED sessions in pending_resumes. Both daemons
|
|
311
|
+
# share ~/.ctrlrelay/state.db — SQLite's WAL mode handles concurrent
|
|
312
|
+
# readers/writers for the low contention we see here.
|
|
313
|
+
from ctrlrelay.core.state import StateDB
|
|
314
|
+
state_db = StateDB(config.paths.state_db)
|
|
315
|
+
|
|
309
316
|
server = BridgeServer(
|
|
310
317
|
socket_path=socket_path,
|
|
311
318
|
bot_token=bot_token,
|
|
312
319
|
chat_id=telegram_config.chat_id,
|
|
320
|
+
state_db=state_db,
|
|
313
321
|
)
|
|
314
322
|
|
|
315
323
|
loop = asyncio.new_event_loop()
|
|
@@ -340,6 +348,10 @@ def bridge_start(
|
|
|
340
348
|
pass
|
|
341
349
|
finally:
|
|
342
350
|
loop.close()
|
|
351
|
+
try:
|
|
352
|
+
state_db.close()
|
|
353
|
+
except Exception:
|
|
354
|
+
pass
|
|
343
355
|
pid_file.unlink(missing_ok=True)
|
|
344
356
|
else:
|
|
345
357
|
# Pass the token via environment, never argv. Putting it on the command
|
|
@@ -354,6 +366,8 @@ def bridge_start(
|
|
|
354
366
|
telegram_config.bot_token_env,
|
|
355
367
|
"--chat-id",
|
|
356
368
|
str(telegram_config.chat_id),
|
|
369
|
+
"--state-db",
|
|
370
|
+
str(config.paths.state_db),
|
|
357
371
|
]
|
|
358
372
|
proc = subprocess.Popen(
|
|
359
373
|
cmd,
|
|
@@ -1321,6 +1335,147 @@ def poller_start(
|
|
|
1321
1335
|
except Exception:
|
|
1322
1336
|
pass
|
|
1323
1337
|
|
|
1338
|
+
async def _run_pending_resume_sweeper() -> None:
    """Drain pending_resumes rows that an operator answered via Telegram
    after the original BLOCKED session had already torn down.

    Runs every minute inside the poller. For each answered secops row it
    calls the shared ``resume_secops_from_pending`` helper, marks the row
    resumed and sends a Telegram notification with the outcome
    (success / re-blocked / failed) so the operator knows whether the
    resume landed. Rows for other pipelines are skipped and left as-is.

    A resume that reports ``"Repository locked by another session"`` is
    treated as retryable: the row is left untouched for the next tick.

    Telegram notifications are best-effort; a failed send never aborts
    the sweep. A failure to mark a row resumed is reported on the
    console rather than swallowed, because an unmarked row is handed to
    the sweeper again on the next tick.
    """
    # Error text that identifies retryable lock contention.
    lock_contention_error = "Repository locked by another session"

    def _mark_resumed(sid: str) -> None:
        """Mark one row resumed; report (never raise) on failure."""
        try:
            state_db.mark_pending_resume_resumed(sid)
        except Exception as exc:
            # Only the exception type is interpolated so arbitrary error
            # text cannot interfere with the console markup.
            console.print(
                "[yellow]pending_resume_sweeper: failed to mark "
                f"session={sid} resumed ({type(exc).__name__}); "
                "the row may be swept again[/yellow]"
            )

    try:
        pending = state_db.list_pending_resumes_to_execute()
    except Exception as e:
        console.print(
            f"[yellow]pending_resume_sweeper: list failed ({e})"
            f"[/yellow]"
        )
        return
    if not pending:
        return

    from ctrlrelay.pipelines.secops import resume_secops_from_pending

    # Connect to the bridge socket once per sweep; without it the
    # resumes still run, just without Telegram notifications.
    sweeper_transport = None
    if config.transport.type.value == "telegram" and config.transport.telegram:
        from ctrlrelay.transports import SocketTransport

        sock = config.transport.telegram.socket_path.expanduser().resolve()
        if sock.exists():
            try:
                candidate = SocketTransport(sock)
                await candidate.connect()
                sweeper_transport = candidate
            except Exception:
                sweeper_transport = None

    try:
        for row in pending:
            session_id = row["session_id"]
            repo = row["repo"]
            pipeline_name = row["pipeline"]
            answer = row["answer"] or ""
            if pipeline_name != "secops":
                # dev pipeline resume-from-pending not wired yet —
                # leave the row marked-answered so a later sweep
                # picks it up once that path lands.
                continue

            if sweeper_transport:
                try:
                    await sweeper_transport.send(
                        f"🔁 Resuming BLOCKED session "
                        f"`{session_id}` on {repo} with your "
                        f"answer..."
                    )
                except Exception:
                    # Best-effort notice; the resume proceeds regardless.
                    pass

            try:
                result = await resume_secops_from_pending(
                    session_id=session_id,
                    repo=repo,
                    answer=answer,
                    dispatcher=dispatcher,
                    github=github,
                    worktree=worktree,
                    dashboard=scheduled_dashboard,
                    state_db=state_db,
                    transport=sweeper_transport,
                    contexts_dir=config.paths.contexts,
                )
            except Exception as e:
                if sweeper_transport:
                    try:
                        await sweeper_transport.send(
                            f"❌ Resume of `{session_id}` on "
                            f"{repo} crashed: {e}"
                        )
                    except Exception:
                        pass
                # Mark resumed so the sweeper doesn't hot-loop the
                # same broken row. Operator can inspect via the
                # sessions table.
                _mark_resumed(session_id)
                continue

            # Lock-contention is retryable: the 6am secops cron or
            # an in-flight dev session holds the repo lock. Leave
            # the pending_resumes row as-is so the next sweeper
            # tick tries again. Without this guard the operator's
            # queued answer is silently dropped.
            if not result.success and result.error == lock_contention_error:
                console.print(
                    "[dim]pending_resume_sweeper: "
                    f"lock contention on {repo}, will retry "
                    f"next tick (session={session_id})[/dim]"
                )
                continue

            _mark_resumed(session_id)

            if sweeper_transport:
                try:
                    if result.success:
                        await sweeper_transport.send(
                            f"✅ Resume succeeded on {repo}\n"
                            f"Session: `{session_id}`\n"
                            f"\n{result.summary}"
                        )
                    elif result.blocked:
                        q = result.question or "(no question text)"
                        await sweeper_transport.send(
                            f"⏸️ Resume re-blocked on {repo}\n"
                            f"Session: `{session_id}`\n"
                            f"\n{q}"
                        )
                    else:
                        err = result.error or result.summary
                        await sweeper_transport.send(
                            f"❌ Resume failed on {repo}\n"
                            f"Session: `{session_id}`\n"
                            f"\n{err}"
                        )
                except Exception:
                    # Best-effort result notice; the row is already marked.
                    pass
    finally:
        # Always release the bridge connection, even if the loop raised.
        if sweeper_transport:
            try:
                await sweeper_transport.close()
            except Exception:
                pass
|
|
1478
|
+
|
|
1324
1479
|
async def _main() -> None:
|
|
1325
1480
|
# Register + start the scheduler FIRST, before any potentially
|
|
1326
1481
|
# slow startup work. Otherwise a 6am fire that lands during
|
|
@@ -1333,10 +1488,20 @@ def poller_start(
|
|
|
1333
1488
|
cron_expr=config.schedules.secops_cron,
|
|
1334
1489
|
func=_run_scheduled_secops,
|
|
1335
1490
|
)
|
|
1491
|
+
# Drain answered pending_resumes every minute so a Telegram
|
|
1492
|
+
# reply to a BLOCKED session turns into an actual pipeline
|
|
1493
|
+
# resume within ~60s, not 24h (the next scheduled secops
|
|
1494
|
+
# cron). Cheap: no-ops when the pending_resumes table is empty.
|
|
1495
|
+
scheduler.add_cron_job(
|
|
1496
|
+
name="pending_resume_sweeper",
|
|
1497
|
+
cron_expr="* * * * *",
|
|
1498
|
+
func=_run_pending_resume_sweeper,
|
|
1499
|
+
)
|
|
1336
1500
|
scheduler.start()
|
|
1337
1501
|
console.print(
|
|
1338
1502
|
f"[dim]Scheduler: secops cron={config.schedules.secops_cron} "
|
|
1339
|
-
f"tz={config.timezone}
|
|
1503
|
+
f"tz={config.timezone} | "
|
|
1504
|
+
f"pending_resume_sweeper=every 1m[/dim]"
|
|
1340
1505
|
)
|
|
1341
1506
|
|
|
1342
1507
|
# Now the slow startup: first-run seeding (one gh call per
|
|
@@ -31,6 +31,16 @@ _TRANSIENT_POLL_ERRORS = (TimeoutError, GitHubError, OSError)
|
|
|
31
31
|
_REPO_FAILURE_WARN_THRESHOLD = 3
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
def _is_issues_disabled_error(exc: Exception) -> bool:
    """Return True when *exc* is the GitHubError reported for a repo whose
    Issues feature is disabled.

    That condition is permanent rather than a transient API failure, so
    callers should skip the repo instead of retrying it every poll cycle.
    The check is a case-insensitive match on the error text.
    """
    return isinstance(exc, GitHubError) and (
        "has disabled issues" in str(exc).lower()
    )
|
|
42
|
+
|
|
43
|
+
|
|
34
44
|
@dataclass
|
|
35
45
|
class IssuePoller:
|
|
36
46
|
"""Polls GitHub repos for newly assigned issues.
|
|
@@ -63,6 +73,12 @@ class IssuePoller:
|
|
|
63
73
|
# seed_current(). Not persisted — intentionally resets on daemon
|
|
64
74
|
# restart so an operator fix is exercised before we re-escalate.
|
|
65
75
|
_repo_failure_counts: dict[str, int] = field(default_factory=dict, repr=False)
|
|
76
|
+
# Repos with GitHub Issues feature disabled — a permanent state, not a
|
|
77
|
+
# transient fetch error. Populated on first encounter and kept for the
|
|
78
|
+
# daemon lifetime so we don't spam WARNING logs every 120s cycle.
|
|
79
|
+
# Resets on daemon restart so a fresh detection still runs if the repo
|
|
80
|
+
# re-enables issues in the meantime.
|
|
81
|
+
_issues_disabled_repos: set[str] = field(default_factory=set, repr=False)
|
|
66
82
|
|
|
67
83
|
def __post_init__(self) -> None:
|
|
68
84
|
self._load_state()
|
|
@@ -142,6 +158,24 @@ class IssuePoller:
|
|
|
142
158
|
"""Reset the failure counter after a successful repo lookup."""
|
|
143
159
|
self._repo_failure_counts.pop(repo, None)
|
|
144
160
|
|
|
161
|
+
def _mark_issues_disabled(self, repo: str) -> None:
    """Record that *repo* has GitHub Issues disabled.

    The first call for a repo adds it to the in-memory skip set, drops
    any transient-failure count accumulated for it, and emits one
    ``poll.repo.issues_disabled`` event so the operator can see which
    repos won't be polled. Later calls for the same repo do nothing.
    """
    skipped = self._issues_disabled_repos
    if repo not in skipped:
        skipped.add(repo)
        # The error is now known to be permanent, so the transient
        # failure counter for this repo no longer means anything.
        self._repo_failure_counts.pop(repo, None)
        log_event(
            _logger,
            "poll.repo.issues_disabled",
            repo=repo,
            action="skipping permanently until daemon restart",
        )
|
|
178
|
+
|
|
145
179
|
# ------------------------------------------------------------------
|
|
146
180
|
# Public API
|
|
147
181
|
# ------------------------------------------------------------------
|
|
@@ -167,6 +201,10 @@ class IssuePoller:
|
|
|
167
201
|
new_issues: list[dict[str, Any]] = []
|
|
168
202
|
|
|
169
203
|
for repo in self.repos:
|
|
204
|
+
# Repos with GitHub Issues disabled will never return issues; skip
|
|
205
|
+
# before the `gh` call so we don't log the same error every cycle.
|
|
206
|
+
if repo in self._issues_disabled_repos:
|
|
207
|
+
continue
|
|
170
208
|
try:
|
|
171
209
|
issues = await self.github.list_assigned_issues(
|
|
172
210
|
repo, assignee=self.username
|
|
@@ -174,6 +212,9 @@ class IssuePoller:
|
|
|
174
212
|
except asyncio.CancelledError:
|
|
175
213
|
raise
|
|
176
214
|
except Exception as e:
|
|
215
|
+
if _is_issues_disabled_error(e):
|
|
216
|
+
self._mark_issues_disabled(repo)
|
|
217
|
+
continue
|
|
177
218
|
# Transient-ish (TimeoutError/GitHubError/OSError) goes through
|
|
178
219
|
# the failure counter so persistent misconfig escalates; any
|
|
179
220
|
# other unexpected exception is logged as a skip too so the
|
|
@@ -371,6 +412,8 @@ class IssuePoller:
|
|
|
371
412
|
treated as new and picked up — that's safer than crashing first-run.
|
|
372
413
|
"""
|
|
373
414
|
for repo in self.repos:
|
|
415
|
+
if repo in self._issues_disabled_repos:
|
|
416
|
+
continue
|
|
374
417
|
try:
|
|
375
418
|
issues = await self.github.list_assigned_issues(
|
|
376
419
|
repo, assignee=self.username
|
|
@@ -378,6 +421,9 @@ class IssuePoller:
|
|
|
378
421
|
except asyncio.CancelledError:
|
|
379
422
|
raise
|
|
380
423
|
except _TRANSIENT_POLL_ERRORS as e:
|
|
424
|
+
if _is_issues_disabled_error(e):
|
|
425
|
+
self._mark_issues_disabled(repo)
|
|
426
|
+
continue
|
|
381
427
|
self._record_repo_failure(repo, e, phase="seed")
|
|
382
428
|
continue
|
|
383
429
|
self._clear_repo_failure(repo)
|