ctrlrelay 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,337 @@
1
+ """In-process job scheduler for recurring background work.
2
+
3
+ Wraps APScheduler's AsyncIOScheduler with the project's conventions:
4
+
5
+ - MemoryJobStore only (cron triggers recompute the next fire time on every
6
+ start, so persistence buys us nothing and adds SQLAlchemy as a runtime
7
+ dep).
8
+ - ``coalesce=True`` + ``misfire_grace_time=3600s`` so a laptop that was
9
+ asleep at the fire time still runs the job when it wakes within an hour,
10
+ and multiple missed fires collapse into one run.
11
+ - Structured obs logging via ``log_event`` so job lifecycle shows up in
12
+ the same log stream as the poller itself.
13
+
14
+ Cross-platform: the scheduler runs in the poller's asyncio loop, so macOS
15
+ (launchd) and Linux (systemd) behave identically — no per-OS timer unit
16
+ is required.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ from collections.abc import Awaitable, Callable
23
+
24
+ from apscheduler.schedulers.asyncio import AsyncIOScheduler
25
+ from apscheduler.triggers.combining import OrTrigger
26
+ from apscheduler.triggers.cron import CronTrigger
27
+
28
+ from ctrlrelay.core.obs import get_logger, log_event
29
+
30
# Module-level logger; every ``log_event`` call in this module goes through it.
_logger = get_logger("core.scheduler")

# Shape of a schedulable job: a zero-argument callable returning an awaitable.
JobFunc = Callable[[], Awaitable[None]]
33
+
34
+
35
# Weekday numbering differs between the two cron dialects.
# APScheduler's CronTrigger.from_crontab counts Mon=0..Sun=6 and rejects 7,
# while Vixie cron (what the orchestrator.yaml docs describe) counts
# Sun=0..Sat=6 and accepts 7 as a second spelling of Sunday. Left alone,
# `0 6 * * 1` would silently fire on Tuesday instead of Monday. Weekday
# *names* mean the same thing in both dialects, so numeric DOW fields are
# rewritten to names before the trigger is built.
_VIXIE_DOW_NAMES = ("sun", "mon", "tue", "wed", "thu", "fri", "sat")
# Reverse lookup: weekday name -> Vixie number (its index in the tuple above).
_VIXIE_NAME_TO_NUM = dict(zip(_VIXIE_DOW_NAMES, range(len(_VIXIE_DOW_NAMES))))
44
+
45
+
46
def _dow_to_vixie_num(tok: str) -> int | None:
    """Parse a day-of-week token as a Vixie weekday number.

    Args:
        tok: A single DOW token: digits ``0``..``7`` or a three-letter
            weekday name (matched case-insensitively).

    Returns:
        The Vixie number (Sun=0..Sat=6, with ``7`` folded onto 0), or
        ``None`` when the token is neither, so callers can leave it
        untouched for APScheduler to reject.
    """
    if tok.isdigit():
        # ``str.isdigit`` also accepts characters (e.g. superscript digits)
        # that ``int`` refuses to parse; report those as unparseable instead
        # of letting a ValueError escape from a function documented to
        # return ``None``.
        try:
            n = int(tok)
        except ValueError:
            return None
        if 0 <= n <= 7:
            return 0 if n == 7 else n
        return None
    return _VIXIE_NAME_TO_NUM.get(tok.lower())
57
+
58
+
59
def _dow_name(n: int) -> str:
    """Return the weekday name for Vixie DOW number ``n``.

    ``7`` is folded onto ``0`` first, so both map to Sunday.
    """
    index = n
    if index == 7:
        index = 0
    return _VIXIE_DOW_NAMES[index]
62
+
63
+
64
def _expand_numeric_dow_range(
    start: int, end: int, step: int = 1
) -> list[str] | None:
    """Spell out a Vixie numeric DOW range as a list of weekday names.

    Vixie orders weekdays Sun=0..Sat=6 (7 = Sunday again) while APScheduler
    orders its named weekdays mon..sun, so ``0-6`` cannot be rewritten as
    ``sun-sat`` without looking inverted to APScheduler. Listing each day
    explicitly sidesteps the ordering mismatch.

    Returns ``None`` when an endpoint falls outside 0..7, when ``step`` is
    below 1, or when ``start`` is greater than ``end``.
    """
    if start < 0 or start > 7 or end < 0 or end > 7 or step < 1:
        return None
    if end < start:
        return None
    return list(map(_dow_name, range(start, end + 1, step)))
78
+
79
+
80
def _remap_dow_token(tok: str) -> str:
    """Rewrite one Vixie DOW token into APScheduler's named-weekday form.

    Handles a bare number, a range (``a-b``), a stepped form (``a-b/s``,
    ``*/s``, ``base/s``), or a name. Ranges and stepped forms, whether
    their endpoints are numeric or named, are expanded into an explicit
    comma-separated list of names: APScheduler orders weekdays mon..sun,
    so a valid Vixie range such as ``sun-fri`` would look inverted to it,
    and it reads a step on a named weekday differently from Vixie's
    "from base, every N days".

    Anything that cannot be interpreted is returned unchanged so that
    APScheduler's own parser reports the error.
    """
    if "/" not in tok:
        # Plain range "a-b": endpoints may be numbers or names.
        if "-" in tok:
            lo, hi = tok.split("-", 1)
            lo_num = _dow_to_vixie_num(lo)
            hi_num = _dow_to_vixie_num(hi)
            if lo_num is not None and hi_num is not None:
                names = _expand_numeric_dow_range(lo_num, hi_num)
                if names is not None:
                    return ",".join(names)
        # Bare number "0".."7".
        if tok.isdigit():
            value = int(tok)
            if 0 <= value <= 7:
                return _dow_name(value)
        return tok

    # Stepped forms from here on.
    base, _, step_text = tok.partition("/")
    try:
        step = int(step_text)
    except ValueError:
        return tok
    if step < 1:
        return tok

    # "a-b/s": stepped range with numeric or named endpoints.
    if "-" in base:
        lo, hi = base.split("-", 1)
        lo_num = _dow_to_vixie_num(lo)
        hi_num = _dow_to_vixie_num(hi)
        if lo_num is not None and hi_num is not None:
            names = _expand_numeric_dow_range(lo_num, hi_num, step)
            if names is not None:
                return ",".join(names)
        return tok

    # "*/s": step across the whole week, starting at Sunday.
    if base == "*":
        names = _expand_numeric_dow_range(0, 6, step)
        return ",".join(names) if names else tok

    # "n/s" or "mon/s": from the base day, every s days through Saturday.
    base_num = _dow_to_vixie_num(base)
    if base_num is not None:
        names = _expand_numeric_dow_range(base_num, 6, step)
        if names is not None:
            return ",".join(names)
    return tok
137
+
138
+
139
def _build_vixie_trigger(cron_expr: str, timezone):
    """Build an APScheduler trigger with Vixie day-of-month/day-of-week rules.

    In Vixie cron, an expression whose day-of-month AND day-of-week fields
    are both non-wildcard fires when EITHER matches. A single
    ``CronTrigger.from_crontab`` would require both to match, turning
    ``0 6 1 * mon`` into "Mondays that fall on the 1st".

    So when both fields are set (anything other than a literal ``*``), the
    normalized expression is split into a DOM-only and a DOW-only
    ``CronTrigger`` and the pair is wrapped in an ``OrTrigger``. In every
    other case, including expressions that do not have five fields, the
    normalized expression is handed to ``CronTrigger.from_crontab`` as is.

    Args:
        cron_expr: Vixie-style cron expression.
        timezone: Passed through unchanged to ``CronTrigger.from_crontab``.
    """
    normalized = _normalize_cron(cron_expr)
    fields = normalized.split()

    both_restricted = len(fields) == 5 and fields[2] != "*" and fields[4] != "*"
    if not both_restricted:
        return CronTrigger.from_crontab(normalized, timezone=timezone)

    minute, hour, dom, month, dow = fields
    # DOM-only first, then DOW-only; either match fires the job.
    sub_exprs = (
        f"{minute} {hour} {dom} {month} *",
        f"{minute} {hour} * {month} {dow}",
    )
    return OrTrigger(
        [CronTrigger.from_crontab(sub, timezone=timezone) for sub in sub_exprs]
    )
172
+
173
+
174
def _normalize_cron(expr: str) -> str:
    """Translate a Vixie 5-field cron expression into APScheduler syntax.

    Only the day-of-week field is rewritten; the first four fields are
    copied through. Each comma-separated DOW token is remapped on its own,
    so a mixed list such as ``sun,1`` has its numeric members converted
    too instead of being left for APScheduler to read under its own
    numbering.

    An expression that does not split into exactly five fields is returned
    untouched so APScheduler's parser can produce the real error.
    """
    fields = expr.split()
    if len(fields) != 5:
        return expr
    *head, dow = fields
    remapped = [_remap_dow_token(tok) for tok in dow.split(",")]
    return " ".join([*head, ",".join(remapped)])
193
+
194
+
195
class Scheduler:
    """Narrow facade over APScheduler so the poller never imports it directly.

    Build instances with ``make_scheduler``. Typical lifecycle:

        scheduler = make_scheduler(timezone="America/Santiago")
        scheduler.add_cron_job("secops", "0 6 * * *", my_async_fn)
        scheduler.start()
        ...
        await scheduler.shutdown()  # must be awaited

    The surface is deliberately small (no pause/resume, no job lookup).

    ``shutdown`` is a coroutine because it cancels the job tasks that are
    still running and then awaits them, which gives their ``finally``
    blocks a chance to run while the event loop is still alive.
    """

    def __init__(self, impl: AsyncIOScheduler) -> None:
        self._impl = impl
        # Flipped by start()/shutdown(); shutdown() is a no-op while False.
        self._started = False
        # Tasks currently executing a wrapped job.
        self._running_jobs: set[asyncio.Task[None]] = set()

    def add_cron_job(
        self,
        name: str,
        cron_expr: str,
        func: JobFunc,
        *,
        misfire_grace_time: int = 3600,
        coalesce: bool = True,
    ) -> None:
        """Schedule ``func`` to run on ``cron_expr``.

        ``func`` is wrapped so that an ordinary exception is logged rather
        than raised out of the job, while cancellation is logged and
        re-raised. The wrapper also records the running task so that
        ``shutdown`` can cancel and await it.

        Args:
            name: Used as both the APScheduler job id and job name; an
                existing job with the same id is replaced.
            cron_expr: Vixie-style cron expression.
            func: Zero-argument coroutine function to run.
            misfire_grace_time: Forwarded to APScheduler's ``add_job``.
            coalesce: Forwarded to APScheduler's ``add_job``.
        """
        trigger = _build_vixie_trigger(cron_expr, timezone=self._impl.timezone)

        async def _safe_job() -> None:
            current = asyncio.current_task()
            tracked = current is not None
            if tracked:
                self._running_jobs.add(current)
            log_event(_logger, "scheduler.job.start", job=name, cron=cron_expr)
            try:
                await func()
                log_event(_logger, "scheduler.job.done", job=name)
            except asyncio.CancelledError:
                # Log, then let the cancellation propagate.
                log_event(_logger, "scheduler.job.cancelled", job=name)
                raise
            except Exception as exc:
                log_event(
                    _logger,
                    "scheduler.job.failed",
                    job=name,
                    error_type=type(exc).__name__,
                    error=str(exc)[:200],
                )
            finally:
                if tracked:
                    self._running_jobs.discard(current)

        self._impl.add_job(
            _safe_job,
            trigger=trigger,
            id=name,
            name=name,
            misfire_grace_time=misfire_grace_time,
            coalesce=coalesce,
            replace_existing=True,
        )
        log_event(
            _logger,
            "scheduler.job.registered",
            job=name,
            cron=cron_expr,
            timezone=str(self._impl.timezone),
        )

    def start(self) -> None:
        """Start the underlying scheduler and mark this wrapper as started."""
        self._impl.start()
        self._started = True
        log_event(_logger, "scheduler.started")

    async def shutdown(self, *, cancel_timeout: float = 150.0) -> None:
        """Stop the scheduler, then cancel and await jobs still running.

        Steps:
          1. Ask APScheduler to shut down without waiting.
          2. Cancel every tracked job task.
          3. Await those tasks for at most ``cancel_timeout`` seconds; on
             timeout a ``scheduler.shutdown.jobs_timed_out`` event is
             logged and shutdown proceeds.

        Does nothing if ``start`` has not been called.

        Args:
            cancel_timeout: Upper bound, in seconds, on the wait for
                cancelled jobs. A process supervisor with a shorter stop
                timeout would cut this short.
        """
        if not self._started:
            return
        self._impl.shutdown(wait=False)
        self._started = False

        # Snapshot first: jobs remove themselves from the set as they finish.
        pending = list(self._running_jobs)
        if pending:
            for job_task in pending:
                job_task.cancel()
            drained = asyncio.gather(*pending, return_exceptions=True)
            try:
                await asyncio.wait_for(drained, timeout=cancel_timeout)
            except asyncio.TimeoutError:
                log_event(
                    _logger,
                    "scheduler.shutdown.jobs_timed_out",
                    count=len(pending),
                    timeout=cancel_timeout,
                )
        log_event(_logger, "scheduler.shutdown")
327
+
328
+
329
def make_scheduler(timezone: str) -> Scheduler:
    """Create a ``Scheduler`` bound to the given timezone.

    No job store is configured here, so APScheduler's default is used.
    Lifecycle stays with the caller: call ``start()`` once the asyncio
    loop is running and ``await shutdown()`` during teardown.
    """
    return Scheduler(AsyncIOScheduler(timezone=timezone))
@@ -0,0 +1,167 @@
1
+ """SQLite state management for ctrlrelay orchestrator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sqlite3
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
# DDL script run by StateDB on every construction. Every statement uses
# IF NOT EXISTS, so applying it to an already-initialised database is safe.
SCHEMA = """
CREATE TABLE IF NOT EXISTS sessions (
    id TEXT PRIMARY KEY,
    pipeline TEXT NOT NULL,
    repo TEXT NOT NULL,
    issue_number INTEGER,
    worktree_path TEXT,
    status TEXT NOT NULL,
    blocked_question TEXT,
    started_at INTEGER NOT NULL,
    ended_at INTEGER,
    claude_exit_code INTEGER,
    summary TEXT
);

CREATE TABLE IF NOT EXISTS repo_locks (
    repo TEXT PRIMARY KEY,
    session_id TEXT NOT NULL,
    acquired_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS github_cursor (
    repo TEXT PRIMARY KEY,
    last_checked_at INTEGER NOT NULL,
    last_seen_issue_update TEXT
);

CREATE TABLE IF NOT EXISTS telegram_pending (
    request_id TEXT PRIMARY KEY,
    session_id TEXT NOT NULL,
    question TEXT NOT NULL,
    asked_at INTEGER NOT NULL,
    answered_at INTEGER,
    answer TEXT
);

CREATE TABLE IF NOT EXISTS automation_decisions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    repo TEXT NOT NULL,
    operation TEXT NOT NULL,
    policy TEXT NOT NULL,
    item_id TEXT,
    decision TEXT,
    decided_by TEXT,
    decided_at INTEGER,
    context TEXT
);

CREATE INDEX IF NOT EXISTS idx_sessions_repo ON sessions(repo);
CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status);
CREATE INDEX IF NOT EXISTS idx_automation_repo ON automation_decisions(repo);
"""
62
+
63
+
64
class StateDB:
    """SQLite database for orchestrator state.

    Thread-safety: Each thread/async context should create its own StateDB
    instance. The underlying SQLite connection is not shared.

    Instances can be used as context managers; leaving the ``with`` block
    closes the connection.
    """

    def __init__(self, db_path: Path | str) -> None:
        """Initialize the database, creating tables if needed.

        Args:
            db_path: Path to the SQLite database file. Missing parent
                directories are created.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        self._conn = sqlite3.connect(str(self.db_path))
        self._conn.row_factory = sqlite3.Row
        self._conn.executescript(SCHEMA)
        self._conn.commit()

    def __enter__(self) -> StateDB:
        """Return this instance for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the connection when the ``with`` block exits."""
        self.close()

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()

    def execute(self, sql: str, params: tuple[Any, ...] = ()) -> sqlite3.Cursor:
        """Execute a SQL statement.

        Args:
            sql: SQL statement to execute.
            params: Parameters for the statement.

        Returns:
            Cursor with results.
        """
        return self._conn.execute(sql, params)

    def commit(self) -> None:
        """Commit the current transaction."""
        self._conn.commit()

    # Repo locks

    def acquire_lock(self, repo: str, session_id: str) -> bool:
        """Attempt to acquire a lock on a repository.

        Args:
            repo: Repository name (e.g., "owner/repo").
            session_id: Session ID acquiring the lock.

        Returns:
            True if lock was acquired, False if already held.
        """
        # Remember whether the caller already had a transaction open, so a
        # failed attempt only undoes the transaction this call started.
        had_open_txn = self._conn.in_transaction
        try:
            self._conn.execute(
                "INSERT INTO repo_locks (repo, session_id, acquired_at) VALUES (?, ?, ?)",
                (repo, session_id, int(time.time())),
            )
        except sqlite3.IntegrityError:
            # The rejected INSERT still leaves the implicitly opened
            # transaction active. End it so a failed attempt does not keep
            # holding a write transaction on the database.
            if not had_open_txn:
                self._conn.rollback()
            return False
        self._conn.commit()
        return True

    def release_lock(self, repo: str, session_id: str) -> bool:
        """Release a lock on a repository.

        Only releases if the lock is held by the specified session.

        Args:
            repo: Repository name to unlock.
            session_id: Session ID that should own the lock.

        Returns:
            True if lock was released, False if not held by this session.
        """
        cursor = self._conn.execute(
            "DELETE FROM repo_locks WHERE repo = ? AND session_id = ?",
            (repo, session_id),
        )
        self._conn.commit()
        return cursor.rowcount > 0

    def get_lock_holder(self, repo: str) -> str | None:
        """Get the session ID holding a lock.

        Args:
            repo: Repository name.

        Returns:
            Session ID if locked, None otherwise.
        """
        row = self._conn.execute(
            "SELECT session_id FROM repo_locks WHERE repo = ?", (repo,)
        ).fetchone()
        return row["session_id"] if row else None

    def list_locks(self) -> list[dict[str, Any]]:
        """List all current locks.

        Returns:
            List of lock records, one dict per row of ``repo_locks``.
        """
        rows = self._conn.execute("SELECT * FROM repo_locks").fetchall()
        return [dict(row) for row in rows]