baqueue 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """BaQueue driver implementations."""
@@ -0,0 +1,212 @@
1
+ """Abstract base driver for BaQueue."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from abc import ABC, abstractmethod
7
+ from typing import Any, Awaitable, Callable
8
+
9
+ from baqueue.serializer import JobPayload
10
+
11
+ logger = logging.getLogger("baqueue.driver")  # used by BaseDriver's storage-full recovery helpers for warnings and logged failures
12
+
13
+
14
class BaseDriver(ABC):
    """Interface that every BaQueue driver has to implement.

    Besides the abstract coroutines, the base class contributes a small
    storage-full recovery facility: `_with_disk_full_recovery()` runs a write,
    and when the driver reports the failure as "storage exhausted" it calls
    `emergency_cleanup()` once and retries the write.
    """

    # Switch for the "clean up once, then retry" behaviour applied to write
    # operations that fail with a storage-full error. Wired from
    # BaQueueConfig in queue.py.
    auto_cleanup_on_disk_full: bool = True

    # Re-entrancy guard: set while emergency_cleanup() is running so that its
    # own prune calls do not start another cleanup if they hit disk-full too.
    _in_emergency_cleanup: bool = False

    def is_storage_full_error(self, exc: BaseException) -> bool:
        """Tell whether `exc` means the backing store has run out of space.

        The base implementation recognises nothing and always returns False;
        subclasses override it to detect their driver-specific errors
        (SQLite "database or disk is full", Postgres disk_full, Redis OOM, etc.).
        """
        return False

    async def emergency_cleanup(self) -> int:
        """Free space aggressively: purge every terminal job plus old metrics.

        Each prune step is best-effort: a failure is logged and the sweep
        carries on with the next step. Subclasses may override this with a
        more targeted sweep.

        Returns:
            The number of entries the successful prune steps reported removed.
        """
        removed = 0
        for terminal_status in ("completed", "failed", "cancelled"):
            try:
                pruned_jobs = await self.prune(status=terminal_status)
                removed = removed + pruned_jobs
            except Exception:
                logger.exception("emergency_cleanup: prune(status=%s) failed", terminal_status)
        try:
            # A zero-second cutoff asks the driver to drop metrics of any age.
            pruned_metrics = await self.prune_metrics(older_than_seconds=0)
            removed = removed + pruned_metrics
        except Exception:
            logger.exception("emergency_cleanup: prune_metrics failed")
        logger.warning("emergency_cleanup removed %d entries due to storage pressure", removed)
        return removed

    async def _with_disk_full_recovery(self, fn: Callable[[], Awaitable[Any]]) -> Any:
        """Await `fn()`; on a storage-full error clean up once and try again.

        Drivers wrap their write paths with this helper.

        Args:
            fn: Zero-argument callable returning a fresh awaitable per call
                (it is invoked a second time for the retry).

        Returns:
            Whatever `fn()` returns, from the first or the retried call.

        Raises:
            Any exception from `fn()` that is not a storage-full error, and
            any exception raised by the single retry.
        """
        recovery_active = self.auto_cleanup_on_disk_full and not self._in_emergency_cleanup
        if not recovery_active:
            # Feature disabled, or we are already inside a cleanup: run plainly.
            return await fn()
        try:
            return await fn()
        except Exception as err:
            if self.is_storage_full_error(err):
                logger.warning("Storage-full error caught: %s. Running emergency cleanup.", err)
                self._in_emergency_cleanup = True
                try:
                    await self.emergency_cleanup()
                finally:
                    # Always drop the guard, even if the cleanup itself blew up.
                    self._in_emergency_cleanup = False
                return await fn()
            raise

    @abstractmethod
    async def connect(self) -> None:
        """Set up whatever backend resources the driver needs (may be a no-op)."""
        ...

    @abstractmethod
    async def disconnect(self) -> None:
        """Release whatever `connect()` set up (may be a no-op)."""
        ...

    # ── Push / Pop ──────────────────────────────────────────────

    @abstractmethod
    async def push(self, payload: JobPayload) -> str:
        """Enqueue one job and return its id."""
        ...

    @abstractmethod
    async def push_many(self, payloads: list[JobPayload]) -> list[str]:
        """Enqueue several jobs in one call (bulk insert)."""
        ...

    @abstractmethod
    async def pop(self, queue: str) -> JobPayload | None:
        """Take the next available job off `queue`."""
        ...

    @abstractmethod
    async def pop_delayed(self) -> list[JobPayload]:
        """Move delayed jobs whose delay has run out into their queues."""
        ...

    # ── Job lifecycle ───────────────────────────────────────────

    @abstractmethod
    async def complete(self, payload: JobPayload) -> None:
        """Record the job as completed."""
        ...

    @abstractmethod
    async def fail(self, payload: JobPayload, error: str) -> None:
        """Record the job as failed, with `error` as the failure description."""
        ...

    @abstractmethod
    async def release(self, payload: JobPayload, delay: float = 0) -> None:
        """Put a job back onto the queue (used for retries)."""
        ...

    @abstractmethod
    async def delete(self, job_id: str) -> None:
        """Remove the job with the given id."""
        ...

    # ── Query ───────────────────────────────────────────────────

    @abstractmethod
    async def get_job(self, job_id: str) -> JobPayload | None:
        """Look up a single job by id."""
        ...

    @abstractmethod
    async def get_jobs(
        self,
        queue: str | None = None,
        status: str | None = None,
        tag: str | None = None,
        batch_id: str | None = None,
        offset: int = 0,
        limit: int = 50,
        created_from: float | None = None,
        created_to: float | None = None,
    ) -> list[JobPayload]:
        """List jobs matching the given filters, paged by `offset` / `limit`."""
        ...

    @abstractmethod
    async def count_jobs(
        self,
        queue: str | None = None,
        status: str | None = None,
        created_from: float | None = None,
        created_to: float | None = None,
    ) -> int:
        """Count the jobs that match the filters.

        Drivers MUST implement this efficiently; falling back to loading all
        rows is not acceptable.
        """
        ...

    @abstractmethod
    async def size(self, queue: str) -> int:
        """Return how many jobs are queued on `queue`."""
        ...

    @abstractmethod
    async def queues(self) -> list[str]:
        """Return the names of the queues known to the driver."""
        ...

    # ── Metrics ─────────────────────────────────────────────────

    @abstractmethod
    async def record_metric(self, queue: str, metric: str, value: float) -> None:
        """Record one `metric` sample with `value` for `queue`."""
        ...

    @abstractmethod
    async def get_metrics(self, queue: str | None = None) -> dict[str, Any]:
        """Return metrics, optionally restricted to a single queue."""
        ...

    @abstractmethod
    async def report_supervisor(self, stats: dict[str, Any]) -> None:
        """Persist a supervisor snapshot for dashboard worker monitoring."""
        ...

    @abstractmethod
    async def get_supervisor_stats(self, stale_after: float = 10.0) -> list[dict[str, Any]]:
        """Return the active supervisor snapshots (recent heartbeat only)."""
        ...

    # ── Batch helpers ───────────────────────────────────────────

    @abstractmethod
    async def store_batch(self, batch_id: str, data: dict[str, Any]) -> None:
        """Store `data` as the batch record for `batch_id`."""
        ...

    @abstractmethod
    async def get_batch(self, batch_id: str) -> dict[str, Any] | None:
        """Fetch the batch record for `batch_id`."""
        ...

    @abstractmethod
    async def update_batch(self, batch_id: str, data: dict[str, Any]) -> None:
        """Apply `data` to the batch record for `batch_id`."""
        ...

    @abstractmethod
    async def increment_batch_counter(
        self, batch_id: str, field: str, delta: int = 1,
    ) -> dict[str, Any] | None:
        """Atomically add `delta` to data[field] of a batch.

        Returns the batch dict as it is after the increment, or None when the
        batch does not exist. Implementations MUST be safe against concurrent
        writers (no read-modify-write in Python).
        """
        ...

    # ── Pruning ─────────────────────────────────────────────────

    @abstractmethod
    async def prune(
        self,
        status: str | None = None,
        tag: str | None = None,
        older_than_seconds: float | None = None,
        queue: str | None = None,
    ) -> int:
        """Delete the jobs matching the filters and return how many were pruned."""
        ...

    @abstractmethod
    async def flush(self, queue: str | None = None) -> None:
        """Remove all jobs, or only those of `queue` when one is given."""
        ...

    @abstractmethod
    async def prune_metrics(self, older_than_seconds: float) -> int:
        """Delete metric entries older than the cutoff and return the count pruned."""
        ...

    @abstractmethod
    async def recent_throughput(
        self, seconds: int = 60, queue: str | None = None,
    ) -> dict[str, int]:
        """Count processing/completed/failed events recorded in the last `seconds`.

        Returns a dict like {"processing": N, "completed": N, "failed": N}.
        """
        ...
@@ -0,0 +1,318 @@
1
+ """In-memory driver for testing and development."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections import defaultdict
7
+ from typing import Any
8
+
9
+ from baqueue.drivers.base import BaseDriver
10
+ from baqueue.serializer import JobPayload, _now_ts
11
+
12
+
13
class MemoryDriver(BaseDriver):
    """Driver that keeps jobs, queues, batches and metrics in process memory.

    Nothing is persisted. Ideal for tests and local development.
    """

    def __init__(self) -> None:
        # job id -> payload for every job the driver currently holds
        self._jobs: dict[str, JobPayload] = {}
        # queue name -> ids waiting to be popped, oldest first
        self._queues: dict[str, list[str]] = defaultdict(list)
        # ids parked until their payload's delay_until has passed
        self._delayed: list[str] = []
        # batch id -> batch record
        self._batches: dict[str, dict[str, Any]] = {}
        # queue name -> recorded metric events ({"metric", "value", "time"})
        self._metrics: dict[str, list[dict[str, Any]]] = defaultdict(list)
        # supervisor name -> {"data": snapshot, "heartbeat_at": timestamp}
        self._supervisors: dict[str, dict[str, Any]] = {}
        self._lock = asyncio.Lock()

    async def connect(self) -> None:
        """No-op: there is no backend to connect to."""
        pass

    async def disconnect(self) -> None:
        """No-op: there is no backend to disconnect from."""
        pass

    # ── Push / Pop ──────────────────────────────────────────────

    async def push(self, payload: JobPayload) -> str:
        """Store `payload` as pending and enqueue it; returns the job id.

        A payload whose `delay_until` lies in the future is parked in the
        delayed list instead of its queue.
        """
        async with self._lock:
            payload.status = "pending"
            payload.updated_at = _now_ts()
            self._jobs[payload.id] = payload
            if payload.delay_until and payload.delay_until > _now_ts():
                self._delayed.append(payload.id)
            else:
                self._queues[payload.queue].append(payload.id)
        return payload.id

    async def push_many(self, payloads: list[JobPayload]) -> list[str]:
        """Push each payload in order via `push()`; returns the ids in that order."""
        return [await self.push(p) for p in payloads]

    async def pop(self, queue: str) -> JobPayload | None:
        """Return the oldest pending job of `queue`, marked as processing.

        Ids whose job is gone or no longer pending are discarded on the way.
        Returns None when the queue holds no pending job.
        """
        async with self._lock:
            waiting = self._queues.get(queue, [])
            while waiting:
                job_id = waiting.pop(0)
                payload = self._jobs.get(job_id)
                if payload and payload.status == "pending":
                    payload.status = "processing"
                    payload.started_at = _now_ts()
                    payload.updated_at = _now_ts()
                    payload.attempts += 1
                    return payload
            return None

    async def pop_delayed(self) -> list[JobPayload]:
        """Move delayed jobs whose `delay_until` has passed onto their queues.

        Delayed ids without a stored job are dropped. Returns the payloads
        that were moved.
        """
        now = _now_ts()
        moved: list[JobPayload] = []
        async with self._lock:
            still_delayed = []
            for job_id in self._delayed:
                payload = self._jobs.get(job_id)
                if not payload:
                    continue
                if payload.delay_until and payload.delay_until <= now:
                    payload.delay_until = None
                    self._queues[payload.queue].append(job_id)
                    moved.append(payload)
                else:
                    still_delayed.append(job_id)
            self._delayed = still_delayed
        return moved

    # ── Job lifecycle ───────────────────────────────────────────

    async def complete(self, payload: JobPayload) -> None:
        """Mark `payload` as completed, stamp it and store it under its id."""
        async with self._lock:
            payload.status = "completed"
            payload.completed_at = _now_ts()
            payload.updated_at = _now_ts()
            self._jobs[payload.id] = payload

    async def fail(self, payload: JobPayload, error: str) -> None:
        """Mark `payload` as failed with `error`, stamp it and store it under its id."""
        async with self._lock:
            payload.status = "failed"
            payload.failed_at = _now_ts()
            payload.updated_at = _now_ts()
            payload.error = error
            self._jobs[payload.id] = payload

    async def release(self, payload: JobPayload, delay: float = 0) -> None:
        """Put `payload` back as pending (for retries).

        With a positive `delay` the job is parked until now + delay seconds;
        otherwise it goes straight to the back of its queue.
        """
        async with self._lock:
            payload.status = "pending"
            payload.updated_at = _now_ts()
            if delay > 0:
                payload.delay_until = _now_ts() + delay
                self._delayed.append(payload.id)
            else:
                self._queues[payload.queue].append(payload.id)
            self._jobs[payload.id] = payload

    async def delete(self, job_id: str) -> None:
        """Forget the job: drop it from the job map, every queue and the delayed list."""
        async with self._lock:
            self._jobs.pop(job_id, None)
            for q in self._queues.values():
                if job_id in q:
                    q.remove(job_id)
            if job_id in self._delayed:
                self._delayed.remove(job_id)

    # ── Query ───────────────────────────────────────────────────

    async def get_job(self, job_id: str) -> JobPayload | None:
        """Return the stored payload for `job_id`, or None when unknown."""
        return self._jobs.get(job_id)

    async def get_jobs(
        self,
        queue: str | None = None,
        status: str | None = None,
        tag: str | None = None,
        batch_id: str | None = None,
        offset: int = 0,
        limit: int = 50,
        created_from: float | None = None,
        created_to: float | None = None,
    ) -> list[JobPayload]:
        """List jobs matching every given filter, newest `created_at` first.

        Falsy `queue` / `status` / `tag` / `batch_id` values mean "no filter";
        the `created_*` bounds are inclusive. The result is the slice
        `[offset : offset + limit]` of the sorted matches.
        """
        results = list(self._jobs.values())
        if queue:
            results = [j for j in results if j.queue == queue]
        if status:
            results = [j for j in results if j.status == status]
        if tag:
            results = [j for j in results if tag in j.tags]
        if batch_id:
            results = [j for j in results if j.batch_id == batch_id]
        if created_from is not None:
            results = [j for j in results if j.created_at >= created_from]
        if created_to is not None:
            results = [j for j in results if j.created_at <= created_to]
        results.sort(key=lambda j: j.created_at, reverse=True)
        return results[offset : offset + limit]

    async def count_jobs(
        self,
        queue: str | None = None,
        status: str | None = None,
        created_from: float | None = None,
        created_to: float | None = None,
    ) -> int:
        """Count stored jobs matching the filters (same semantics as `get_jobs`)."""
        n = 0
        for j in self._jobs.values():
            if queue and j.queue != queue:
                continue
            if status and j.status != status:
                continue
            if created_from is not None and j.created_at < created_from:
                continue
            if created_to is not None and j.created_at > created_to:
                continue
            n += 1
        return n

    async def size(self, queue: str) -> int:
        """Return the number of ids currently sitting in `queue`'s list."""
        return len(self._queues.get(queue, []))

    async def queues(self) -> list[str]:
        """Return the sorted names of all queues that have a list or a stored job."""
        all_queues = set(self._queues.keys())
        for j in self._jobs.values():
            all_queues.add(j.queue)
        return sorted(all_queues)

    # ── Metrics ─────────────────────────────────────────────────

    async def record_metric(self, queue: str, metric: str, value: float) -> None:
        """Append a timestamped metric event to `queue`'s event log."""
        self._metrics[queue].append(
            {"metric": metric, "value": value, "time": _now_ts()}
        )

    async def get_metrics(self, queue: str | None = None) -> dict[str, Any]:
        """Live status counts from the jobs dict — never from the metrics event log.

        Returns `{queue_name: {"pending", "processing", "completed", "failed"}}`
        for the requested queue, or for every known queue when none is given.
        Jobs in any other status are not counted.
        """
        # Reuse queues() rather than re-deriving the same sorted name list.
        queues_list = [queue] if queue else await self.queues()

        result: dict[str, Any] = {
            q: {"pending": 0, "processing": 0, "completed": 0, "failed": 0}
            for q in queues_list
        }
        for j in self._jobs.values():
            counts = result.get(j.queue)
            if counts is None:
                continue
            if j.status in counts:
                counts[j.status] += 1
        return result

    async def report_supervisor(self, stats: dict[str, Any]) -> None:
        """Store a copy of `stats` under its "name" with the current heartbeat time.

        Snapshots without a non-blank name are ignored.
        """
        now = _now_ts()
        name = str(stats.get("name", "")).strip()
        if not name:
            return
        async with self._lock:
            self._supervisors[name] = {
                "data": dict(stats),
                "heartbeat_at": now,
            }

    async def get_supervisor_stats(self, stale_after: float = 10.0) -> list[dict[str, Any]]:
        """Return copies of the fresh, running supervisor snapshots, sorted by name.

        A snapshot is skipped when its heartbeat is older than `stale_after`
        seconds or when its data does not report `running` as truthy.
        """
        cutoff = _now_ts() - stale_after
        out: list[dict[str, Any]] = []
        async with self._lock:
            for name in sorted(self._supervisors):
                item = self._supervisors[name]
                if item.get("heartbeat_at", 0.0) < cutoff:
                    continue
                data = dict(item.get("data", {}))
                if not data.get("running", False):
                    continue
                out.append(data)
        return out

    # ── Batch helpers ───────────────────────────────────────────

    async def store_batch(self, batch_id: str, data: dict[str, Any]) -> None:
        """Store `data` (the same dict object, not a copy) as the batch record."""
        self._batches[batch_id] = data

    async def get_batch(self, batch_id: str) -> dict[str, Any] | None:
        """Return the stored batch record, or None when the batch is unknown."""
        return self._batches.get(batch_id)

    async def update_batch(self, batch_id: str, data: dict[str, Any]) -> None:
        """Merge `data` into an existing batch record; unknown batches are ignored."""
        if batch_id in self._batches:
            self._batches[batch_id].update(data)

    async def increment_batch_counter(
        self, batch_id: str, field: str, delta: int = 1,
    ) -> dict[str, Any] | None:
        """Add `delta` to `field` of a batch (missing fields start at 0).

        Returns a copy of the batch after the increment, or None when the
        batch does not exist.
        """
        async with self._lock:
            batch = self._batches.get(batch_id)
            if batch is None:
                return None
            batch[field] = batch.get(field, 0) + delta
            return dict(batch)

    # ── Pruning ─────────────────────────────────────────────────

    async def prune(
        self,
        status: str | None = None,
        tag: str | None = None,
        older_than_seconds: float | None = None,
        queue: str | None = None,
    ) -> int:
        """Delete every job matching all given filters; returns the count.

        Falsy filters are ignored, so calling this without arguments removes
        every job. Age is measured from the job's `updated_at`.
        """
        now = _now_ts()
        to_delete: list[str] = []
        # Collect first: delete() mutates self._jobs, which must not happen
        # while iterating it.
        for job_id, job in self._jobs.items():
            if queue and job.queue != queue:
                continue
            if status and job.status != status:
                continue
            if tag and tag not in job.tags:
                continue
            if older_than_seconds and (now - job.updated_at) < older_than_seconds:
                continue
            to_delete.append(job_id)
        for job_id in to_delete:
            await self.delete(job_id)
        return len(to_delete)

    async def prune_metrics(self, older_than_seconds: float) -> int:
        """Drop metric events older than the cutoff; returns how many were removed."""
        cutoff = _now_ts() - older_than_seconds
        removed = 0
        async with self._lock:
            for q, entries in list(self._metrics.items()):
                kept = [e for e in entries if e.get("time", 0) >= cutoff]
                removed += len(entries) - len(kept)
                self._metrics[q] = kept
        return removed

    async def recent_throughput(
        self, seconds: int = 60, queue: str | None = None,
    ) -> dict[str, int]:
        """Count processing/completed/failed metric events from the last `seconds`.

        Looks at one queue's event log when `queue` is given, otherwise at all
        of them. Returns {"processing": N, "completed": N, "failed": N}.
        """
        cutoff = _now_ts() - seconds
        result = {"processing": 0, "completed": 0, "failed": 0}
        queues = [queue] if queue else list(self._metrics.keys())
        for q in queues:
            for e in self._metrics.get(q, []):
                if e.get("time", 0) < cutoff:
                    continue
                m = e.get("metric")
                if m in result:
                    result[m] += 1
        return result

    async def flush(self, queue: str | None = None) -> None:
        """Remove all jobs of `queue`, or reset the whole driver when none is given.

        The per-queue form removes the queue's jobs, its pending list and any
        of its ids still parked in the delayed list. The full form also clears
        batches, metrics and supervisor snapshots.
        """
        if queue:
            flushed = {jid for jid, j in self._jobs.items() if j.queue == queue}
            for jid in flushed:
                del self._jobs[jid]
            self._queues.pop(queue, None)
            # Delayed jobs of this queue are gone too; without this their ids
            # would linger in _delayed, unlike after delete() or a full flush.
            self._delayed = [jid for jid in self._delayed if jid not in flushed]
        else:
            self._jobs.clear()
            self._queues.clear()
            self._delayed.clear()
            self._batches.clear()
            self._metrics.clear()
            self._supervisors.clear()