baqueue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- baqueue/__init__.py +19 -0
- baqueue/balancer.py +108 -0
- baqueue/batch.py +159 -0
- baqueue/cli.py +459 -0
- baqueue/config.py +79 -0
- baqueue/dashboard/__init__.py +1 -0
- baqueue/dashboard/api.py +193 -0
- baqueue/dashboard/server.py +263 -0
- baqueue/dashboard/static/app.js +450 -0
- baqueue/dashboard/static/index.html +580 -0
- baqueue/dashboard/static/style.css +1415 -0
- baqueue/drivers/__init__.py +1 -0
- baqueue/drivers/base.py +212 -0
- baqueue/drivers/memory_driver.py +318 -0
- baqueue/drivers/postgres_driver.py +656 -0
- baqueue/drivers/redis_driver.py +656 -0
- baqueue/drivers/sqlite_driver.py +706 -0
- baqueue/events.py +64 -0
- baqueue/job.py +128 -0
- baqueue/pruner.py +128 -0
- baqueue/queue.py +225 -0
- baqueue/retry.py +55 -0
- baqueue/scheduler.py +101 -0
- baqueue/serializer.py +124 -0
- baqueue/supervisor.py +206 -0
- baqueue/worker.py +165 -0
- baqueue-0.1.0.dist-info/METADATA +609 -0
- baqueue-0.1.0.dist-info/RECORD +32 -0
- baqueue-0.1.0.dist-info/WHEEL +5 -0
- baqueue-0.1.0.dist-info/entry_points.txt +2 -0
- baqueue-0.1.0.dist-info/licenses/LICENSE +21 -0
- baqueue-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
"""SQLite driver for BaQueue - zero-dependency cross-process local storage.
|
|
2
|
+
|
|
3
|
+
Perfect for development and testing: works across multiple processes
|
|
4
|
+
without needing Redis or PostgreSQL.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sqlite3
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from baqueue.drivers.base import BaseDriver
|
|
16
|
+
from baqueue.serializer import JobPayload, _now_ts
|
|
17
|
+
|
|
18
|
+
DEFAULT_DB_PATH = ".baqueue.db"
|
|
19
|
+
MAX_RETRIES = 5
|
|
20
|
+
RETRY_BASE_DELAY = 0.05
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger("baqueue.sqlite")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SqliteDriver(BaseDriver):
|
|
26
|
+
"""SQLite-backed driver. Data is stored in a local file so multiple
|
|
27
|
+
processes (dashboard, workers, dispatchers) can share the same queue."""
|
|
28
|
+
|
|
29
|
+
def __init__(self, path: str = DEFAULT_DB_PATH, **kwargs: Any):
    """Remember where the database lives; nothing is opened until connect().

    Args:
        path: Filesystem path of the SQLite database file.
        **kwargs: Accepted but not used by this method.
    """
    # One lock guards every use of the single shared connection.
    self._lock = asyncio.Lock()
    # Set by connect() and cleared again by disconnect().
    self._conn: sqlite3.Connection | None = None
    self._path = path
|
|
33
|
+
|
|
34
|
+
def _get_conn(self) -> sqlite3.Connection:
|
|
35
|
+
if self._conn is None:
|
|
36
|
+
raise RuntimeError("Driver not connected. Call connect() first.")
|
|
37
|
+
return self._conn
|
|
38
|
+
|
|
39
|
+
async def connect(self) -> None:
|
|
40
|
+
self._conn = sqlite3.connect(self._path, check_same_thread=False, timeout=30)
|
|
41
|
+
self._conn.row_factory = sqlite3.Row
|
|
42
|
+
self._conn.execute("PRAGMA journal_mode=WAL")
|
|
43
|
+
self._conn.execute("PRAGMA busy_timeout=15000")
|
|
44
|
+
self._conn.execute("PRAGMA synchronous=NORMAL")
|
|
45
|
+
self._conn.execute("PRAGMA wal_autocheckpoint=1000")
|
|
46
|
+
self._ensure_tables()
|
|
47
|
+
|
|
48
|
+
async def disconnect(self) -> None:
|
|
49
|
+
if self._conn:
|
|
50
|
+
self._conn.close()
|
|
51
|
+
self._conn = None
|
|
52
|
+
|
|
53
|
+
def is_storage_full_error(self, exc: BaseException) -> bool:
|
|
54
|
+
if not isinstance(exc, sqlite3.OperationalError):
|
|
55
|
+
return False
|
|
56
|
+
msg = str(exc).lower()
|
|
57
|
+
# SQLite raises e.g. "database or disk is full".
|
|
58
|
+
return (
|
|
59
|
+
"database or disk is full" in msg
|
|
60
|
+
or "disk full" in msg
|
|
61
|
+
or "no space" in msg
|
|
62
|
+
or "out of memory" in msg
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
async def emergency_cleanup(self) -> int:
    """Reclaim SQLite space directly, bypassing the public prune methods.

    Write methods already hold ``_lock`` when they call
    ``_execute_with_retry()``, so the disk-full recovery path there calls
    ``_emergency_cleanup_locked()`` itself. This public entry point is the
    variant that takes the lock first.

    Returns:
        The number of rows removed.
    """
    await self._lock.acquire()
    try:
        removed = await self._emergency_cleanup_locked()
    finally:
        self._lock.release()
    return removed
|
|
74
|
+
|
|
75
|
+
async def _emergency_cleanup_locked(self) -> int:
    """Delete finished jobs and existing metrics; the caller holds ``_lock``.

    Removes every job whose status is completed, failed or cancelled, plus
    every metric row recorded before the current timestamp, in one commit.

    Returns:
        The total number of rows deleted.
    """

    def _purge() -> int:
        conn = self._get_conn()
        removed = 0
        for state in ("completed", "failed", "cancelled"):
            removed += conn.execute("DELETE FROM jobs WHERE status=?", (state,)).rowcount
        removed += conn.execute(
            "DELETE FROM metrics WHERE recorded_at < ?", (_now_ts(),)
        ).rowcount
        conn.commit()
        return removed

    removed = await asyncio.to_thread(_purge)
    logger.warning("emergency_cleanup removed %d entries due to storage pressure", removed)
    return removed
|
|
90
|
+
|
|
91
|
+
async def _execute_with_retry(self, fn):
    """Run the blocking callable ``fn`` in a worker thread, retrying on lock errors.

    The caller holds ``self._lock``. Storage-full recovery must stay inside
    that lock and use raw SQL cleanup to avoid re-entering public prune
    methods.

    Whenever ``fn`` fails with a ``sqlite3.Error``, the connection's open
    transaction is rolled back before anything else happens. Otherwise
    statements executed before the failure would stay pending on the shared
    connection and be committed by the retry (or by the emergency cleanup),
    so a write could be applied twice or half-applied.

    Args:
        fn: Zero-argument synchronous callable performing the database work.

    Returns:
        Whatever ``fn`` returns.

    Raises:
        sqlite3.OperationalError: A "locked" error persisted through
            ``MAX_RETRIES`` attempts, or a non-retryable operational error.
        Exception: Any other exception raised by ``fn`` is propagated.
    """

    def _rollback_open_transaction() -> None:
        # Runs in a worker thread, like fn itself.
        conn = self._conn
        if conn is None or not conn.in_transaction:
            return
        try:
            conn.rollback()
        except sqlite3.Error:
            # Keep the original failure visible; the rollback is best effort.
            logger.debug("Rollback after failed write also failed", exc_info=True)

    async def _attempt():
        for attempt in range(MAX_RETRIES):
            try:
                return await asyncio.to_thread(fn)
            except sqlite3.Error as e:
                await asyncio.to_thread(_rollback_open_transaction)
                retryable = (
                    isinstance(e, sqlite3.OperationalError)
                    and "locked" in str(e)
                    and attempt < MAX_RETRIES - 1
                )
                if not retryable:
                    raise
                # Exponential backoff: base delay doubled on every attempt.
                delay = RETRY_BASE_DELAY * (2 ** attempt)
                logger.debug("Database locked, retry %d/%d in %.2fs", attempt + 1, MAX_RETRIES, delay)
                await asyncio.sleep(delay)

    # NOTE(review): auto_cleanup_on_disk_full and the initial value of
    # _in_emergency_cleanup are not set in this class's __init__; presumably
    # they come from BaseDriver — confirm.
    if not self.auto_cleanup_on_disk_full or self._in_emergency_cleanup:
        return await _attempt()
    try:
        return await _attempt()
    except Exception as e:
        if not self.is_storage_full_error(e):
            raise
        logger.warning("Storage-full error caught: %s. Running emergency cleanup.", e)
        # The flag makes a nested call skip this recovery branch.
        self._in_emergency_cleanup = True
        try:
            await self._emergency_cleanup_locked()
        finally:
            self._in_emergency_cleanup = False
        # One more round after space has been freed.
        return await _attempt()
|
|
123
|
+
|
|
124
|
+
async def _run(self, fn):
|
|
125
|
+
"""Run a sync read-only sqlite call off the event loop.
|
|
126
|
+
|
|
127
|
+
The lock is required because Python's sqlite3 module does not allow
|
|
128
|
+
concurrent use of a single Connection across threads — even with
|
|
129
|
+
check_same_thread=False. Two parallel `to_thread` callbacks hitting the
|
|
130
|
+
same connection raise `InterfaceError: bad parameter or other API misuse`."""
|
|
131
|
+
async with self._lock:
|
|
132
|
+
return await asyncio.to_thread(fn)
|
|
133
|
+
|
|
134
|
+
def _ensure_tables(self) -> None:
|
|
135
|
+
c = self._get_conn()
|
|
136
|
+
c.executescript("""
|
|
137
|
+
CREATE TABLE IF NOT EXISTS jobs (
|
|
138
|
+
id TEXT PRIMARY KEY,
|
|
139
|
+
job_class TEXT NOT NULL,
|
|
140
|
+
data TEXT NOT NULL DEFAULT '{}',
|
|
141
|
+
queue TEXT NOT NULL DEFAULT 'default',
|
|
142
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
143
|
+
attempts INTEGER NOT NULL DEFAULT 0,
|
|
144
|
+
max_attempts INTEGER NOT NULL DEFAULT 3,
|
|
145
|
+
backoff TEXT NOT NULL DEFAULT 'exponential',
|
|
146
|
+
timeout INTEGER NOT NULL DEFAULT 60,
|
|
147
|
+
tags TEXT NOT NULL DEFAULT '[]',
|
|
148
|
+
batch_id TEXT,
|
|
149
|
+
delay_until REAL,
|
|
150
|
+
error TEXT,
|
|
151
|
+
created_at REAL NOT NULL,
|
|
152
|
+
updated_at REAL NOT NULL,
|
|
153
|
+
started_at REAL,
|
|
154
|
+
completed_at REAL,
|
|
155
|
+
failed_at REAL
|
|
156
|
+
);
|
|
157
|
+
CREATE INDEX IF NOT EXISTS idx_jobs_queue_status_created
|
|
158
|
+
ON jobs (queue, status, created_at DESC);
|
|
159
|
+
CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs (created_at DESC);
|
|
160
|
+
CREATE INDEX IF NOT EXISTS idx_jobs_status_created ON jobs (status, created_at DESC);
|
|
161
|
+
CREATE INDEX IF NOT EXISTS idx_jobs_delay ON jobs (delay_until) WHERE delay_until IS NOT NULL;
|
|
162
|
+
CREATE INDEX IF NOT EXISTS idx_jobs_batch ON jobs (batch_id) WHERE batch_id IS NOT NULL;
|
|
163
|
+
DROP INDEX IF EXISTS idx_jobs_queue_status;
|
|
164
|
+
|
|
165
|
+
CREATE TABLE IF NOT EXISTS batches (
|
|
166
|
+
id TEXT PRIMARY KEY,
|
|
167
|
+
data TEXT NOT NULL DEFAULT '{}'
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
CREATE TABLE IF NOT EXISTS metrics (
|
|
171
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
172
|
+
queue TEXT NOT NULL,
|
|
173
|
+
metric TEXT NOT NULL,
|
|
174
|
+
value REAL NOT NULL,
|
|
175
|
+
recorded_at REAL NOT NULL
|
|
176
|
+
);
|
|
177
|
+
CREATE INDEX IF NOT EXISTS idx_metrics_queue_metric ON metrics (queue, metric);
|
|
178
|
+
CREATE INDEX IF NOT EXISTS idx_metrics_recorded_at ON metrics (recorded_at);
|
|
179
|
+
|
|
180
|
+
CREATE TABLE IF NOT EXISTS supervisors (
|
|
181
|
+
name TEXT PRIMARY KEY,
|
|
182
|
+
data TEXT NOT NULL DEFAULT '{}',
|
|
183
|
+
heartbeat_at REAL NOT NULL
|
|
184
|
+
);
|
|
185
|
+
CREATE INDEX IF NOT EXISTS idx_supervisors_heartbeat ON supervisors (heartbeat_at);
|
|
186
|
+
""")
|
|
187
|
+
|
|
188
|
+
def _row_to_payload(self, row: sqlite3.Row) -> JobPayload:
    """Build a JobPayload from one row of the jobs table.

    ``backoff`` is stored as text: if it decodes as a JSON list, the list is
    used; otherwise the stored string is kept unchanged. ``tags`` and ``data``
    are decoded from their JSON text.
    """
    stored_backoff = row["backoff"]
    backoff: str | list[int] = stored_backoff
    try:
        decoded = json.loads(stored_backoff)
    except (json.JSONDecodeError, TypeError):
        # Plain strategy names such as 'exponential' are not valid JSON.
        decoded = None
    if isinstance(decoded, list):
        backoff = decoded

    raw_tags = row["tags"]
    raw_data = row["data"]

    # Columns copied into the payload unchanged.
    passthrough = (
        "id", "job_class", "queue", "status", "attempts", "max_attempts",
        "timeout", "batch_id", "delay_until", "error", "created_at",
        "updated_at", "started_at", "completed_at", "failed_at",
    )
    fields = {column: row[column] for column in passthrough}
    return JobPayload(
        data=json.loads(raw_data) if isinstance(raw_data, str) else raw_data,
        backoff=backoff,
        tags=json.loads(raw_tags) if raw_tags else [],
        **fields,
    )
|
|
219
|
+
|
|
220
|
+
# ── Push / Pop ──────────────────────────────────────────────
|
|
221
|
+
|
|
222
|
+
async def push(self, payload: JobPayload) -> str:
    """Store ``payload`` as a pending job, replacing any row with the same id.

    The payload's ``status`` and ``updated_at`` are modified in place.

    Returns:
        The id of the stored job.
    """
    payload.status = "pending"
    payload.updated_at = _now_ts()

    # A list-valued backoff schedule is stored as JSON; strings are stored as-is.
    backoff_value = payload.backoff
    if isinstance(backoff_value, list):
        backoff_value = json.dumps(backoff_value)

    record = (
        payload.id,
        payload.job_class,
        json.dumps(payload.data),
        payload.queue,
        payload.status,
        payload.attempts,
        payload.max_attempts,
        backoff_value,
        payload.timeout,
        json.dumps(payload.tags),
        payload.batch_id,
        payload.delay_until,
        payload.created_at,
        payload.updated_at,
    )

    def _insert():
        conn = self._get_conn()
        conn.execute(
            """INSERT OR REPLACE INTO jobs
               (id, job_class, data, queue, status, attempts, max_attempts,
                backoff, timeout, tags, batch_id, delay_until, created_at, updated_at)
               VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            record,
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_insert)
    return payload.id
|
|
243
|
+
|
|
244
|
+
async def push_many(self, payloads: list[JobPayload]) -> list[str]:
    """Store several payloads as pending jobs with one executemany and one commit.

    Every payload gets ``status='pending'`` and the same ``updated_at``
    timestamp, modified in place.

    Returns:
        The job ids, in input order.
    """
    stamp = _now_ts()
    records = []
    for job in payloads:
        job.status = "pending"
        job.updated_at = stamp
        encoded_backoff = json.dumps(job.backoff) if isinstance(job.backoff, list) else job.backoff
        records.append((
            job.id, job.job_class, json.dumps(job.data),
            job.queue, job.status, job.attempts, job.max_attempts,
            encoded_backoff, job.timeout, json.dumps(job.tags),
            job.batch_id, job.delay_until, job.created_at, job.updated_at,
        ))
    # The id is the first column of every record.
    job_ids = [record[0] for record in records]

    def _insert_all():
        conn = self._get_conn()
        conn.executemany(
            """INSERT OR REPLACE INTO jobs
               (id, job_class, data, queue, status, attempts, max_attempts,
                backoff, timeout, tags, batch_id, delay_until, created_at, updated_at)
               VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            records,
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_insert_all)
    return job_ids
|
|
269
|
+
|
|
270
|
+
async def pop(self, queue: str) -> JobPayload | None:
    """Claim the oldest ready pending job on ``queue`` and mark it processing.

    A job is ready when it has no ``delay_until`` or that time has passed.

    Returns:
        The claimed payload (status, timestamps and attempts updated to
        match the row), or None when nothing is ready or every claim
        attempt lost to another consumer.
    """
    now = _now_ts()

    def _claim() -> JobPayload | None:
        conn = self._get_conn()
        # Atomic claim: SELECT then UPDATE ... WHERE status='pending' guards
        # against another process having grabbed the row between the two
        # statements. Bounded retry so we don't livelock under contention.
        for _ in range(5):
            candidate = conn.execute(
                """SELECT * FROM jobs
                   WHERE queue=? AND status='pending'
                   AND (delay_until IS NULL OR delay_until <= ?)
                   ORDER BY created_at ASC LIMIT 1""",
                (queue, now),
            ).fetchone()
            if not candidate:
                conn.commit()
                return None
            claim = conn.execute(
                """UPDATE jobs
                   SET status='processing', started_at=?, updated_at=?, attempts=attempts+1
                   WHERE id=? AND status='pending'""",
                (now, now, candidate["id"]),
            )
            conn.commit()
            if claim.rowcount != 1:
                # Somebody else took this row; look for the next one.
                continue
            job = self._row_to_payload(candidate)
            # Mirror the UPDATE on the in-memory copy built from the old row.
            job.status = "processing"
            job.started_at = now
            job.updated_at = now
            job.attempts += 1
            return job
        return None

    async with self._lock:
        return await self._execute_with_retry(_claim)
|
|
307
|
+
|
|
308
|
+
async def pop_delayed(self) -> list[JobPayload]:
    """Clear ``delay_until`` on pending jobs whose delay has expired.

    Returns:
        Payloads built from the rows as they were read before the update,
        so they still carry their old ``delay_until`` and ``updated_at``.
    """
    now = _now_ts()

    def _promote() -> list[JobPayload]:
        conn = self._get_conn()
        due = conn.execute(
            "SELECT * FROM jobs WHERE status='pending' AND delay_until IS NOT NULL AND delay_until <= ?",
            (now,),
        ).fetchall()
        if due:
            conn.execute(
                "UPDATE jobs SET delay_until=NULL, updated_at=? WHERE status='pending' AND delay_until IS NOT NULL AND delay_until <= ?",
                (now, now),
            )
        conn.commit()
        return [self._row_to_payload(row) for row in due]

    async with self._lock:
        return await self._execute_with_retry(_promote)
|
|
327
|
+
|
|
328
|
+
# ── Job lifecycle ───────────────────────────────────────────
|
|
329
|
+
|
|
330
|
+
async def complete(self, payload: JobPayload) -> None:
    """Mark the job as completed in the database and on ``payload``.

    The in-memory payload gets ``status`` and ``completed_at`` only after
    the database write has succeeded.
    """
    finished_at = _now_ts()

    def _mark_completed():
        conn = self._get_conn()
        conn.execute(
            "UPDATE jobs SET status='completed', completed_at=?, updated_at=? WHERE id=?",
            (finished_at, finished_at, payload.id),
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_mark_completed)
    payload.status = "completed"
    payload.completed_at = finished_at
|
|
343
|
+
|
|
344
|
+
async def fail(self, payload: JobPayload, error: str) -> None:
    """Mark the job as failed, storing ``error`` on the row and on ``payload``.

    The in-memory payload is updated only after the database write has
    succeeded.
    """
    failed_at = _now_ts()

    def _mark_failed():
        conn = self._get_conn()
        conn.execute(
            "UPDATE jobs SET status='failed', failed_at=?, updated_at=?, error=? WHERE id=?",
            (failed_at, failed_at, error, payload.id),
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_mark_failed)
    payload.status = "failed"
    payload.failed_at = failed_at
    payload.error = error
|
|
358
|
+
|
|
359
|
+
async def release(self, payload: JobPayload, delay: float = 0) -> None:
    """Return the job to 'pending', optionally delayed by ``delay`` seconds.

    A positive ``delay`` sets ``delay_until`` to now + delay; otherwise
    ``delay_until`` is set to NULL. Only the database row is changed; the
    ``payload`` object is not modified.
    """
    now = _now_ts()
    if delay > 0:
        ready_at = now + delay
    else:
        ready_at = None

    def _requeue():
        conn = self._get_conn()
        conn.execute(
            "UPDATE jobs SET status='pending', updated_at=?, delay_until=? WHERE id=?",
            (now, ready_at, payload.id),
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_requeue)
|
|
371
|
+
|
|
372
|
+
async def delete(self, job_id: str) -> None:
    """Delete the job row with id ``job_id``, if there is one."""

    def _remove():
        conn = self._get_conn()
        conn.execute("DELETE FROM jobs WHERE id=?", (job_id,))
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_remove)
|
|
379
|
+
|
|
380
|
+
# ── Query ───────────────────────────────────────────────────
|
|
381
|
+
|
|
382
|
+
async def get_job(self, job_id: str) -> JobPayload | None:
    """Fetch one job by id; returns None when no such row exists."""

    def _fetch():
        cursor = self._get_conn().execute("SELECT * FROM jobs WHERE id=?", (job_id,))
        return cursor.fetchone()

    found = await self._run(_fetch)
    if not found:
        return None
    return self._row_to_payload(found)
|
|
387
|
+
|
|
388
|
+
def _build_where(
|
|
389
|
+
self,
|
|
390
|
+
queue: str | None = None,
|
|
391
|
+
status: str | None = None,
|
|
392
|
+
tag: str | None = None,
|
|
393
|
+
batch_id: str | None = None,
|
|
394
|
+
created_from: float | None = None,
|
|
395
|
+
created_to: float | None = None,
|
|
396
|
+
) -> tuple[str, list[Any]]:
|
|
397
|
+
conditions: list[str] = []
|
|
398
|
+
params: list[Any] = []
|
|
399
|
+
if queue:
|
|
400
|
+
conditions.append("queue=?")
|
|
401
|
+
params.append(queue)
|
|
402
|
+
if status:
|
|
403
|
+
conditions.append("status=?")
|
|
404
|
+
params.append(status)
|
|
405
|
+
if tag:
|
|
406
|
+
conditions.append("tags LIKE ?")
|
|
407
|
+
params.append(f'%"{tag}"%')
|
|
408
|
+
if batch_id:
|
|
409
|
+
conditions.append("batch_id=?")
|
|
410
|
+
params.append(batch_id)
|
|
411
|
+
if created_from is not None:
|
|
412
|
+
conditions.append("created_at >= ?")
|
|
413
|
+
params.append(created_from)
|
|
414
|
+
if created_to is not None:
|
|
415
|
+
conditions.append("created_at <= ?")
|
|
416
|
+
params.append(created_to)
|
|
417
|
+
where = " AND ".join(conditions) if conditions else "1=1"
|
|
418
|
+
return where, params
|
|
419
|
+
|
|
420
|
+
async def get_jobs(
    self,
    queue: str | None = None,
    status: str | None = None,
    tag: str | None = None,
    batch_id: str | None = None,
    offset: int = 0,
    limit: int = 50,
    created_from: float | None = None,
    created_to: float | None = None,
) -> list[JobPayload]:
    """Return one page of jobs matching the filters, newest first.

    Filters are combined by ``_build_where``; ``limit`` and ``offset`` are
    bound as the final two query parameters.
    """
    where, filter_params = self._build_where(
        queue, status, tag, batch_id, created_from, created_to
    )
    sql = f"SELECT * FROM jobs WHERE {where} ORDER BY created_at DESC LIMIT ? OFFSET ?"
    bound = [*filter_params, limit, offset]

    def _fetch_page():
        return self._get_conn().execute(sql, bound).fetchall()

    page = await self._run(_fetch_page)
    return [self._row_to_payload(row) for row in page]
|
|
440
|
+
|
|
441
|
+
async def count_jobs(
|
|
442
|
+
self,
|
|
443
|
+
queue: str | None = None,
|
|
444
|
+
status: str | None = None,
|
|
445
|
+
created_from: float | None = None,
|
|
446
|
+
created_to: float | None = None,
|
|
447
|
+
) -> int:
|
|
448
|
+
where, params = self._build_where(queue=queue, status=status, created_from=created_from, created_to=created_to)
|
|
449
|
+
def _do():
|
|
450
|
+
return self._get_conn().execute(
|
|
451
|
+
f"SELECT COUNT(*) as cnt FROM jobs WHERE {where}", params,
|
|
452
|
+
).fetchone()
|
|
453
|
+
row = await self._run(_do)
|
|
454
|
+
return row["cnt"] if row else 0
|
|
455
|
+
|
|
456
|
+
async def size(self, queue: str) -> int:
|
|
457
|
+
def _do():
|
|
458
|
+
return self._get_conn().execute(
|
|
459
|
+
"SELECT COUNT(*) as cnt FROM jobs WHERE queue=? AND status='pending'",
|
|
460
|
+
(queue,),
|
|
461
|
+
).fetchone()
|
|
462
|
+
row = await self._run(_do)
|
|
463
|
+
return row["cnt"] if row else 0
|
|
464
|
+
|
|
465
|
+
async def queues(self) -> list[str]:
|
|
466
|
+
def _do():
|
|
467
|
+
return self._get_conn().execute(
|
|
468
|
+
"SELECT DISTINCT queue FROM jobs ORDER BY queue"
|
|
469
|
+
).fetchall()
|
|
470
|
+
rows = await self._run(_do)
|
|
471
|
+
return [r["queue"] for r in rows]
|
|
472
|
+
|
|
473
|
+
# ── Metrics ─────────────────────────────────────────────────
|
|
474
|
+
|
|
475
|
+
async def record_metric(self, queue: str, metric: str, value: float) -> None:
    """Append one row to the metrics table, stamped with the current time."""
    recorded_at = _now_ts()
    entry = (queue, metric, value, recorded_at)

    def _insert():
        conn = self._get_conn()
        conn.execute(
            "INSERT INTO metrics (queue, metric, value, recorded_at) VALUES (?,?,?,?)",
            entry,
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_insert)
|
|
484
|
+
|
|
485
|
+
async def get_metrics(self, queue: str | None = None) -> dict[str, Any]:
|
|
486
|
+
"""Live status counts from the jobs table — never from the metrics event log.
|
|
487
|
+
Counting from metrics caused the Overview "Total Jobs" to shrink over time
|
|
488
|
+
as old metric rows aged out and to plateau when metric storage filled up."""
|
|
489
|
+
def _do() -> dict[str, Any]:
|
|
490
|
+
c = self._get_conn()
|
|
491
|
+
if queue:
|
|
492
|
+
status_rows = c.execute(
|
|
493
|
+
"SELECT queue, status, COUNT(*) AS cnt FROM jobs WHERE queue=? GROUP BY queue, status",
|
|
494
|
+
(queue,),
|
|
495
|
+
).fetchall()
|
|
496
|
+
queue_names = [queue]
|
|
497
|
+
else:
|
|
498
|
+
status_rows = c.execute(
|
|
499
|
+
"SELECT queue, status, COUNT(*) AS cnt FROM jobs GROUP BY queue, status"
|
|
500
|
+
).fetchall()
|
|
501
|
+
qrows = c.execute("SELECT DISTINCT queue FROM jobs ORDER BY queue").fetchall()
|
|
502
|
+
queue_names = [r["queue"] for r in qrows]
|
|
503
|
+
|
|
504
|
+
result: dict[str, Any] = {
|
|
505
|
+
q: {"pending": 0, "processing": 0, "completed": 0, "failed": 0}
|
|
506
|
+
for q in queue_names
|
|
507
|
+
}
|
|
508
|
+
for r in status_rows:
|
|
509
|
+
q = r["queue"]
|
|
510
|
+
if q not in result:
|
|
511
|
+
result[q] = {"pending": 0, "processing": 0, "completed": 0, "failed": 0}
|
|
512
|
+
if r["status"] in result[q]:
|
|
513
|
+
result[q][r["status"]] = r["cnt"]
|
|
514
|
+
return result
|
|
515
|
+
|
|
516
|
+
return await self._run(_do)
|
|
517
|
+
|
|
518
|
+
async def report_supervisor(self, stats: dict[str, Any]) -> None:
    """Upsert a supervisor heartbeat row keyed by ``stats["name"]``.

    The whole ``stats`` dict is stored as JSON together with the current
    timestamp. Nothing is written when the name is missing or blank.
    """
    supervisor_name = str(stats.get("name", "")).strip()
    if supervisor_name == "":
        return
    heartbeat_at = _now_ts()
    encoded = json.dumps(stats)

    def _upsert():
        conn = self._get_conn()
        conn.execute(
            """INSERT INTO supervisors (name, data, heartbeat_at)
               VALUES (?, ?, ?)
               ON CONFLICT(name) DO UPDATE SET
               data=excluded.data,
               heartbeat_at=excluded.heartbeat_at""",
            (supervisor_name, encoded, heartbeat_at),
        )
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_upsert)
|
|
537
|
+
|
|
538
|
+
async def get_supervisor_stats(self, stale_after: float = 10.0) -> list[dict[str, Any]]:
    """Return the stored stats of supervisors that reported recently.

    Args:
        stale_after: Maximum heartbeat age in seconds.

    Returns:
        Stats dicts ordered by supervisor name. Rows whose data is not valid
        JSON, is not a dict, or lacks a truthy ``"running"`` entry are
        skipped.
    """
    oldest_allowed = _now_ts() - stale_after

    def _fetch_recent():
        conn = self._get_conn()
        return conn.execute(
            "SELECT data FROM supervisors WHERE heartbeat_at >= ? ORDER BY name",
            (oldest_allowed,),
        ).fetchall()

    alive: list[dict[str, Any]] = []
    for entry in await self._run(_fetch_recent):
        stored = entry["data"]
        try:
            info = json.loads(stored) if isinstance(stored, str) else stored
        except json.JSONDecodeError:
            continue
        if isinstance(info, dict) and info.get("running", False):
            alive.append(info)
    return alive
|
|
561
|
+
|
|
562
|
+
# ── Batch helpers ───────────────────────────────────────────
|
|
563
|
+
|
|
564
|
+
async def store_batch(self, batch_id: str, data: dict[str, Any]) -> None:
    """Insert or replace the batch row ``batch_id`` with ``data`` encoded as JSON."""
    record = (batch_id, json.dumps(data))

    def _save():
        conn = self._get_conn()
        conn.execute("INSERT OR REPLACE INTO batches (id, data) VALUES (?, ?)", record)
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_save)
|
|
572
|
+
|
|
573
|
+
async def get_batch(self, batch_id: str) -> dict[str, Any] | None:
|
|
574
|
+
def _do():
|
|
575
|
+
return self._get_conn().execute(
|
|
576
|
+
"SELECT data FROM batches WHERE id=?", (batch_id,)
|
|
577
|
+
).fetchone()
|
|
578
|
+
row = await self._run(_do)
|
|
579
|
+
if row:
|
|
580
|
+
return json.loads(row["data"]) if isinstance(row["data"], str) else row["data"]
|
|
581
|
+
return None
|
|
582
|
+
|
|
583
|
+
async def update_batch(self, batch_id: str, data: dict[str, Any]) -> None:
    """Overwrite the data of an existing batch row.

    This is an UPDATE, so nothing is written when ``batch_id`` has no row.
    """
    encoded = json.dumps(data)

    def _overwrite():
        conn = self._get_conn()
        conn.execute("UPDATE batches SET data=? WHERE id=?", (encoded, batch_id))
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_overwrite)
|
|
591
|
+
|
|
592
|
+
async def increment_batch_counter(
    self, batch_id: str, field: str, delta: int = 1,
) -> dict[str, Any] | None:
    """Add ``delta`` to a top-level numeric field of a batch's JSON data.

    The increment is a single SQL UPDATE using json_set/json_extract; a
    missing field counts as 0.

    Returns:
        The batch data as re-read after the update, or None when the batch
        does not exist.
    """
    json_path = f"$.{field}"

    def _bump() -> dict[str, Any] | None:
        conn = self._get_conn()
        conn.execute(
            """UPDATE batches
               SET data = json_set(
                   data, ?,
                   COALESCE(json_extract(data, ?), 0) + ?
               )
               WHERE id = ?""",
            (json_path, json_path, delta, batch_id),
        )
        conn.commit()
        current = conn.execute("SELECT data FROM batches WHERE id=?", (batch_id,)).fetchone()
        if current is None:
            return None
        stored = current["data"]
        if isinstance(stored, str):
            return json.loads(stored)
        return stored

    async with self._lock:
        return await self._execute_with_retry(_bump)
|
|
617
|
+
|
|
618
|
+
# ── Pruning ─────────────────────────────────────────────────
|
|
619
|
+
|
|
620
|
+
async def prune(
|
|
621
|
+
self,
|
|
622
|
+
status: str | None = None,
|
|
623
|
+
tag: str | None = None,
|
|
624
|
+
older_than_seconds: float | None = None,
|
|
625
|
+
queue: str | None = None,
|
|
626
|
+
) -> int:
|
|
627
|
+
conditions = []
|
|
628
|
+
params: list[Any] = []
|
|
629
|
+
|
|
630
|
+
if status:
|
|
631
|
+
conditions.append("status=?")
|
|
632
|
+
params.append(status)
|
|
633
|
+
if tag:
|
|
634
|
+
conditions.append("tags LIKE ?")
|
|
635
|
+
params.append(f'%"{tag}"%')
|
|
636
|
+
if older_than_seconds:
|
|
637
|
+
cutoff = _now_ts() - older_than_seconds
|
|
638
|
+
conditions.append("updated_at < ?")
|
|
639
|
+
params.append(cutoff)
|
|
640
|
+
if queue:
|
|
641
|
+
conditions.append("queue=?")
|
|
642
|
+
params.append(queue)
|
|
643
|
+
|
|
644
|
+
if not conditions:
|
|
645
|
+
return 0
|
|
646
|
+
|
|
647
|
+
where = " AND ".join(conditions)
|
|
648
|
+
result = [0]
|
|
649
|
+
async with self._lock:
|
|
650
|
+
def _do():
|
|
651
|
+
c = self._get_conn()
|
|
652
|
+
cursor = c.execute(f"DELETE FROM jobs WHERE {where}", params)
|
|
653
|
+
c.commit()
|
|
654
|
+
result[0] = cursor.rowcount
|
|
655
|
+
await self._execute_with_retry(_do)
|
|
656
|
+
return result[0]
|
|
657
|
+
|
|
658
|
+
async def flush(self, queue: str | None = None) -> None:
    """Delete all jobs of one queue, or wipe every table.

    With a truthy ``queue`` only that queue's jobs are removed. Otherwise
    jobs, batches, metrics and supervisors are all emptied.
    """

    def _wipe():
        conn = self._get_conn()
        if not queue:
            conn.executescript("DELETE FROM jobs; DELETE FROM batches; DELETE FROM metrics; DELETE FROM supervisors;")
        else:
            conn.execute("DELETE FROM jobs WHERE queue=?", (queue,))
        conn.commit()

    async with self._lock:
        await self._execute_with_retry(_wipe)
|
|
668
|
+
|
|
669
|
+
async def prune_metrics(self, older_than_seconds: float) -> int:
    """Delete metric rows recorded more than ``older_than_seconds`` ago.

    Returns:
        The number of rows deleted.
    """
    oldest_kept = _now_ts() - older_than_seconds

    def _delete_old() -> int:
        conn = self._get_conn()
        cursor = conn.execute("DELETE FROM metrics WHERE recorded_at < ?", (oldest_kept,))
        conn.commit()
        return cursor.rowcount

    async with self._lock:
        return await self._execute_with_retry(_delete_old)
|
|
680
|
+
|
|
681
|
+
async def recent_throughput(
    self, seconds: int = 60, queue: str | None = None,
) -> dict[str, int]:
    """Count metric events recorded within the last ``seconds`` seconds.

    Args:
        seconds: Width of the look-back window.
        queue: Restrict the count to this queue; a falsy value means all.

    Returns:
        Counts for the "processing", "completed" and "failed" metric names;
        names with no rows stay at 0 and other names are ignored.
    """
    window_start = _now_ts() - seconds

    def _tally() -> dict[str, int]:
        conn = self._get_conn()
        if queue:
            sql = """SELECT metric, COUNT(*) AS cnt FROM metrics
                     WHERE recorded_at > ? AND queue = ?
                     GROUP BY metric"""
            bound = (window_start, queue)
        else:
            sql = """SELECT metric, COUNT(*) AS cnt FROM metrics
                     WHERE recorded_at > ?
                     GROUP BY metric"""
            bound = (window_start,)
        grouped = conn.execute(sql, bound).fetchall()

        totals = {"processing": 0, "completed": 0, "failed": 0}
        for entry in grouped:
            name = entry["metric"]
            if name in totals:
                totals[name] = entry["cnt"]
        return totals

    return await self._run(_tally)
|