djobs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- djobs/__init__.py +49 -0
- djobs/api/__init__.py +1 -0
- djobs/api/ai_handlers.py +100 -0
- djobs/cli.py +160 -0
- djobs/core/__init__.py +1 -0
- djobs/core/config.py +25 -0
- djobs/core/errors.py +35 -0
- djobs/core/models.py +45 -0
- djobs/core/retry.py +39 -0
- djobs/core/states.py +52 -0
- djobs/daemon.py +190 -0
- djobs/mcp_server.py +508 -0
- djobs/observability/__init__.py +6 -0
- djobs/observability/inspect.py +47 -0
- djobs/observability/logging.py +62 -0
- djobs/observability/metrics.py +70 -0
- djobs/queue/__init__.py +1 -0
- djobs/queue/service.py +173 -0
- djobs/scheduler/__init__.py +5 -0
- djobs/scheduler/scheduler.py +132 -0
- djobs/storage/__init__.py +1 -0
- djobs/storage/events.py +20 -0
- djobs/storage/postgres.py +631 -0
- djobs/storage/sqlite.py +633 -0
- djobs/worker/__init__.py +1 -0
- djobs/worker/pool.py +200 -0
- djobs/worker/registry.py +31 -0
- djobs/worker/runner.py +48 -0
- djobs-0.1.0.dist-info/METADATA +261 -0
- djobs-0.1.0.dist-info/RECORD +33 -0
- djobs-0.1.0.dist-info/WHEEL +4 -0
- djobs-0.1.0.dist-info/entry_points.txt +2 -0
- djobs-0.1.0.dist-info/licenses/LICENSE +21 -0
djobs/__init__.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Distributed Job System — a durable job queue with retry, lease, and observability.
|
|
2
|
+
|
|
3
|
+
Public API
|
|
4
|
+
----------
|
|
5
|
+
>>> from djobs import Job, QueueService, SQLiteJobRepository, HandlerRegistry, WorkerPool, Daemon
|
|
6
|
+
>>> repo = SQLiteJobRepository.from_path("jobs.db")
|
|
7
|
+
>>> queue = QueueService(repo)
|
|
8
|
+
>>> job = queue.submit("my_task", {"key": "value"})
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
|
14
|
+
|
|
15
|
+
from djobs.core.errors import (
|
|
16
|
+
DJobsError,
|
|
17
|
+
DuplicateHandlerError,
|
|
18
|
+
HandlerNotFoundError,
|
|
19
|
+
InvalidStateTransitionError,
|
|
20
|
+
JobNotFoundError,
|
|
21
|
+
NonRetryableJobError,
|
|
22
|
+
RetryableJobError,
|
|
23
|
+
)
|
|
24
|
+
from djobs.core.models import Job
|
|
25
|
+
from djobs.core.states import JobStatus
|
|
26
|
+
from djobs.daemon import Daemon
|
|
27
|
+
from djobs.queue.service import QueueService
|
|
28
|
+
from djobs.storage.sqlite import SQLiteJobRepository
|
|
29
|
+
from djobs.worker.pool import WorkerPool
|
|
30
|
+
from djobs.worker.registry import HandlerRegistry
|
|
31
|
+
from djobs.worker.runner import WorkerRunner
|
|
32
|
+
|
|
33
|
+
# Names re-exported as the package's public API (each one is imported above).
# Entries are kept in ASCII-sorted order.
__all__ = [
    "DJobsError",
    "Daemon",
    "DuplicateHandlerError",
    "HandlerNotFoundError",
    "HandlerRegistry",
    "InvalidStateTransitionError",
    "Job",
    "JobNotFoundError",
    "JobStatus",
    "NonRetryableJobError",
    "QueueService",
    "RetryableJobError",
    "SQLiteJobRepository",
    "WorkerPool",
    "WorkerRunner",
]
|
djobs/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""External API and CLI entry points."""
|
djobs/api/ai_handlers.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Simulated AI job handlers for Phase 8 demo.
|
|
2
|
+
|
|
3
|
+
These handlers simulate real AI workloads:
|
|
4
|
+
- ai.summarize: text summarization (simulated token usage + latency)
|
|
5
|
+
- ai.classify: text classification (fast, cheap)
|
|
6
|
+
- ai.generate: long-running generation (expensive, may fail with rate limit)
|
|
7
|
+
|
|
8
|
+
Each handler writes cost/token metadata into the job payload for tracking.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import random
|
|
14
|
+
import time
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from djobs.core.errors import RetryableJobError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def ai_summarize_handler(payload: dict[str, Any]) -> Any:
    """Pretend to summarize ``payload["text"]`` and record usage on the payload.

    Payload keys:
        text: str — text to summarize; a missing key is treated as "".

    The token count is the number of whitespace-separated words in the text,
    with a floor of 10. About one call in twenty raises
    ``RetryableJobError`` to imitate a rate-limit response.

    Returns:
        The same ``payload`` dict, mutated in place with ``result``,
        ``tokens_used`` and ``cost_usd``.
    """
    source_text = payload.get("text", "")
    word_count = len(source_text.split())
    billed_tokens = word_count if word_count > 10 else 10

    # 5% of calls fail with a simulated, retryable rate-limit error.
    if random.random() < 0.05:
        raise RetryableJobError("AI API rate limit exceeded (429)")

    # Stand-in for the latency of a real API round trip.
    time.sleep(0.05)

    payload.update(
        result=f"Summary of {billed_tokens} tokens: {source_text[:60]}...",
        tokens_used=billed_tokens,
        cost_usd=round(billed_tokens * 0.00003, 6),
    )
    return payload
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def ai_classify_handler(payload: dict[str, Any]) -> Any:
    """Simulate an AI classification call.

    Payload keys:
        text: str — input text to classify (a missing key is treated as "").
        labels: list[str] — candidate labels. When the key is missing, ``None``
            or empty, the default ``positive``/``negative``/``neutral`` set
            is used.

    Returns:
        The same ``payload`` dict, mutated in place with ``result`` (the
        randomly chosen label), ``confidence``, ``tokens_used`` and
        ``cost_usd``.
    """
    text = payload.get("text", "")
    # ``random.choice`` raises IndexError on an empty sequence (and TypeError on
    # None), so an empty or null ``labels`` value falls back to the defaults
    # instead of crashing the job.
    labels = payload.get("labels") or ["positive", "negative", "neutral"]
    # Word count with a floor of 5 tokens.
    tokens = max(len(text.split()), 5)

    time.sleep(0.02)  # simulate latency

    chosen = random.choice(labels)
    payload["result"] = chosen
    payload["confidence"] = round(random.uniform(0.7, 0.99), 3)
    payload["tokens_used"] = tokens
    payload["cost_usd"] = round(tokens * 0.00001, 6)
    return payload
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def ai_generate_handler(payload: dict[str, Any]) -> Any:
    """Pretend to run a slow text-generation call and record usage on the payload.

    Payload keys:
        prompt: str — generation prompt; a missing key is treated as "".
        max_tokens: int — upper bound on output tokens (default 500).

    About 15% of calls raise ``RetryableJobError``, a deliberately higher
    failure rate than the other handlers so retries are exercised.

    Returns:
        The same ``payload`` dict, mutated in place with ``result``,
        ``input_tokens``, ``output_tokens``, ``tokens_used`` and ``cost_usd``.
    """
    prompt_text = payload.get("prompt", "")
    token_cap = payload.get("max_tokens", 500)

    # 15% of calls fail with a simulated, retryable timeout.
    if random.random() < 0.15:
        raise RetryableJobError("AI API timeout — generation too slow")

    # Longer simulated latency than the other handlers.
    time.sleep(0.1)

    # Output size is drawn between half the cap and the cap (inclusive);
    # input size is the prompt's word count with a floor of 10.
    generated = random.randint(token_cap // 2, token_cap)
    prompt_words = len(prompt_text.split())
    consumed = prompt_words if prompt_words > 10 else 10
    combined = consumed + generated

    usage = {
        "result": f"Generated {generated} tokens for: {prompt_text[:40]}...",
        "input_tokens": consumed,
        "output_tokens": generated,
        "tokens_used": combined,
        "cost_usd": round(combined * 0.00006, 6),
    }
    payload.update(usage)
    return payload
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Registry helper: maps each job type name to the handler function in this
# module that serves it.
AI_HANDLERS: dict[str, Any] = {
    "ai.summarize": ai_summarize_handler,
    "ai.classify": ai_classify_handler,
    "ai.generate": ai_generate_handler,
}
|
djobs/cli.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""CLI entry point for the distributed job system.
|
|
2
|
+
|
|
3
|
+
Commands::
|
|
4
|
+
|
|
5
|
+
djobs serve Start the background daemon
|
|
6
|
+
djobs serve --db my.db Use a custom database path
|
|
7
|
+
djobs serve --workers 8 Set max concurrent workers
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import importlib
|
|
14
|
+
import logging
|
|
15
|
+
import sys
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
Handler = Callable[[dict[str, Any]], Any]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Built-in handlers
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _echo_handler(payload: dict[str, Any]) -> dict[str, Any]:
|
|
28
|
+
"""Built-in handler that logs and returns the payload (for testing)."""
|
|
29
|
+
logging.getLogger("djobs.handlers.echo").info("echo: %s", payload)
|
|
30
|
+
return {"echoed": payload}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Handlers that ship with the CLI. ``_cmd_serve`` starts from a copy of this
# mapping and then merges in handlers loaded from user-supplied modules.
BUILTIN_HANDLERS: dict[str, Handler] = {
    "echo": _echo_handler,
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Handler loading
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _load_handlers_module(dotted_path: str) -> dict[str, Handler]:
|
|
44
|
+
"""Import a module and collect its HANDLERS dict.
|
|
45
|
+
|
|
46
|
+
The module must define ``HANDLERS: dict[str, Callable]``.
|
|
47
|
+
"""
|
|
48
|
+
module = importlib.import_module(dotted_path)
|
|
49
|
+
handlers = getattr(module, "HANDLERS", None)
|
|
50
|
+
if handlers is None:
|
|
51
|
+
raise ImportError(f"Module {dotted_path!r} does not export a HANDLERS dict")
|
|
52
|
+
if not isinstance(handlers, dict):
|
|
53
|
+
raise TypeError(
|
|
54
|
+
f"HANDLERS in {dotted_path!r} must be a dict, got {type(handlers).__name__}"
|
|
55
|
+
)
|
|
56
|
+
return handlers
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
# serve command
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _cmd_serve(args: argparse.Namespace) -> None:
    """Build a daemon from the parsed ``serve`` arguments and run it.

    Reads ``db``, ``workers``, ``poll_interval``, ``scheduler_interval`` and
    ``handlers`` from ``args``. Handlers from later modules replace earlier
    ones (and built-ins) that share a job type name.
    """
    # Imported here rather than at module import time.
    from djobs.daemon import Daemon

    # Start from the built-ins, then layer on each user-provided module.
    registry: dict[str, Handler] = {**BUILTIN_HANDLERS}
    for dotted in args.handlers or []:
        registry.update(_load_handlers_module(dotted))

    daemon = Daemon.from_db(
        db_path=args.db,
        handlers=registry,
        max_concurrent=args.workers,
        poll_interval=args.poll_interval,
        scheduler_interval=args.scheduler_interval,
    )

    handler_names = ", ".join(sorted(registry)) or "(none)"
    banner = (
        f"djobs daemon starting\n"
        f" db: {args.db}\n"
        f" workers: {args.workers}\n"
        f" handlers: {handler_names}\n"
        f" worker_id: {daemon.worker_id}\n"
    )
    print(banner)

    daemon.run()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Main
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and dispatch to the chosen sub-command.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Prints the help text and exits with status 1 when no sub-command is given.
    """
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
        level=logging.INFO,
    )

    root = argparse.ArgumentParser(prog="djobs", description="Distributed job system CLI")
    commands = root.add_subparsers(dest="command")

    # --- serve ---
    serve = commands.add_parser("serve", help="Start the background daemon")
    serve_options: list[tuple[str, dict[str, Any]]] = [
        (
            "--db",
            {
                "default": "djobs_mcp.db",
                "help": "SQLite database path (default: djobs_mcp.db — same as MCP server)",
            },
        ),
        (
            "--workers",
            {"type": int, "default": 4, "help": "Max concurrent workers (default: 4)"},
        ),
        (
            "--poll-interval",
            {
                "type": float,
                "default": 1.0,
                "help": "Seconds between job claim attempts (default: 1.0)",
            },
        ),
        (
            "--scheduler-interval",
            {
                "type": float,
                "default": 5.0,
                "help": "Seconds between scheduler ticks (default: 5.0)",
            },
        ),
        (
            "--handlers",
            {
                "nargs": "*",
                "metavar": "MODULE",
                "help": (
                    "Python modules exporting a HANDLERS dict. "
                    "E.g. --handlers myapp.handlers myapp.extra_handlers"
                ),
            },
        ),
    ]
    for flag, spec in serve_options:
        serve.add_argument(flag, **spec)
    serve.set_defaults(func=_cmd_serve)

    parsed = root.parse_args(argv)
    # ``func`` is only present when a sub-command was selected.
    if not hasattr(parsed, "func"):
        root.print_help()
        sys.exit(1)
    parsed.func(parsed)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Run the CLI when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
djobs/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core domain models, state machine, and domain errors."""
|
djobs/core/config.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Minimal configuration loader using dataclass + environment variables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _env_default(variable: str, fallback: str):
    """Return a dataclass field whose default is read from the environment.

    The lookup runs each time an instance is created, not at import time.
    """
    return field(default_factory=lambda: os.getenv(variable, fallback))


@dataclass(frozen=True)
class Config:
    """Immutable application settings.

    Each field defaults to the value of its ``DJOBS_``-prefixed environment
    variable, falling back to a built-in default when the variable is unset.
    Explicit keyword arguments override both.
    """

    db_path: str = _env_default("DJOBS_DB_PATH", "djobs.db")
    log_level: str = _env_default("DJOBS_LOG_LEVEL", "INFO")
    log_format: str = _env_default("DJOBS_LOG_FORMAT", "json")
    worker_id: str = _env_default("DJOBS_WORKER_ID", "worker-1")

    @classmethod
    def from_env(cls) -> Config:
        """Create a config populated from the current environment variables."""
        return cls()
|
djobs/core/errors.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Domain exceptions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DJobsError(Exception):
    """Root of the djobs exception hierarchy; catch this to handle any djobs error."""


class InvalidStateTransitionError(DJobsError):
    """A job was asked to move between two states that are not a permitted pair."""


# Older name for the same class, kept so existing imports continue to work.
InvalidStateTransition = InvalidStateTransitionError


class JobNotFoundError(DJobsError):
    """No job exists for the requested job id."""


class HandlerNotFoundError(DJobsError):
    """A worker has no handler registered for the job's type."""


class DuplicateHandlerError(DJobsError):
    """A handler was registered more than once."""


class RetryableJobError(DJobsError):
    """Signal from a handler that the failure should be retried."""


class NonRetryableJobError(DJobsError):
    """Signal from a handler that the failure is terminal and the job should end as failed."""
|
djobs/core/models.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Job domain model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from djobs.core.states import JobStatus
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _utcnow() -> datetime:
|
|
14
|
+
return datetime.now(UTC)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _new_id() -> str:
|
|
18
|
+
return str(uuid.uuid4())
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class Job:
    """A durable unit of work.

    Fields cover Phase 1 (basic lifecycle), Phase 2 (retry, idempotency),
    Phase 3 (lease, heartbeat, crash recovery), and Phase 6 (correlation id,
    execution duration).

    Only ``type`` is required. Every other field has a default, so
    ``Job("my_task")`` yields a pending job with an empty payload, freshly
    generated ``id`` and ``correlation_id`` values, and ``created_at`` /
    ``updated_at`` set to the current UTC time.
    """

    # Job type name; presumably the key used to look up a handler — confirm
    # against the worker registry.
    type: str
    # Handler input; each instance gets its own empty dict by default.
    payload: dict[str, Any] = field(default_factory=dict)
    # Defaults to a new UUID4 string per instance.
    id: str = field(default_factory=_new_id)
    status: JobStatus = JobStatus.PENDING
    # Retry bookkeeping (Phase 2).
    attempt: int = 0
    max_attempts: int = 1
    run_after: datetime | None = None
    idempotency_key: str | None = None
    # Defaults to a new UUID4 string per instance, independent of ``id``.
    correlation_id: str = field(default_factory=_new_id)
    last_error: str | None = None
    # Lease / heartbeat bookkeeping (Phase 3); all unset on a new job.
    leased_by: str | None = None
    lease_expires_at: datetime | None = None
    heartbeat_at: datetime | None = None
    started_at: datetime | None = None
    # Both timestamps default to the timezone-aware UTC time at construction.
    created_at: datetime = field(default_factory=_utcnow)
    updated_at: datetime = field(default_factory=_utcnow)
|
djobs/core/retry.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Retry policy and backoff calculation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import UTC, datetime, timedelta
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
|
|
10
|
+
class RetryPolicy:
|
|
11
|
+
"""Exponential backoff policy for retryable job failures."""
|
|
12
|
+
|
|
13
|
+
base_delay_seconds: float = 1.0
|
|
14
|
+
multiplier: float = 2.0
|
|
15
|
+
max_delay_seconds: float = 60.0
|
|
16
|
+
|
|
17
|
+
def __post_init__(self) -> None:
|
|
18
|
+
if self.base_delay_seconds <= 0:
|
|
19
|
+
raise ValueError("base_delay_seconds must be greater than 0")
|
|
20
|
+
if self.multiplier < 1:
|
|
21
|
+
raise ValueError("multiplier must be greater than or equal to 1")
|
|
22
|
+
if self.max_delay_seconds < self.base_delay_seconds:
|
|
23
|
+
raise ValueError(
|
|
24
|
+
"max_delay_seconds must be greater than or equal to base_delay_seconds"
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
def calculate_delay(self, attempt: int) -> timedelta:
|
|
28
|
+
"""Return retry delay for the attempt that just failed."""
|
|
29
|
+
if attempt <= 0:
|
|
30
|
+
raise ValueError("attempt must be greater than 0")
|
|
31
|
+
delay_seconds = self.base_delay_seconds * (self.multiplier ** (attempt - 1))
|
|
32
|
+
return timedelta(seconds=min(delay_seconds, self.max_delay_seconds))
|
|
33
|
+
|
|
34
|
+
def next_run_after(self, attempt: int, now: datetime | None = None) -> datetime:
|
|
35
|
+
"""Return the UTC time when the next retry should become eligible."""
|
|
36
|
+
current_time = now or datetime.now(UTC)
|
|
37
|
+
if current_time.tzinfo is None:
|
|
38
|
+
current_time = current_time.replace(tzinfo=UTC)
|
|
39
|
+
return current_time.astimezone(UTC) + self.calculate_delay(attempt)
|
djobs/core/states.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Job status enum and state transition validation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class JobStatus(enum.StrEnum):
    """All possible job states.

    Members are strings, so each compares equal to its lowercase value
    (e.g. ``JobStatus.PENDING == "pending"``).

    Phase 1 transitions:
        pending -> running -> succeeded
        pending -> running -> failed

    Phase 2 transitions:
        running -> retry_scheduled -> pending
        running -> dead_lettered

    Phase 3 transitions (lease recovery):
        running -> pending  (only via recover_expired_leases)

    Transitions accepted without a claim (see ``_VALID_TRANSITIONS``):
        pending -> succeeded
        pending -> failed
    """

    PENDING = "pending"
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    FAILED = "failed"
    RETRY_SCHEDULED = "retry_scheduled"
    DEAD_LETTERED = "dead_lettered"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Valid (from_status -> to_status) pairs.
# ``validate_transition`` rejects any pair that is not listed here. No pair
# starts from SUCCEEDED, FAILED or DEAD_LETTERED, so those states are terminal.
_VALID_TRANSITIONS: set[tuple[JobStatus, JobStatus]] = {
    (JobStatus.PENDING, JobStatus.RUNNING),
    (JobStatus.PENDING, JobStatus.SUCCEEDED),  # AI agent completes without claim
    (JobStatus.PENDING, JobStatus.FAILED),  # AI agent fails without claim
    (JobStatus.RUNNING, JobStatus.SUCCEEDED),
    (JobStatus.RUNNING, JobStatus.FAILED),
    (JobStatus.RUNNING, JobStatus.RETRY_SCHEDULED),
    (JobStatus.RUNNING, JobStatus.DEAD_LETTERED),
    (JobStatus.RUNNING, JobStatus.PENDING),  # lease recovery
    (JobStatus.RETRY_SCHEDULED, JobStatus.PENDING),
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def validate_transition(from_status: JobStatus, to_status: JobStatus) -> None:
    """Check that moving a job from ``from_status`` to ``to_status`` is permitted.

    Returns silently when the pair is listed in ``_VALID_TRANSITIONS``.

    Raises:
        InvalidStateTransitionError: The pair is not an allowed transition.
    """
    # Imported inside the function rather than at module level.
    from djobs.core.errors import InvalidStateTransitionError

    edge = (from_status, to_status)
    if edge in _VALID_TRANSITIONS:
        return

    source, target = from_status.value, to_status.value
    raise InvalidStateTransitionError(
        f"Cannot transition from {source!r} to {target!r}"
    )
|