pg-scheduler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pg_scheduler-0.1.0/LICENSE +21 -0
- pg_scheduler-0.1.0/MANIFEST.in +15 -0
- pg_scheduler-0.1.0/PKG-INFO +356 -0
- pg_scheduler-0.1.0/README.md +315 -0
- pg_scheduler-0.1.0/pg_scheduler/__init__.py +36 -0
- pg_scheduler-0.1.0/pg_scheduler/examples.py +153 -0
- pg_scheduler-0.1.0/pg_scheduler/scheduler.py +1433 -0
- pg_scheduler-0.1.0/pg_scheduler.egg-info/PKG-INFO +356 -0
- pg_scheduler-0.1.0/pg_scheduler.egg-info/SOURCES.txt +13 -0
- pg_scheduler-0.1.0/pg_scheduler.egg-info/dependency_links.txt +1 -0
- pg_scheduler-0.1.0/pg_scheduler.egg-info/requires.txt +13 -0
- pg_scheduler-0.1.0/pg_scheduler.egg-info/top_level.txt +1 -0
- pg_scheduler-0.1.0/pyproject.toml +89 -0
- pg_scheduler-0.1.0/requirements.txt +1 -0
- pg_scheduler-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Pg-Job-Runner
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include requirements.txt
|
|
3
|
+
recursive-include pg_scheduler *.py
|
|
4
|
+
recursive-exclude * __pycache__
|
|
5
|
+
recursive-exclude * *.py[co]
|
|
6
|
+
exclude *.py
|
|
7
|
+
exclude docker-compose*.yaml
|
|
8
|
+
exclude Dockerfile
|
|
9
|
+
exclude race_condition_test.sh
|
|
10
|
+
exclude *_test.py
|
|
11
|
+
exclude *_demo.py
|
|
12
|
+
exclude *_example.py
|
|
13
|
+
exclude test_*.py
|
|
14
|
+
exclude db_monitor.py
|
|
15
|
+
exclude periodic_test_multi_replica.py
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pg-scheduler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A PostgreSQL-based async job scheduler with deduplication, periodic jobs, and reliability features
|
|
5
|
+
Author-email: Miguel Rebelo <miguel.python.dev@gmail.com>
|
|
6
|
+
Maintainer-email: Miguel Rebelo <miguel.python.dev@gmail.com>
|
|
7
|
+
Project-URL: Homepage, https://github.com/m1guelvrrl0/pg-scheduler
|
|
8
|
+
Project-URL: Documentation, https://github.com/m1guelvrrl0/pg-scheduler#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/m1guelvrrl0/pg-scheduler.git
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/m1guelvrrl0/pg-scheduler/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/m1guelvrrl0/pg-scheduler/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: postgresql,job-scheduler,async,periodic-jobs,task-queue,deduplication,reliability,distributed
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Database
|
|
24
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
25
|
+
Classifier: Framework :: AsyncIO
|
|
26
|
+
Classifier: Operating System :: OS Independent
|
|
27
|
+
Requires-Python: >=3.9
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: asyncpg>=0.25.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
|
35
|
+
Requires-Dist: isort>=5.0; extra == "dev"
|
|
36
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
37
|
+
Requires-Dist: flake8>=5.0; extra == "dev"
|
|
38
|
+
Provides-Extra: examples
|
|
39
|
+
Requires-Dist: fastapi>=0.68.0; extra == "examples"
|
|
40
|
+
Requires-Dist: uvicorn>=0.15.0; extra == "examples"
|
|
41
|
+
|
|
42
|
+
# PG Scheduler
|
|
43
|
+
|
|
44
|
+
[](https://badge.fury.io/py/pg-scheduler)
|
|
45
|
+
[](https://pypi.org/project/pg-scheduler/)
|
|
46
|
+
[](https://opensource.org/licenses/MIT)
|
|
47
|
+
|
|
48
|
+
A simple, lightweight, async-first job scheduler for Python that uses PostgreSQL to let you schedule and manage the execution of asynchronous tasks.
|
|
49
|
+
|
|
50
|
+
It's heavily inspired by APScheduler in its API, but it is horizontally scalable and much more focused on the features it provides and the technologies it uses.
|
|
51
|
+
|
|
52
|
+
## ⚠️ Project Status
|
|
53
|
+
|
|
54
|
+
This project is currently in **early development** (v0.1.0). While functional, please note:
|
|
55
|
+
- The API may change in future versions
|
|
56
|
+
- Recommended for testing and development environments
|
|
57
|
+
- Production use should be carefully evaluated
|
|
58
|
+
|
|
59
|
+
## โจ Key Features
|
|
60
|
+
|
|
61
|
+
- **๐ Periodic Jobs**: Simple `@periodic` decorator for recurring tasks
|
|
62
|
+
- **๐ Deduplication**: Guarantees exactly one execution per window across replicas
|
|
63
|
+
- **โก Self-Rescheduling**: Jobs automatically schedule their next execution
|
|
64
|
+
- **๐ก๏ธ Advisory Locks**: Optional PostgreSQL advisory locks for exclusive execution
|
|
65
|
+
- **๐ฏ Priority Queues**: Support for job priorities and retry logic
|
|
66
|
+
- **๐งน Vacuum Policies**: Automatic cleanup of completed jobs
|
|
67
|
+
- **๐ช Reliability**: Graceful shutdown, error handling, and orphan recovery
|
|
68
|
+
|
|
69
|
+
## ๐ฆ Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install pg-scheduler
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Requirements
|
|
76
|
+
|
|
77
|
+
- Python 3.9+
|
|
78
|
+
- PostgreSQL 12+
|
|
79
|
+
- asyncpg
|
|
80
|
+
|
|
81
|
+
## ๐ Quick Start
|
|
82
|
+
|
|
83
|
+
### Basic Job Scheduling
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import asyncio
|
|
87
|
+
import asyncpg
|
|
88
|
+
from datetime import datetime, timedelta, UTC
|
|
89
|
+
from pg_scheduler import Scheduler, JobPriority
|
|
90
|
+
|
|
91
|
+
async def send_email(recipient: str, subject: str):
|
|
92
|
+
"""Example job function"""
|
|
93
|
+
print(f"๐ง Sending email to {recipient}: {subject}")
|
|
94
|
+
await asyncio.sleep(1) # Simulate async work
|
|
95
|
+
print(f"✅ Email sent to {recipient}")
|
|
96
|
+
|
|
97
|
+
async def main():
|
|
98
|
+
# Create database connection pool
|
|
99
|
+
db_pool = await asyncpg.create_pool(
|
|
100
|
+
user='scheduler',
|
|
101
|
+
password='password',
|
|
102
|
+
database='scheduler_db',
|
|
103
|
+
host='localhost',
|
|
104
|
+
port=5432
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Initialize scheduler
|
|
108
|
+
scheduler = Scheduler(db_pool=db_pool, max_concurrent_jobs=10)
|
|
109
|
+
await scheduler.start()
|
|
110
|
+
|
|
111
|
+
try:
|
|
112
|
+
# Schedule a job
|
|
113
|
+
job_id = await scheduler.schedule(
|
|
114
|
+
send_email,
|
|
115
|
+
execution_time=datetime.now(UTC) + timedelta(minutes=5),
|
|
116
|
+
args=("user@example.com", "Welcome!"),
|
|
117
|
+
priority=JobPriority.NORMAL,
|
|
118
|
+
max_retries=3
|
|
119
|
+
)
|
|
120
|
+
print(f"Scheduled job: {job_id}")
|
|
121
|
+
|
|
122
|
+
# Keep running
|
|
123
|
+
await asyncio.sleep(300) # Run for 5 minutes
|
|
124
|
+
|
|
125
|
+
finally:
|
|
126
|
+
await scheduler.shutdown()
|
|
127
|
+
await db_pool.close()
|
|
128
|
+
|
|
129
|
+
if __name__ == "__main__":
|
|
130
|
+
asyncio.run(main())
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Periodic Jobs with @periodic Decorator
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from datetime import timedelta
|
|
137
|
+
from pg_scheduler import periodic, JobPriority
|
|
138
|
+
|
|
139
|
+
@periodic(every=timedelta(minutes=15))
|
|
140
|
+
async def cleanup_temp_files():
|
|
141
|
+
"""Clean up temporary files every 15 minutes"""
|
|
142
|
+
print("๐งน Cleaning up temporary files...")
|
|
143
|
+
# Your cleanup logic here
|
|
144
|
+
print("✅ Cleanup completed")
|
|
145
|
+
|
|
146
|
+
@periodic(every=timedelta(hours=1), priority=JobPriority.CRITICAL, max_retries=3)
|
|
147
|
+
async def generate_hourly_report():
|
|
148
|
+
"""Generate hourly reports with high priority and retries"""
|
|
149
|
+
print("๐ Generating hourly report...")
|
|
150
|
+
# Your report generation logic here
|
|
151
|
+
print("✅ Report generated")
|
|
152
|
+
|
|
153
|
+
# Advisory locks for exclusive execution
|
|
154
|
+
@periodic(every=timedelta(minutes=30), use_advisory_lock=True)
|
|
155
|
+
async def exclusive_maintenance():
|
|
156
|
+
"""Exclusive operation - only one instance across entire cluster"""
|
|
157
|
+
print("๐ Running exclusive maintenance...")
|
|
158
|
+
# Your maintenance logic here
|
|
159
|
+
print("✅ Maintenance completed")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Note**: Most jobs don't need `use_advisory_lock=True`. Use it only when you need absolute exclusivity across all workers (e.g., database migrations, leader coordination).
|
|
163
|
+
|
|
164
|
+
## ๐ Periodic Jobs Features
|
|
165
|
+
|
|
166
|
+
The `@periodic` decorator provides powerful recurring job functionality with **built-in cross-node deduplication**:
|
|
167
|
+
|
|
168
|
+
- **Cross-Node Deduplication**: Automatically prevents duplicate executions across multiple nodes sharing the same database
|
|
169
|
+
- **Automatic Registration**: Jobs are automatically registered when decorated
|
|
170
|
+
- **Self-rescheduling**: Automatically schedules the next execution after completion
|
|
171
|
+
- **Priority Support**: Use `JobPriority.NORMAL` or `JobPriority.CRITICAL`
|
|
172
|
+
- **Retry Logic**: Configure `max_retries` for failed executions
|
|
173
|
+
- **Advisory Locks**: Optional exclusive execution (rarely needed - see note above)
|
|
174
|
+
- **Management**: Enable/disable jobs dynamically
|
|
175
|
+
|
|
176
|
+
### Decorator Parameters
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
@periodic(
|
|
180
|
+
every=timedelta(minutes=15), # Required: execution interval
|
|
181
|
+
use_advisory_lock=False, # Optional: exclusive execution (default dedup is usually sufficient)
|
|
182
|
+
priority=JobPriority.NORMAL, # Optional: job priority
|
|
183
|
+
max_retries=0, # Optional: retry attempts on failure
|
|
184
|
+
job_name=None, # Optional: custom job name (auto-generated)
|
|
185
|
+
dedup_key=None, # Optional: custom dedup key (auto-generated)
|
|
186
|
+
enabled=True # Optional: whether job is enabled
|
|
187
|
+
)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Cross-Node Deduplication
|
|
191
|
+
|
|
192
|
+
The `@periodic` decorator automatically prevents duplicate executions across multiple nodes:
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
# Multiple nodes running the same code
|
|
196
|
+
@periodic(every=timedelta(minutes=5))
|
|
197
|
+
async def cleanup_task():
|
|
198
|
+
print("Running cleanup...")
|
|
199
|
+
|
|
200
|
+
# What happens:
|
|
201
|
+
# - Node 1: Schedules job for 10:05 → ✅ Success
|
|
202
|
+
# - Node 2: Tries to schedule same job → ❌ "Already exists, ignoring"
|
|
203
|
+
# - Node 3: Tries to schedule same job → ❌ "Already exists, ignoring"
|
|
204
|
+
# - Result: Only Node 1 executes the cleanup at 10:05
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
**No configuration needed** - this works automatically for any nodes sharing the same PostgreSQL database.
|
|
208
|
+
|
|
209
|
+
### Management API
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
# Get all periodic jobs
|
|
213
|
+
periodic_jobs = scheduler.get_periodic_jobs()
|
|
214
|
+
|
|
215
|
+
# Get status of a specific job
|
|
216
|
+
status = scheduler.get_periodic_job_status(dedup_key)
|
|
217
|
+
|
|
218
|
+
# Enable/disable jobs
|
|
219
|
+
scheduler.enable_periodic_job(dedup_key)
|
|
220
|
+
scheduler.disable_periodic_job(dedup_key)
|
|
221
|
+
|
|
222
|
+
# Manually trigger a job
|
|
223
|
+
job_id = await scheduler.trigger_periodic_job(dedup_key)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## ๐ฏ Job Scheduling Features
|
|
227
|
+
|
|
228
|
+
### Priority Support
|
|
229
|
+
- `JobPriority.NORMAL` (default) or `JobPriority.CRITICAL`
|
|
230
|
+
- Higher priority jobs execute first
|
|
231
|
+
|
|
232
|
+
### Conflict Resolution
|
|
233
|
+
Handle duplicate job IDs with flexible strategies:
|
|
234
|
+
- `ConflictResolution.RAISE` (default): Raise error for duplicates
|
|
235
|
+
- `ConflictResolution.IGNORE`: Ignore new job, return existing ID
|
|
236
|
+
- `ConflictResolution.REPLACE`: Update existing job with new parameters
|
|
237
|
+
|
|
238
|
+
### Retry Logic
|
|
239
|
+
- Configure `max_retries` for failed jobs
|
|
240
|
+
- Exponential backoff between retries
|
|
241
|
+
- Comprehensive error handling
|
|
242
|
+
|
|
243
|
+
## ๐งน Vacuum Policies
|
|
244
|
+
|
|
245
|
+
Automatic cleanup of completed jobs with flexible policies:
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from pg_scheduler import VacuumConfig, VacuumPolicy
|
|
249
|
+
|
|
250
|
+
# Configure cleanup policies
|
|
251
|
+
vacuum_config = VacuumConfig(
|
|
252
|
+
completed=VacuumPolicy.after_days(1), # Clean completed jobs after 1 day
|
|
253
|
+
failed=VacuumPolicy.after_days(7), # Keep failed jobs for 7 days
|
|
254
|
+
cancelled=VacuumPolicy.after_days(3), # Clean cancelled jobs after 3 days
|
|
255
|
+
interval_minutes=60, # Run vacuum every hour
|
|
256
|
+
track_metrics=True # Store vacuum statistics
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
scheduler = Scheduler(db_pool, vacuum_config=vacuum_config)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## ๐ก๏ธ Reliability Features
|
|
263
|
+
|
|
264
|
+
### Built-in Reliability
|
|
265
|
+
- **Lease-based Execution**: Explicit job ownership with timeouts
|
|
266
|
+
- **Heartbeat Monitoring**: Detect and recover from crashed workers
|
|
267
|
+
- **Atomic Job Claiming**: Race-condition-free job distribution
|
|
268
|
+
- **Orphan Recovery**: Automatic cleanup of abandoned jobs
|
|
269
|
+
- **Graceful Shutdown**: Waits for active jobs to complete
|
|
270
|
+
|
|
271
|
+
### Deduplication
|
|
272
|
+
- **Cross-replica Safety**: Same job won't run twice across multiple workers
|
|
273
|
+
- **Deterministic Job IDs**: Based on function signature and parameters
|
|
274
|
+
- **Window-based Deduplication**: Prevents duplicate executions in time windows
|
|
275
|
+
|
|
276
|
+
## ๐ง Configuration
|
|
277
|
+
|
|
278
|
+
### Scheduler Options
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
scheduler = Scheduler(
|
|
282
|
+
db_pool=db_pool,
|
|
283
|
+
max_concurrent_jobs=25, # Maximum concurrent job execution
|
|
284
|
+
misfire_grace_time=300, # Seconds before jobs expire (5 minutes)
|
|
285
|
+
vacuum_enabled=True, # Enable automatic job cleanup
|
|
286
|
+
vacuum_config=vacuum_config # Custom vacuum policies
|
|
287
|
+
)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## ๐ Database Schema
|
|
291
|
+
|
|
292
|
+
The scheduler automatically creates the required PostgreSQL table:
|
|
293
|
+
|
|
294
|
+
```sql
|
|
295
|
+
CREATE TABLE scheduled_jobs (
|
|
296
|
+
job_id TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
|
|
297
|
+
job_name TEXT NOT NULL,
|
|
298
|
+
execution_time TIMESTAMPTZ NOT NULL,
|
|
299
|
+
status TEXT DEFAULT 'pending',
|
|
300
|
+
task_data JSONB,
|
|
301
|
+
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
|
|
302
|
+
last_heartbeat TIMESTAMPTZ,
|
|
303
|
+
lease_until TIMESTAMPTZ,
|
|
304
|
+
priority INTEGER DEFAULT 5,
|
|
305
|
+
retry_count INTEGER DEFAULT 0,
|
|
306
|
+
max_retries INTEGER DEFAULT 0,
|
|
307
|
+
worker_id TEXT,
|
|
308
|
+
error_message TEXT
|
|
309
|
+
);
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## ๐ Production Deployment
|
|
313
|
+
|
|
314
|
+
### Docker Example
|
|
315
|
+
|
|
316
|
+
```dockerfile
|
|
317
|
+
FROM python:3.11-slim
|
|
318
|
+
|
|
319
|
+
WORKDIR /app
|
|
320
|
+
COPY requirements.txt .
|
|
321
|
+
RUN pip install -r requirements.txt
|
|
322
|
+
|
|
323
|
+
COPY . .
|
|
324
|
+
CMD ["python", "app.py"]
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
### Environment Variables
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
DATABASE_URL=postgresql://user:pass@localhost:5432/scheduler_db
|
|
331
|
+
MAX_CONCURRENT_JOBS=25
|
|
332
|
+
MISFIRE_GRACE_TIME=300
|
|
333
|
+
VACUUM_ENABLED=true
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
## ๐ค Contributing
|
|
337
|
+
|
|
338
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
339
|
+
|
|
340
|
+
## ๐ TODO / Roadmap
|
|
341
|
+
|
|
342
|
+
### Planned Features
|
|
343
|
+
|
|
344
|
+
- **Cron-like Scheduling**: Support for cron expressions (e.g., `@periodic(cron="0 0 * * SUN")` for "every Sunday at midnight")
|
|
345
|
+
- **Timezone Support**: Specify timezones for periodic jobs
|
|
346
|
+
|
|
347
|
+
## ๐ License
|
|
348
|
+
|
|
349
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
350
|
+
|
|
351
|
+
## ๐ Links
|
|
352
|
+
|
|
353
|
+
- **PyPI**: https://pypi.org/project/pg-scheduler/
|
|
354
|
+
- **GitHub**: https://github.com/miguelrebelo/pg-scheduler
|
|
355
|
+
- **Documentation**: https://github.com/miguelrebelo/pg-scheduler#readme
|
|
356
|
+
- **Issues**: https://github.com/miguelrebelo/pg-scheduler/issues
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# PG Scheduler
|
|
2
|
+
|
|
3
|
+
[](https://badge.fury.io/py/pg-scheduler)
|
|
4
|
+
[](https://pypi.org/project/pg-scheduler/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
A simple, lightweight, async-first job scheduler for Python that uses PostgreSQL to let you schedule and manage the execution of asynchronous tasks.
|
|
8
|
+
|
|
9
|
+
It's heavily inspired by APScheduler in its API, but it is horizontally scalable and much more focused on the features it provides and the technologies it uses.
|
|
10
|
+
|
|
11
|
+
## ⚠️ Project Status
|
|
12
|
+
|
|
13
|
+
This project is currently in **early development** (v0.1.0). While functional, please note:
|
|
14
|
+
- The API may change in future versions
|
|
15
|
+
- Recommended for testing and development environments
|
|
16
|
+
- Production use should be carefully evaluated
|
|
17
|
+
|
|
18
|
+
## โจ Key Features
|
|
19
|
+
|
|
20
|
+
- **๐ Periodic Jobs**: Simple `@periodic` decorator for recurring tasks
|
|
21
|
+
- **๐ Deduplication**: Guarantees exactly one execution per window across replicas
|
|
22
|
+
- **โก Self-Rescheduling**: Jobs automatically schedule their next execution
|
|
23
|
+
- **๐ก๏ธ Advisory Locks**: Optional PostgreSQL advisory locks for exclusive execution
|
|
24
|
+
- **๐ฏ Priority Queues**: Support for job priorities and retry logic
|
|
25
|
+
- **๐งน Vacuum Policies**: Automatic cleanup of completed jobs
|
|
26
|
+
- **๐ช Reliability**: Graceful shutdown, error handling, and orphan recovery
|
|
27
|
+
|
|
28
|
+
## ๐ฆ Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install pg-scheduler
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Requirements
|
|
35
|
+
|
|
36
|
+
- Python 3.9+
|
|
37
|
+
- PostgreSQL 12+
|
|
38
|
+
- asyncpg
|
|
39
|
+
|
|
40
|
+
## ๐ Quick Start
|
|
41
|
+
|
|
42
|
+
### Basic Job Scheduling
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import asyncio
|
|
46
|
+
import asyncpg
|
|
47
|
+
from datetime import datetime, timedelta, UTC
|
|
48
|
+
from pg_scheduler import Scheduler, JobPriority
|
|
49
|
+
|
|
50
|
+
async def send_email(recipient: str, subject: str):
|
|
51
|
+
"""Example job function"""
|
|
52
|
+
print(f"๐ง Sending email to {recipient}: {subject}")
|
|
53
|
+
await asyncio.sleep(1) # Simulate async work
|
|
54
|
+
print(f"✅ Email sent to {recipient}")
|
|
55
|
+
|
|
56
|
+
async def main():
|
|
57
|
+
# Create database connection pool
|
|
58
|
+
db_pool = await asyncpg.create_pool(
|
|
59
|
+
user='scheduler',
|
|
60
|
+
password='password',
|
|
61
|
+
database='scheduler_db',
|
|
62
|
+
host='localhost',
|
|
63
|
+
port=5432
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# Initialize scheduler
|
|
67
|
+
scheduler = Scheduler(db_pool=db_pool, max_concurrent_jobs=10)
|
|
68
|
+
await scheduler.start()
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
# Schedule a job
|
|
72
|
+
job_id = await scheduler.schedule(
|
|
73
|
+
send_email,
|
|
74
|
+
execution_time=datetime.now(UTC) + timedelta(minutes=5),
|
|
75
|
+
args=("user@example.com", "Welcome!"),
|
|
76
|
+
priority=JobPriority.NORMAL,
|
|
77
|
+
max_retries=3
|
|
78
|
+
)
|
|
79
|
+
print(f"Scheduled job: {job_id}")
|
|
80
|
+
|
|
81
|
+
# Keep running
|
|
82
|
+
await asyncio.sleep(300) # Run for 5 minutes
|
|
83
|
+
|
|
84
|
+
finally:
|
|
85
|
+
await scheduler.shutdown()
|
|
86
|
+
await db_pool.close()
|
|
87
|
+
|
|
88
|
+
if __name__ == "__main__":
|
|
89
|
+
asyncio.run(main())
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Periodic Jobs with @periodic Decorator
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from datetime import timedelta
|
|
96
|
+
from pg_scheduler import periodic, JobPriority
|
|
97
|
+
|
|
98
|
+
@periodic(every=timedelta(minutes=15))
|
|
99
|
+
async def cleanup_temp_files():
|
|
100
|
+
"""Clean up temporary files every 15 minutes"""
|
|
101
|
+
print("๐งน Cleaning up temporary files...")
|
|
102
|
+
# Your cleanup logic here
|
|
103
|
+
print("✅ Cleanup completed")
|
|
104
|
+
|
|
105
|
+
@periodic(every=timedelta(hours=1), priority=JobPriority.CRITICAL, max_retries=3)
|
|
106
|
+
async def generate_hourly_report():
|
|
107
|
+
"""Generate hourly reports with high priority and retries"""
|
|
108
|
+
print("๐ Generating hourly report...")
|
|
109
|
+
# Your report generation logic here
|
|
110
|
+
print("✅ Report generated")
|
|
111
|
+
|
|
112
|
+
# Advisory locks for exclusive execution
|
|
113
|
+
@periodic(every=timedelta(minutes=30), use_advisory_lock=True)
|
|
114
|
+
async def exclusive_maintenance():
|
|
115
|
+
"""Exclusive operation - only one instance across entire cluster"""
|
|
116
|
+
print("๐ Running exclusive maintenance...")
|
|
117
|
+
# Your maintenance logic here
|
|
118
|
+
print("✅ Maintenance completed")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Note**: Most jobs don't need `use_advisory_lock=True`. Use it only when you need absolute exclusivity across all workers (e.g., database migrations, leader coordination).
|
|
122
|
+
|
|
123
|
+
## ๐ Periodic Jobs Features
|
|
124
|
+
|
|
125
|
+
The `@periodic` decorator provides powerful recurring job functionality with **built-in cross-node deduplication**:
|
|
126
|
+
|
|
127
|
+
- **Cross-Node Deduplication**: Automatically prevents duplicate executions across multiple nodes sharing the same database
|
|
128
|
+
- **Automatic Registration**: Jobs are automatically registered when decorated
|
|
129
|
+
- **Self-rescheduling**: Automatically schedules the next execution after completion
|
|
130
|
+
- **Priority Support**: Use `JobPriority.NORMAL` or `JobPriority.CRITICAL`
|
|
131
|
+
- **Retry Logic**: Configure `max_retries` for failed executions
|
|
132
|
+
- **Advisory Locks**: Optional exclusive execution (rarely needed - see note above)
|
|
133
|
+
- **Management**: Enable/disable jobs dynamically
|
|
134
|
+
|
|
135
|
+
### Decorator Parameters
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
@periodic(
|
|
139
|
+
every=timedelta(minutes=15), # Required: execution interval
|
|
140
|
+
use_advisory_lock=False, # Optional: exclusive execution (default dedup is usually sufficient)
|
|
141
|
+
priority=JobPriority.NORMAL, # Optional: job priority
|
|
142
|
+
max_retries=0, # Optional: retry attempts on failure
|
|
143
|
+
job_name=None, # Optional: custom job name (auto-generated)
|
|
144
|
+
dedup_key=None, # Optional: custom dedup key (auto-generated)
|
|
145
|
+
enabled=True # Optional: whether job is enabled
|
|
146
|
+
)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Cross-Node Deduplication
|
|
150
|
+
|
|
151
|
+
The `@periodic` decorator automatically prevents duplicate executions across multiple nodes:
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
# Multiple nodes running the same code
|
|
155
|
+
@periodic(every=timedelta(minutes=5))
|
|
156
|
+
async def cleanup_task():
|
|
157
|
+
print("Running cleanup...")
|
|
158
|
+
|
|
159
|
+
# What happens:
|
|
160
|
+
# - Node 1: Schedules job for 10:05 → ✅ Success
|
|
161
|
+
# - Node 2: Tries to schedule same job → ❌ "Already exists, ignoring"
|
|
162
|
+
# - Node 3: Tries to schedule same job → ❌ "Already exists, ignoring"
|
|
163
|
+
# - Result: Only Node 1 executes the cleanup at 10:05
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**No configuration needed** - this works automatically for any nodes sharing the same PostgreSQL database.
|
|
167
|
+
|
|
168
|
+
### Management API
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
# Get all periodic jobs
|
|
172
|
+
periodic_jobs = scheduler.get_periodic_jobs()
|
|
173
|
+
|
|
174
|
+
# Get status of a specific job
|
|
175
|
+
status = scheduler.get_periodic_job_status(dedup_key)
|
|
176
|
+
|
|
177
|
+
# Enable/disable jobs
|
|
178
|
+
scheduler.enable_periodic_job(dedup_key)
|
|
179
|
+
scheduler.disable_periodic_job(dedup_key)
|
|
180
|
+
|
|
181
|
+
# Manually trigger a job
|
|
182
|
+
job_id = await scheduler.trigger_periodic_job(dedup_key)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## ๐ฏ Job Scheduling Features
|
|
186
|
+
|
|
187
|
+
### Priority Support
|
|
188
|
+
- `JobPriority.NORMAL` (default) or `JobPriority.CRITICAL`
|
|
189
|
+
- Higher priority jobs execute first
|
|
190
|
+
|
|
191
|
+
### Conflict Resolution
|
|
192
|
+
Handle duplicate job IDs with flexible strategies:
|
|
193
|
+
- `ConflictResolution.RAISE` (default): Raise error for duplicates
|
|
194
|
+
- `ConflictResolution.IGNORE`: Ignore new job, return existing ID
|
|
195
|
+
- `ConflictResolution.REPLACE`: Update existing job with new parameters
|
|
196
|
+
|
|
197
|
+
### Retry Logic
|
|
198
|
+
- Configure `max_retries` for failed jobs
|
|
199
|
+
- Exponential backoff between retries
|
|
200
|
+
- Comprehensive error handling
|
|
201
|
+
|
|
202
|
+
## ๐งน Vacuum Policies
|
|
203
|
+
|
|
204
|
+
Automatic cleanup of completed jobs with flexible policies:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from pg_scheduler import VacuumConfig, VacuumPolicy
|
|
208
|
+
|
|
209
|
+
# Configure cleanup policies
|
|
210
|
+
vacuum_config = VacuumConfig(
|
|
211
|
+
completed=VacuumPolicy.after_days(1), # Clean completed jobs after 1 day
|
|
212
|
+
failed=VacuumPolicy.after_days(7), # Keep failed jobs for 7 days
|
|
213
|
+
cancelled=VacuumPolicy.after_days(3), # Clean cancelled jobs after 3 days
|
|
214
|
+
interval_minutes=60, # Run vacuum every hour
|
|
215
|
+
track_metrics=True # Store vacuum statistics
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
scheduler = Scheduler(db_pool, vacuum_config=vacuum_config)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## ๐ก๏ธ Reliability Features
|
|
222
|
+
|
|
223
|
+
### Built-in Reliability
|
|
224
|
+
- **Lease-based Execution**: Explicit job ownership with timeouts
|
|
225
|
+
- **Heartbeat Monitoring**: Detect and recover from crashed workers
|
|
226
|
+
- **Atomic Job Claiming**: Race-condition-free job distribution
|
|
227
|
+
- **Orphan Recovery**: Automatic cleanup of abandoned jobs
|
|
228
|
+
- **Graceful Shutdown**: Waits for active jobs to complete
|
|
229
|
+
|
|
230
|
+
### Deduplication
|
|
231
|
+
- **Cross-replica Safety**: Same job won't run twice across multiple workers
|
|
232
|
+
- **Deterministic Job IDs**: Based on function signature and parameters
|
|
233
|
+
- **Window-based Deduplication**: Prevents duplicate executions in time windows
|
|
234
|
+
|
|
235
|
+
## ๐ง Configuration
|
|
236
|
+
|
|
237
|
+
### Scheduler Options
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
scheduler = Scheduler(
|
|
241
|
+
db_pool=db_pool,
|
|
242
|
+
max_concurrent_jobs=25, # Maximum concurrent job execution
|
|
243
|
+
misfire_grace_time=300, # Seconds before jobs expire (5 minutes)
|
|
244
|
+
vacuum_enabled=True, # Enable automatic job cleanup
|
|
245
|
+
vacuum_config=vacuum_config # Custom vacuum policies
|
|
246
|
+
)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## ๐ Database Schema
|
|
250
|
+
|
|
251
|
+
The scheduler automatically creates the required PostgreSQL table:
|
|
252
|
+
|
|
253
|
+
```sql
|
|
254
|
+
CREATE TABLE scheduled_jobs (
|
|
255
|
+
job_id TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
|
|
256
|
+
job_name TEXT NOT NULL,
|
|
257
|
+
execution_time TIMESTAMPTZ NOT NULL,
|
|
258
|
+
status TEXT DEFAULT 'pending',
|
|
259
|
+
task_data JSONB,
|
|
260
|
+
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
|
|
261
|
+
last_heartbeat TIMESTAMPTZ,
|
|
262
|
+
lease_until TIMESTAMPTZ,
|
|
263
|
+
priority INTEGER DEFAULT 5,
|
|
264
|
+
retry_count INTEGER DEFAULT 0,
|
|
265
|
+
max_retries INTEGER DEFAULT 0,
|
|
266
|
+
worker_id TEXT,
|
|
267
|
+
error_message TEXT
|
|
268
|
+
);
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## ๐ Production Deployment
|
|
272
|
+
|
|
273
|
+
### Docker Example
|
|
274
|
+
|
|
275
|
+
```dockerfile
|
|
276
|
+
FROM python:3.11-slim
|
|
277
|
+
|
|
278
|
+
WORKDIR /app
|
|
279
|
+
COPY requirements.txt .
|
|
280
|
+
RUN pip install -r requirements.txt
|
|
281
|
+
|
|
282
|
+
COPY . .
|
|
283
|
+
CMD ["python", "app.py"]
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Environment Variables
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
DATABASE_URL=postgresql://user:pass@localhost:5432/scheduler_db
|
|
290
|
+
MAX_CONCURRENT_JOBS=25
|
|
291
|
+
MISFIRE_GRACE_TIME=300
|
|
292
|
+
VACUUM_ENABLED=true
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## ๐ค Contributing
|
|
296
|
+
|
|
297
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
298
|
+
|
|
299
|
+
## ๐ TODO / Roadmap
|
|
300
|
+
|
|
301
|
+
### Planned Features
|
|
302
|
+
|
|
303
|
+
- **Cron-like Scheduling**: Support for cron expressions (e.g., `@periodic(cron="0 0 * * SUN")` for "every Sunday at midnight")
|
|
304
|
+
- **Timezone Support**: Specify timezones for periodic jobs
|
|
305
|
+
|
|
306
|
+
## ๐ License
|
|
307
|
+
|
|
308
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
309
|
+
|
|
310
|
+
## ๐ Links
|
|
311
|
+
|
|
312
|
+
- **PyPI**: https://pypi.org/project/pg-scheduler/
|
|
313
|
+
- **GitHub**: https://github.com/miguelrebelo/pg-scheduler
|
|
314
|
+
- **Documentation**: https://github.com/miguelrebelo/pg-scheduler#readme
|
|
315
|
+
- **Issues**: https://github.com/miguelrebelo/pg-scheduler/issues
|