pydocket 0.7.1__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pydocket-0.7.1 → pydocket-0.9.0}/PKG-INFO +3 -1
- {pydocket-0.7.1 → pydocket-0.9.0}/README.md +2 -0
- pydocket-0.9.0/docs/advanced-patterns.md +558 -0
- pydocket-0.9.0/docs/dependencies.md +394 -0
- pydocket-0.9.0/docs/getting-started.md +198 -0
- pydocket-0.9.0/docs/production.md +407 -0
- pydocket-0.9.0/docs/testing.md +356 -0
- pydocket-0.9.0/examples/concurrency_control.py +114 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/mkdocs.yml +4 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/__init__.py +2 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/cli.py +112 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/dependencies.py +89 -3
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/docket.py +151 -35
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/worker.py +169 -13
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_snapshot.py +153 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_worker.py +4 -3
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/conftest.py +1 -1
- pydocket-0.9.0/tests/test_concurrency_basic.py +31 -0
- pydocket-0.9.0/tests/test_concurrency_control.py +336 -0
- pydocket-0.9.0/tests/test_concurrency_refresh.py +196 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_fundamentals.py +24 -3
- pydocket-0.9.0/tests/test_worker.py +1506 -0
- pydocket-0.7.1/docs/getting-started.md +0 -286
- pydocket-0.7.1/tests/test_worker.py +0 -515
- {pydocket-0.7.1 → pydocket-0.9.0}/.cursor/rules/general.mdc +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.cursor/rules/python-style.mdc +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.github/codecov.yml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.github/workflows/chaos.yml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.github/workflows/ci.yml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.github/workflows/docs.yml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.github/workflows/publish.yml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.gitignore +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/.pre-commit-config.yaml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/CLAUDE.md +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/LICENSE +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/README.md +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/__init__.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/driver.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/producer.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/run +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/chaos/tasks.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/docs/api-reference.md +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/docs/index.md +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/examples/__init__.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/examples/common.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/examples/find_and_flood.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/examples/self_perpetuating.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/pyproject.toml +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/__main__.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/annotations.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/execution.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/instrumentation.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/py.typed +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/src/docket/tasks.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/telemetry/.gitignore +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/telemetry/start +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/telemetry/stop +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/__init__.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/__init__.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/conftest.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_clear.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_module.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_parsing.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_striking.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_tasks.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_version.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/cli/test_workers.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_dependencies.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_docket.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_execution.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_instrumentation.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/tests/test_striking.py +0 -0
- {pydocket-0.7.1 → pydocket-0.9.0}/uv.lock +0 -0

{pydocket-0.7.1 → pydocket-0.9.0}/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: pydocket
-Version: 0.7.1
+Version: 0.9.0
Summary: A distributed background task system for Python functions
Project-URL: Homepage, https://github.com/chrisguidry/docket
Project-URL: Bug Tracker, https://github.com/chrisguidry/docket/issues

@@ -93,6 +93,8 @@ reference](https://chrisguidry.github.io/docket/api-reference/).

🧩 Fully type-complete and type-aware for your background task functions

+💉 Dependency injection like FastAPI, Typer, and FastMCP for reusable resources
+
## Installing `docket`

Docket is [available on PyPI](https://pypi.org/project/pydocket/) under the package name

{pydocket-0.7.1 → pydocket-0.9.0}/README.md
@@ -57,6 +57,8 @@ reference](https://chrisguidry.github.io/docket/api-reference/).

🧩 Fully type-complete and type-aware for your background task functions

+💉 Dependency injection like FastAPI, Typer, and FastMCP for reusable resources
+
## Installing `docket`

Docket is [available on PyPI](https://pypi.org/project/pydocket/) under the package name

pydocket-0.9.0/docs/advanced-patterns.md (new file)
@@ -0,0 +1,558 @@

# Advanced Task Patterns

Docket is made for building complex distributed systems, and the patterns below highlight some of the original use cases for Docket.

## Perpetual Tasks

Perpetual tasks automatically reschedule themselves, making them well-suited for recurring work like health checks, data synchronization, or periodic cleanup operations.

### Basic Perpetual Tasks

```python
from datetime import timedelta

from docket import Perpetual

async def health_check_service(
    service_url: str,
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=5))
) -> None:
    try:
        response = await http_client.get(f"{service_url}/health")
        response.raise_for_status()
        print(f"✓ {service_url} is healthy")
    except Exception as e:
        print(f"✗ {service_url} failed health check: {e}")
        await send_alert(f"Service {service_url} is down")

# Schedule the task once; it will run every 5 minutes forever
await docket.add(health_check_service)("https://api.example.com")
```

After each execution, the task automatically schedules itself to run again after the specified interval.

### Automatic Startup

Perpetual tasks can start themselves automatically when a worker sees them, without needing to be explicitly scheduled:

```python
async def background_cleanup(
    perpetual: Perpetual = Perpetual(
        every=timedelta(hours=1),
        automatic=True
    )
) -> None:
    deleted_count = await cleanup_old_records()
    print(f"Cleaned up {deleted_count} old records")

# Just register the task - no need to schedule it
docket.register(background_cleanup)

# When a worker starts, it will automatically begin running this task
# The task key will be the function name: "background_cleanup"
```

### Self-Canceling Tasks

Perpetual tasks can stop themselves when their work is done:

```python
async def monitor_deployment(
    deployment_id: str,
    perpetual: Perpetual = Perpetual(every=timedelta(seconds=30))
) -> None:
    status = await check_deployment_status(deployment_id)

    if status in ["completed", "failed"]:
        await notify_deployment_finished(deployment_id, status)
        perpetual.cancel()  # Stop monitoring this deployment
        return

    print(f"Deployment {deployment_id} status: {status}")
```

### Dynamic Parameters

Perpetual tasks can change their arguments or timing for the next execution:

```python
async def adaptive_rate_limiter(
    api_endpoint: str,
    requests_per_minute: int = 60,
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=1))
) -> None:
    # Check current API load
    current_load = await check_api_load(api_endpoint)

    if current_load > 0.8:  # High load
        new_rate = max(30, requests_per_minute - 10)
        perpetual.every = timedelta(seconds=30)  # Check more frequently
        print(f"High load detected, reducing rate to {new_rate} req/min")
    else:  # Normal load
        new_rate = min(120, requests_per_minute + 5)
        perpetual.every = timedelta(minutes=1)  # Normal check interval
        print(f"Normal load, increasing rate to {new_rate} req/min")

    # Schedule next run with updated parameters
    perpetual.perpetuate(api_endpoint, new_rate)
```

### Error Resilience

Perpetual tasks automatically reschedule themselves regardless of success or failure:

```python
async def resilient_sync(
    source_url: str,
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=15))
) -> None:
    # This will ALWAYS reschedule, whether it succeeds or fails
    await sync_data_from_source(source_url)
    print(f"Successfully synced data from {source_url}")
```

You don't need try/except blocks to ensure rescheduling - Docket handles this automatically. Whether the task completes successfully or raises an exception, the next execution will be scheduled according to the `every` interval.

### Find & Flood Pattern

A common perpetual task pattern is "find & flood" - a single perpetual task that periodically discovers work to do, then creates many smaller tasks to handle the actual work:

```python
from docket import CurrentDocket, Docket, Perpetual

async def find_pending_orders(
    docket: Docket = CurrentDocket(),
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=1))
) -> None:
    # Find all orders that need processing
    pending_orders = await database.fetch_pending_orders()

    # Flood the queue with individual processing tasks
    for order in pending_orders:
        await docket.add(process_single_order)(order.id)

    print(f"Queued {len(pending_orders)} orders for processing")

async def process_single_order(order_id: int) -> None:
    # Handle one specific order
    await process_order_payment(order_id)
    await update_inventory(order_id)
    await send_confirmation_email(order_id)
```

This pattern separates discovery (finding work) from execution (doing work), allowing for better load distribution and fault isolation. The perpetual task stays lightweight and fast, while the actual work is distributed across many workers.
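
The finder also combines naturally with the automatic startup shown earlier, so it begins running as soon as any worker starts. A minimal sketch, assuming the same hypothetical helpers as above:

```python
async def find_pending_orders(
    docket: Docket = CurrentDocket(),
    # automatic=True means no one has to schedule the finder explicitly
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=1), automatic=True),
) -> None:
    for order in await database.fetch_pending_orders():
        await docket.add(process_single_order)(order.id)

# Registering the task is enough; a worker that knows about it will start it
docket.register(find_pending_orders)
```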

## Striking and Restoring Tasks

Striking allows you to temporarily disable tasks without redeploying code. This is invaluable for incident response, gradual rollouts, or handling problematic customers.

### Striking Entire Task Types

Disable all instances of a specific task:

```python
# Disable all order processing during maintenance
await docket.strike(process_order)

# Orders added during this time won't be processed
await docket.add(process_order)(order_id=12345)  # Won't run
await docket.add(process_order)(order_id=67890)  # Won't run

# Re-enable when ready
await docket.restore(process_order)
```

### Striking by Parameter Values

Disable tasks based on their arguments using comparison operators:

```python
# Block all tasks for a problematic customer
await docket.strike(None, "customer_id", "==", "12345")

# Block low-priority work during high load
await docket.strike(process_order, "priority", "<=", "low")

# Block all orders above a certain value during fraud investigation
await docket.strike(process_payment, "amount", ">", 10000)

# Later, restore them
await docket.restore(None, "customer_id", "==", "12345")
await docket.restore(process_order, "priority", "<=", "low")
```

Supported operators include `==`, `!=`, `<`, `<=`, `>`, `>=`.
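
The two operators not shown above work the same way; a small sketch with hypothetical task and parameter names:

```python
# Block every region except the one currently being migrated
await docket.strike(send_newsletter, "region", "!=", "us-east-1")

# Block anything that has already been retried three or more times
await docket.strike(send_newsletter, "retry_count", ">=", 3)
```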

### Striking Specific Task-Parameter Combinations

Target very specific scenarios:

```python
# Block only high-value orders for a specific customer
await docket.strike(process_order, "customer_id", "==", "12345")
await docket.strike(process_order, "amount", ">", 1000)

# This order won't run (blocked customer)
await docket.add(process_order)(customer_id="12345", amount=500)

# This order won't run (blocked customer AND high amount)
await docket.add(process_order)(customer_id="12345", amount=2000)

# This order WILL run (different customer)
await docket.add(process_order)(customer_id="67890", amount=2000)
```

Striking is useful for:
- Incident response, when you need to quickly disable failing tasks
- Customer management, to block problematic accounts
- Gradual rollouts, where you disable features for certain parameters
- Load management during high traffic
- Debugging, to isolate specific scenarios

## Advanced Logging and Debugging

### Argument Logging

Control which task arguments appear in logs using the `Logged` annotation:

```python
from typing import Annotated
from docket import Logged

async def process_payment(
    customer_id: Annotated[str, Logged],  # Will be logged
    credit_card: str,  # Won't be logged
    amount: Annotated[float, Logged()] = 0.0,  # Will be logged
    trace_id: Annotated[str, Logged] = "unknown"  # Will be logged
) -> None:
    # Process the payment...
    pass

# Log output will show:
# process_payment('12345', credit_card=..., amount=150.0, trace_id='abc-123')
```

### Collection Length Logging

For large collections, log just their size instead of contents:

```python
async def bulk_update_users(
    user_ids: Annotated[list[str], Logged(length_only=True)],
    metadata: Annotated[dict[str, str], Logged(length_only=True)],
    options: Annotated[set[str], Logged(length_only=True)]
) -> None:
    # Process users...
    pass

# Log output will show:
# bulk_update_users([len 150], metadata={len 5}, options={len 3})
```

This prevents logs from being overwhelmed with large data structures while still providing useful information.

### Task Context Logging

Use `TaskLogger` for structured logging with task context:

```python
from logging import Logger, LoggerAdapter
from docket import TaskLogger

async def complex_data_pipeline(
    dataset_id: str,
    logger: LoggerAdapter[Logger] = TaskLogger()
) -> None:
    logger.info("Starting data pipeline", extra={"dataset_id": dataset_id})

    try:
        await extract_data(dataset_id)
        logger.info("Data extraction completed")

        await transform_data(dataset_id)
        logger.info("Data transformation completed")

        await load_data(dataset_id)
        logger.info("Data loading completed")

    except Exception as e:
        logger.error("Pipeline failed", extra={"error": str(e)})
        raise
```

The logger automatically includes task context like the task name, key, and worker information.

### Built-in Utility Tasks

Docket provides helpful debugging tasks:

```python
from docket import tasks

# Simple trace logging
await docket.add(tasks.trace)("System startup completed")
await docket.add(tasks.trace)("Processing batch 123")

# Intentional failures for testing error handling
await docket.add(tasks.fail)("Testing error notification system")
```

These are particularly useful for:
- Marking milestones in complex workflows
- Testing monitoring and alerting systems
- Debugging task execution order
- Creating synthetic load for testing
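
For example, you can sprinkle these markers into a workflow to make its progress visible in worker logs - a sketch, with `batch_items` and `process_item` standing in for your own data and task:

```python
# Drop trace markers around the real work so execution order shows up in the logs
await docket.add(tasks.trace)("batch 42: starting")
for item_id in batch_items:
    await docket.add(process_item)(item_id)
await docket.add(tasks.trace)("batch 42: all items queued")

# Schedule a deliberate failure to confirm that alerting actually fires
await docket.add(tasks.fail)("monthly alerting drill")
```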

## Task Chain Patterns

### Sequential Processing

Create chains of related tasks that pass data forward:

```python
async def download_data(
    url: str,
    docket: Docket = CurrentDocket()
) -> None:
    file_path = await download_file(url)
    await docket.add(validate_data)(file_path)

async def validate_data(
    file_path: str,
    docket: Docket = CurrentDocket()
) -> None:
    if await is_valid_data(file_path):
        await docket.add(process_data)(file_path)
    else:
        await docket.add(handle_invalid_data)(file_path)

async def process_data(file_path: str) -> None:
    # Final processing step
    await transform_and_store(file_path)
```

### Fan-out Processing

Break large tasks into parallel subtasks:

```python
async def process_large_dataset(
    dataset_id: str,
    docket: Docket = CurrentDocket()
) -> None:
    chunk_ids = await split_dataset_into_chunks(dataset_id)

    # Schedule parallel processing of all chunks
    for chunk_id in chunk_ids:
        await docket.add(process_chunk)(dataset_id, chunk_id)

    # Schedule a task to run after all chunks should be done
    estimated_completion = datetime.now(timezone.utc) + timedelta(hours=2)
    await docket.add(
        finalize_dataset,
        when=estimated_completion,
        key=f"finalize-{dataset_id}"
    )(dataset_id, len(chunk_ids))

async def process_chunk(dataset_id: str, chunk_id: str) -> None:
    await process_data_chunk(dataset_id, chunk_id)
    await mark_chunk_complete(dataset_id, chunk_id)
```

### Conditional Workflows

Tasks can make decisions about what work to schedule next:

```python
async def analyze_user_behavior(
    user_id: str,
    docket: Docket = CurrentDocket()
) -> None:
    behavior_data = await collect_user_behavior(user_id)

    if behavior_data.indicates_churn_risk():
        await docket.add(create_retention_campaign)(user_id)
    elif behavior_data.indicates_upsell_opportunity():
        await docket.add(create_upsell_campaign)(user_id)
    elif behavior_data.indicates_satisfaction():
        # Schedule a follow-up check in 30 days
        future_check = datetime.now(timezone.utc) + timedelta(days=30)
        await docket.add(
            analyze_user_behavior,
            when=future_check,
            key=f"behavior-check-{user_id}"
        )(user_id)
```

## Concurrency Control

Docket provides fine-grained concurrency control that allows you to limit the number of concurrent tasks based on specific argument values. This is essential for protecting shared resources, keeping external services from being overwhelmed, and managing database connections.

### Basic Concurrency Limits

Use `ConcurrencyLimit` to restrict concurrent execution based on task arguments:

```python
from docket import ConcurrencyLimit

async def process_customer_data(
    customer_id: int,
    concurrency: ConcurrencyLimit = ConcurrencyLimit("customer_id", max_concurrent=1)
) -> None:
    # Only one task per customer_id can run at a time
    await update_customer_profile(customer_id)
    await recalculate_customer_metrics(customer_id)

# These will run sequentially for the same customer
await docket.add(process_customer_data)(customer_id=1001)
await docket.add(process_customer_data)(customer_id=1001)
await docket.add(process_customer_data)(customer_id=1001)

# But different customers can run concurrently
await docket.add(process_customer_data)(customer_id=2001)  # Runs in parallel
await docket.add(process_customer_data)(customer_id=3001)  # Runs in parallel
```

### Database Connection Pooling

Limit concurrent database operations to prevent overwhelming your database:

```python
async def backup_database_table(
    db_name: str,
    table_name: str,
    concurrency: ConcurrencyLimit = ConcurrencyLimit("db_name", max_concurrent=2)
) -> None:
    # Maximum 2 backup operations per database at once
    await create_table_backup(db_name, table_name)
    await verify_backup_integrity(db_name, table_name)

# Schedule many backup tasks - only 2 per database will run concurrently
tables = ["users", "orders", "products", "analytics", "logs"]
for table in tables:
    await docket.add(backup_database_table)("production", table)
    await docket.add(backup_database_table)("staging", table)
```

### API Rate Limiting

Protect external APIs from being overwhelmed:

```python
async def sync_user_with_external_service(
    user_id: int,
    service_name: str,
    concurrency: ConcurrencyLimit = ConcurrencyLimit("service_name", max_concurrent=5)
) -> None:
    # Limit to 5 concurrent API calls per external service
    api_client = get_api_client(service_name)
    user_data = await fetch_user_data(user_id)
    await api_client.sync_user(user_data)

# These respect per-service limits
await docket.add(sync_user_with_external_service)(123, "salesforce")
await docket.add(sync_user_with_external_service)(456, "salesforce")  # Will queue if needed
await docket.add(sync_user_with_external_service)(789, "hubspot")  # Different service, runs in parallel
```

### File Processing Limits

Control concurrent file operations to manage disk I/O:

```python
async def process_media_file(
    file_path: str,
    operation_type: str,
    concurrency: ConcurrencyLimit = ConcurrencyLimit("operation_type", max_concurrent=3)
) -> None:
    # Limit concurrent operations by type (e.g., 3 video transcodes, 3 image resizes)
    if operation_type == "video_transcode":
        await transcode_video(file_path)
    elif operation_type == "image_resize":
        await resize_image(file_path)
    elif operation_type == "audio_compress":
        await compress_audio(file_path)

# Different operation types can run concurrently, but each type is limited
await docket.add(process_media_file)("/videos/movie1.mp4", "video_transcode")
await docket.add(process_media_file)("/videos/movie2.mp4", "video_transcode")
await docket.add(process_media_file)("/images/photo1.jpg", "image_resize")  # Runs in parallel
```

### Custom Scopes

Use custom scopes to create independent concurrency limits:

```python
async def process_tenant_data(
    tenant_id: str,
    operation: str,
    concurrency: ConcurrencyLimit = ConcurrencyLimit(
        "tenant_id",
        max_concurrent=2,
        scope="tenant_operations"
    )
) -> None:
    # Each tenant can have up to 2 concurrent operations
    await perform_tenant_operation(tenant_id, operation)

async def process_global_data(
    data_type: str,
    concurrency: ConcurrencyLimit = ConcurrencyLimit(
        "data_type",
        max_concurrent=1,
        scope="global_operations"  # Separate from tenant operations
    )
) -> None:
    # Global operations have their own concurrency limits
    await process_global_data_type(data_type)
```

### Multi-Level Concurrency

Combine multiple concurrency controls for complex scenarios:

```python
async def process_user_export(
    user_id: int,
    export_type: str,
    region: str,
    user_limit: ConcurrencyLimit = ConcurrencyLimit("user_id", max_concurrent=1),
    type_limit: ConcurrencyLimit = ConcurrencyLimit("export_type", max_concurrent=3),
    region_limit: ConcurrencyLimit = ConcurrencyLimit("region", max_concurrent=10)
) -> None:
    # This task respects ALL concurrency limits:
    # - Only 1 export per user at a time
    # - Only 3 exports of each type globally
    # - Only 10 exports per region
    await generate_user_export(user_id, export_type, region)
```

**Note**: When using multiple `ConcurrencyLimit` dependencies, all limits must be satisfied before the task can start.

### Monitoring Concurrency

Concurrency limits are enforced using Redis sets, so you can monitor them:

```python
async def monitor_concurrency_usage() -> None:
    async with docket.redis() as redis:
        # Check how many tasks are running for a specific limit
        active_count = await redis.scard("docket:concurrency:customer_id:1001")
        print(f"Customer 1001 has {active_count} active tasks")

        # List all active concurrency keys
        keys = await redis.keys("docket:concurrency:*")
        for key in keys:
            count = await redis.scard(key)
            print(f"{key}: {count} active tasks")
```
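
If you want that visibility continuously, the same check can be wrapped in a perpetual task - a sketch reusing the pieces shown above, with the one-minute interval as an example you would tune for your deployment:

```python
async def report_concurrency_usage(
    docket: Docket = CurrentDocket(),
    perpetual: Perpetual = Perpetual(every=timedelta(minutes=1), automatic=True),
) -> None:
    async with docket.redis() as redis:
        # Log the size of every active concurrency set once a minute
        for key in await redis.keys("docket:concurrency:*"):
            count = await redis.scard(key)
            print(f"{key}: {count} active tasks")
```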

### Best Practices

1. **Choose appropriate argument names**: Use arguments that represent the resource you want to protect (database name, customer ID, API endpoint).

2. **Set reasonable limits**: Base limits on your system's capacity and external service constraints.

3. **Use descriptive scopes**: When you have multiple unrelated concurrency controls, use different scopes to avoid conflicts.

4. **Monitor blocked tasks**: Tasks that can't start due to concurrency limits are automatically rescheduled with small delays.

5. **Consider cascading effects**: Concurrency limits can create queuing effects - monitor your system to ensure tasks don't back up excessively.

Concurrency control helps you build robust systems that respect resource limits while maintaining high throughput for independent operations.

These advanced patterns enable building sophisticated distributed systems that can adapt to changing conditions, handle operational requirements, and provide the debugging and testing capabilities needed for production deployments.