avtomatika-1.0b1.tar.gz → avtomatika-1.0b3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b1 → avtomatika-1.0b3}/PKG-INFO +37 -12
- avtomatika-1.0b1/src/avtomatika.egg-info/PKG-INFO → avtomatika-1.0b3/README.md +27 -48
- {avtomatika-1.0b1 → avtomatika-1.0b3}/pyproject.toml +10 -10
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/__init__.py +2 -3
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/api.html +14 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/blueprint.py +13 -8
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/client_config_loader.py +18 -6
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/dispatcher.py +13 -19
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/engine.py +28 -15
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/executor.py +6 -3
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/ratelimit.py +3 -10
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/reputation.py +11 -2
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/security.py +5 -3
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/__init__.py +3 -3
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/base.py +23 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/memory.py +34 -8
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/redis.py +37 -20
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/telemetry.py +3 -3
- avtomatika-1.0b3/src/avtomatika/watcher.py +82 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/worker_config_loader.py +11 -3
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/ws_manager.py +2 -1
- avtomatika-1.0b1/README.md → avtomatika-1.0b3/src/avtomatika.egg-info/PKG-INFO +73 -2
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/SOURCES.txt +5 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/requires.txt +8 -8
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_client_config_loader.py +7 -6
- avtomatika-1.0b3/tests/test_compression.py +121 -0
- avtomatika-1.0b3/tests/test_config_validation.py +60 -0
- avtomatika-1.0b3/tests/test_dispatcher_extended.py +95 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_integration.py +9 -2
- avtomatika-1.0b3/tests/test_memory_locking.py +44 -0
- avtomatika-1.0b3/tests/test_redis_locking.py +45 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_watcher.py +3 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_worker_config_loader.py +13 -6
- avtomatika-1.0b1/src/avtomatika/watcher.py +0 -68
- {avtomatika-1.0b1 → avtomatika-1.0b3}/LICENSE +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/setup.cfg +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/config.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/context.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/data_types.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/datastore.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/base.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/noop.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/postgres.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/sqlite.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/logging_config.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_context.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_engine.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_error_handling.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_executor.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_history.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_logging_config.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_ws_manager.py +0 -0

--- avtomatika-1.0b1/PKG-INFO
+++ avtomatika-1.0b3/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b1
-Summary: A state-machine based orchestrator for long-running jobs.
+Version: 1.0b3
+Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
 Classifier: Development Status :: 4 - Beta
@@ -18,25 +18,25 @@ Requires-Dist: graphviz~=0.21
 Requires-Dist: zstandard~=0.24
 Requires-Dist: aioprometheus~=23.12
 Provides-Extra: redis
-Requires-Dist: redis~=6.4; extra == "redis"
+Requires-Dist: redis~=7.1; extra == "redis"
 Requires-Dist: orjson~=3.11; extra == "redis"
 Provides-Extra: history
-Requires-Dist: aiosqlite~=0.21; extra == "history"
+Requires-Dist: aiosqlite~=0.22; extra == "history"
 Requires-Dist: asyncpg~=0.30; extra == "history"
 Requires-Dist: orjson~=3.11; extra == "history"
 Provides-Extra: telemetry
-Requires-Dist: opentelemetry-api~=1.38; extra == "telemetry"
-Requires-Dist: opentelemetry-sdk~=1.38; extra == "telemetry"
-Requires-Dist: opentelemetry-exporter-otlp~=1.36; extra == "telemetry"
+Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
+Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
+Requires-Dist: opentelemetry-exporter-otlp~=1.39; extra == "telemetry"
 Requires-Dist: opentelemetry-instrumentation-aiohttp-client~=0.59b0; extra == "telemetry"
 Provides-Extra: test
-Requires-Dist: pytest~=8.4; extra == "test"
+Requires-Dist: pytest~=9.0; extra == "test"
 Requires-Dist: pytest-asyncio~=1.1; extra == "test"
-Requires-Dist: fakeredis~=2.31; extra == "test"
+Requires-Dist: fakeredis~=2.33; extra == "test"
 Requires-Dist: pytest-aiohttp~=1.1; extra == "test"
 Requires-Dist: pytest-mock~=3.14; extra == "test"
 Requires-Dist: aioresponses~=0.7; extra == "test"
-Requires-Dist: backports.zstd; extra == "test"
+Requires-Dist: backports.zstd~=1.2; extra == "test"
 Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
 Provides-Extra: all
 Requires-Dist: avtomatika[redis]; extra == "all"
@@ -251,6 +251,11 @@ async def handle_normal(actions):
     actions.transition_to("normal_processing")
 ```
 
+> **Note on Limitations:** The current version of `.when()` uses a simple parser with the following limitations:
+> * **No Nested Attributes:** You can only access direct fields of `context.initial_data` or `context.state_history` (e.g., `context.initial_data.field`). Nested objects (e.g., `context.initial_data.area.field`) are not supported.
+> * **Simple Comparisons Only:** Only the following operators are supported: `==`, `!=`, `>`, `<`, `>=`, `<=`. Complex logical expressions with `AND`, `OR`, or `NOT` are not allowed.
+> * **Limited Value Types:** The parser only recognizes strings (in quotes), integers, and floats. Boolean values (`True`, `False`) and `None` are not correctly parsed and will be treated as strings.
+
 ### 2. Delegating Tasks to Workers (`dispatch_task`)
 
 This is the primary function for delegating work. The orchestrator will queue the task and wait for a worker to pick it up and return a result.
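
To make the parser limits in the note above concrete, here is an illustrative set of condition strings (hypothetical examples written against the documented rules, not taken from the package):

```python
# Accepted by the simple parser:
ok_string = "context.initial_data.priority == 'high'"  # direct field, quoted string
ok_number = "context.initial_data.retries >= 3"        # direct field, integer

# Rejected or misparsed:
bad_nested = "context.initial_data.area.field == 1"    # nested attributes unsupported
bad_logic = "context.initial_data.a == 1 AND context.initial_data.b == 2"  # no AND/OR/NOT
bad_bool = "context.initial_data.ready == True"        # True is treated as the string 'True'
```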
@@ -280,7 +285,7 @@ Run multiple tasks simultaneously and gather their results.
 @my_blueprint.handler_for("process_files")
 async def fan_out_handler(initial_data, actions):
     tasks_to_dispatch = [
-        {"task_type": "file_analysis", "params": {"file": file}}
+        {"task_type": "file_analysis", "params": {"file": file}})
         for file in initial_data.get("files", [])
     ]
     # Use dispatch_parallel to send all tasks at once.
@@ -327,6 +332,8 @@ async def cache_handler(data_stores):
 
 The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
 
+**Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
+
 ### Fault Tolerance
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
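
To illustrate the fail-fast contract, here is a minimal sketch of the kind of checks involved (not the package's actual loader; the `token` and `monthly_attempts` field names come from the `client_config_loader.py` diff below):

```python
import tomllib

# Hypothetical clients.toml payload; a parse error or a bad field must
# abort startup instead of being silently skipped.
raw = """
[acme]
token = "client-secret-token"
monthly_attempts = 1000
"""

clients = tomllib.loads(raw)  # malformed TOML raises TOMLDecodeError here
for name, cfg in clients.items():
    if not isinstance(cfg, dict):
        raise ValueError(f"Invalid configuration for client '{name}'")
    if not isinstance(cfg.get("token"), str):
        raise ValueError(f"Missing or invalid token for client '{name}'")
    quota = cfg.get("monthly_attempts")
    if quota is not None and not isinstance(quota, int):
        raise ValueError(f"Invalid quota type for client '{name}'")
```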
@@ -335,6 +342,13 @@ The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### High Availability & Distributed Locking
+
+The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
+
+* **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
+
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
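
For readers unfamiliar with the `SET NX` pattern mentioned above, a minimal sketch using `redis-py`'s asyncio client follows (the key name and TTL are assumptions, not the package's actual values):

```python
import uuid

from redis.asyncio import Redis


async def run_leader_pass(redis: Redis) -> bool:
    """Run one background sweep only on the instance that wins the lock."""
    token = uuid.uuid4().hex
    # SET key value NX EX 30: succeeds only if the key does not already exist.
    if not await redis.set("avtomatika:lock:watcher", token, nx=True, ex=30):
        return False  # another orchestrator instance holds the lock
    try:
        ...  # perform the Watcher/ReputationCalculator pass here
        return True
    finally:
        # Release only if we still own the lock (atomic check-and-delete).
        await redis.eval(
            "if redis.call('get', KEYS[1]) == ARGV[1] then "
            "return redis.call('del', KEYS[1]) end return 0",
            1,
            "avtomatika:lock:watcher",
            token,
        )
```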
@@ -368,7 +382,9 @@ The orchestrator uses tokens to authenticate API requests.
 * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
 * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
 * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
-* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable.
+* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
+
+> **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
 
 ### Observability
 
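
A sketch of triggering the reload endpoint described in the note above, using `aiohttp` (host, port, and token are placeholders):

```python
import asyncio

import aiohttp


async def reload_workers() -> None:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8080/api/v1/admin/reload-workers",
            headers={"X-Avtomatika-Token": "your-client-token"},
        ) as resp:
            resp.raise_for_status()
            print(await resp.json())  # expected: {"status": "worker_configs_reloaded"}


asyncio.run(reload_workers())
```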
@@ -401,3 +417,12 @@ To run the `avtomatika` test suite:
 ```bash
 pytest avtomatika/tests/
 ```
+
+## Detailed Documentation
+
+For a deeper dive into the system, please refer to the following documents in the `docs/` directory:
+
+- [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
+- [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
+- [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
+- [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.

--- avtomatika-1.0b1/src/avtomatika.egg-info/PKG-INFO
+++ avtomatika-1.0b3/README.md
@@ -1,49 +1,3 @@
-Metadata-Version: 2.4
-Name: avtomatika
-Version: 1.0b1
-Summary: A state-machine based orchestrator for long-running jobs.
-Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
-Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: aiohttp~=3.12
-Requires-Dist: aiocache~=0.12
-Requires-Dist: python-json-logger~=4.0
-Requires-Dist: graphviz~=0.21
-Requires-Dist: zstandard~=0.24
-Requires-Dist: aioprometheus~=23.12
-Provides-Extra: redis
-Requires-Dist: redis~=6.4; extra == "redis"
-Requires-Dist: orjson~=3.11; extra == "redis"
-Provides-Extra: history
-Requires-Dist: aiosqlite~=0.21; extra == "history"
-Requires-Dist: asyncpg~=0.30; extra == "history"
-Requires-Dist: orjson~=3.11; extra == "history"
-Provides-Extra: telemetry
-Requires-Dist: opentelemetry-api~=1.38; extra == "telemetry"
-Requires-Dist: opentelemetry-sdk~=1.38; extra == "telemetry"
-Requires-Dist: opentelemetry-exporter-otlp~=1.36; extra == "telemetry"
-Requires-Dist: opentelemetry-instrumentation-aiohttp-client~=0.59b0; extra == "telemetry"
-Provides-Extra: test
-Requires-Dist: pytest~=8.4; extra == "test"
-Requires-Dist: pytest-asyncio~=1.1; extra == "test"
-Requires-Dist: fakeredis~=2.31; extra == "test"
-Requires-Dist: pytest-aiohttp~=1.1; extra == "test"
-Requires-Dist: pytest-mock~=3.14; extra == "test"
-Requires-Dist: aioresponses~=0.7; extra == "test"
-Requires-Dist: backports.zstd; extra == "test"
-Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
-Provides-Extra: all
-Requires-Dist: avtomatika[redis]; extra == "all"
-Requires-Dist: avtomatika[history]; extra == "all"
-Requires-Dist: avtomatika[telemetry]; extra == "all"
-Dynamic: license-file
-
 # Avtomatika Orchestrator
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -251,6 +205,11 @@ async def handle_normal(actions):
     actions.transition_to("normal_processing")
 ```
 
+> **Note on Limitations:** The current version of `.when()` uses a simple parser with the following limitations:
+> * **No Nested Attributes:** You can only access direct fields of `context.initial_data` or `context.state_history` (e.g., `context.initial_data.field`). Nested objects (e.g., `context.initial_data.area.field`) are not supported.
+> * **Simple Comparisons Only:** Only the following operators are supported: `==`, `!=`, `>`, `<`, `>=`, `<=`. Complex logical expressions with `AND`, `OR`, or `NOT` are not allowed.
+> * **Limited Value Types:** The parser only recognizes strings (in quotes), integers, and floats. Boolean values (`True`, `False`) and `None` are not correctly parsed and will be treated as strings.
+
 ### 2. Delegating Tasks to Workers (`dispatch_task`)
 
 This is the primary function for delegating work. The orchestrator will queue the task and wait for a worker to pick it up and return a result.
@@ -280,7 +239,7 @@ Run multiple tasks simultaneously and gather their results.
 @my_blueprint.handler_for("process_files")
 async def fan_out_handler(initial_data, actions):
     tasks_to_dispatch = [
-        {"task_type": "file_analysis", "params": {"file": file}}
+        {"task_type": "file_analysis", "params": {"file": file}})
         for file in initial_data.get("files", [])
     ]
     # Use dispatch_parallel to send all tasks at once.
@@ -327,6 +286,8 @@ async def cache_handler(data_stores):
 
 The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
 
+**Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
+
 ### Fault Tolerance
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -335,6 +296,13 @@ The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### High Availability & Distributed Locking
+
+The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
+
+* **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
+
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
@@ -368,7 +336,9 @@ The orchestrator uses tokens to authenticate API requests.
 * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
 * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
 * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
-* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable.
+* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
+
+> **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
 
 ### Observability
 
@@ -401,3 +371,12 @@ To run the `avtomatika` test suite:
 ```bash
 pytest avtomatika/tests/
 ```
+
+## Detailed Documentation
+
+For a deeper dive into the system, please refer to the following documents in the `docs/` directory:
+
+- [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
+- [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
+- [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
+- [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.

--- avtomatika-1.0b1/pyproject.toml
+++ avtomatika-1.0b3/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b1"
-description = "A state-machine based orchestrator for long-running jobs."
+version = "1.0b3"
+description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
 classifiers = [
@@ -24,22 +24,22 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-redis = ["redis~=6.4", "orjson~=3.11"]
-history = ["aiosqlite~=0.21", "asyncpg~=0.30", "orjson~=3.11"]
+redis = ["redis~=7.1", "orjson~=3.11"]
+history = ["aiosqlite~=0.22", "asyncpg~=0.30", "orjson~=3.11"]
 telemetry = [
-    "opentelemetry-api~=1.38",
-    "opentelemetry-sdk~=1.38",
-    "opentelemetry-exporter-otlp~=1.36",
+    "opentelemetry-api~=1.39",
+    "opentelemetry-sdk~=1.39",
+    "opentelemetry-exporter-otlp~=1.39",
     "opentelemetry-instrumentation-aiohttp-client~=0.59b0",
 ]
 test = [
-    "pytest~=8.4",
+    "pytest~=9.0",
     "pytest-asyncio~=1.1",
-    "fakeredis~=2.31",
+    "fakeredis~=2.33",
    "pytest-aiohttp~=1.1",
     "pytest-mock~=3.14",
     "aioresponses~=0.7",
-    "backports.zstd",
+    "backports.zstd~=1.2",
     "opentelemetry-instrumentation-aiohttp-client",
 ]
 all = [

--- avtomatika-1.0b1/src/avtomatika/__init__.py
+++ avtomatika-1.0b3/src/avtomatika/__init__.py
@@ -4,6 +4,7 @@
 This module exposes the primary classes for building and running state-driven automations.
 """
 
+import contextlib
 from importlib.metadata import version
 
 __version__ = version("avtomatika")
@@ -22,9 +23,7 @@ __all__ = [
     "StorageBackend",
 ]
 
-try:
+with contextlib.suppress(ImportError):
     from .storage.redis import RedisStorage  # noqa: F401
 
     __all__.append("RedisStorage")
-except ImportError:
-    pass

--- avtomatika-1.0b1/src/avtomatika/api.html
+++ avtomatika-1.0b3/src/avtomatika/api.html
@@ -305,6 +305,20 @@
         responses: [
             { code: '200 OK', description: 'Successful response.', body: "{...}" }
         ]
+    },
+    {
+        id: 'post-reload-worker-configs',
+        name: 'Reload Worker Configurations',
+        method: 'POST',
+        path: '/api/{version}/admin/reload-workers',
+        description: 'Triggers a dynamic reload of worker configurations from the TOML file. Requires client authentication.',
+        parameters: [
+            { name: 'version', type: 'string', description: 'API Version', example: 'v1' }
+        ],
+        request: { body: null },
+        responses: [
+            { code: '200 OK', description: 'Successful response.', body: { "status": "worker_configs_reloaded" } }
+        ]
     }
 ]
 },

--- avtomatika-1.0b1/src/avtomatika/blueprint.py
+++ avtomatika-1.0b3/src/avtomatika/blueprint.py
@@ -168,8 +168,7 @@ class StateMachineBlueprint:
         for handler in self.conditional_handlers:
             if handler.state == state and handler.evaluate(context):
                 return handler.func
-        default_handler = self.handlers.get(state)
-        if default_handler:
+        if default_handler := self.handlers.get(state):
             return default_handler
         raise ValueError(
             f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
@@ -178,10 +177,13 @@ class StateMachineBlueprint:
     def render_graph(self, output_filename: Optional[str] = None, output_format: str = "png"):
         import ast
         import inspect
+        import logging
         import textwrap
 
         from graphviz import Digraph  # type: ignore[import]
 
+        logger = logging.getLogger(__name__)
+
         dot = Digraph(comment=f"State Machine for {self.name}")
         dot.attr("node", shape="box", style="rounded")
         all_handlers = list(self.handlers.items()) + [(ch.state, ch.func) for ch in self.conditional_handlers]
@@ -222,13 +224,16 @@ class StateMachineBlueprint:
                             value,
                             label=f"on {key}",
                         )
-            except (TypeError, OSError):
-                pass
+            except (TypeError, OSError) as e:
+                logger.warning(
+                    f"Could not parse handler '{handler_func.__name__}' for state '{handler_state}'. "
+                    f"Graph may be incomplete. Error: {e}"
+                )
         for state in states:
             dot.node(state, state)
 
-        if output_filename:
-            dot.render(output_filename, format=output_format, cleanup=True)
-            print(f"Graph rendered to {output_filename}.{output_format}")
-        else:
+        if not output_filename:
             return dot.source
+        dot.render(output_filename, format=output_format, cleanup=True)
+        print(f"Graph rendered to {output_filename}.{output_format}")
+        return None
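
Illustrative usage of the reworked `render_graph` return contract (`my_blueprint` is a placeholder; blueprint construction is elided):

```python
# With no filename, the method returns the Graphviz source text.
dot_source = my_blueprint.render_graph()
print(dot_source)

# With a filename, it renders the file and now explicitly returns None.
result = my_blueprint.render_graph("pipeline", output_format="svg")
assert result is None  # pipeline.svg has been written to disk
```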

--- avtomatika-1.0b1/src/avtomatika/client_config_loader.py
+++ avtomatika-1.0b3/src/avtomatika/client_config_loader.py
@@ -26,25 +26,37 @@ async def load_client_configs_to_redis(
             config_path,
         )
         return
+    except Exception as e:
+        logger.error(f"Failed to parse client config file '{config_path}': {e}")
+        raise ValueError(f"Invalid client configuration file: {e}") from e
 
     loaded_count = 0
     for client_name, config in clients_data.items():
+        if not isinstance(config, dict):
+            logger.error(f"Client '{client_name}' configuration must be a table (dict).")
+            raise ValueError(f"Invalid configuration for client '{client_name}'")
+
         token = config.get("token")
         if not token:
-            logger.
-
-
-
-
+            logger.error(f"Client '{client_name}' is missing required 'token' field.")
+            raise ValueError(f"Missing token for client '{client_name}'")
+
+        if not isinstance(token, str):
+            logger.error(f"Token for client '{client_name}' must be a string.")
+            raise ValueError(f"Invalid token type for client '{client_name}'")
 
         # Separate static config from dynamic quota values
         static_config = {k: v for k, v in config.items() if k != "monthly_attempts"}
         quota = config.get("monthly_attempts")
 
+        if quota is not None and not isinstance(quota, int):
+            logger.error(f"Quota 'monthly_attempts' for client '{client_name}' must be an integer.")
+            raise ValueError(f"Invalid quota type for client '{client_name}'")
+
         try:
             # Assume these storage methods will be implemented
             await storage.save_client_config(token, static_config)
-            if quota is not None
+            if quota is not None:
                 await storage.initialize_client_quota(token, quota)
 
             loaded_count += 1

--- avtomatika-1.0b1/src/avtomatika/dispatcher.py
+++ avtomatika-1.0b3/src/avtomatika/dispatcher.py
@@ -28,15 +28,13 @@ class Dispatcher:
         self.config = config
         self._round_robin_indices: Dict[str, int] = defaultdict(int)
 
+    @staticmethod
     def _is_worker_compliant(
-        self,
         worker: Dict[str, Any],
         requirements: Dict[str, Any],
     ) -> bool:
         """Checks if a worker meets the specified resource requirements."""
-
-        required_gpu = requirements.get("gpu_info")
-        if required_gpu:
+        if required_gpu := requirements.get("gpu_info"):
             gpu_info = worker.get("resources", {}).get("gpu_info")
             if not gpu_info:
                 return False
@@ -51,17 +49,15 @@ class Dispatcher:
         ):
             return False
 
-
-        required_models = requirements.get("installed_models")
-        if required_models:
+        if required_models := requirements.get("installed_models"):
             installed_models = {m["name"] for m in worker.get("installed_models", [])}
             if not set(required_models).issubset(installed_models):
                 return False
 
         return True
 
+    @staticmethod
     def _select_default(
-        self,
         workers: List[Dict[str, Any]],
         task_type: str,
     ) -> Dict[str, Any]:
@@ -74,7 +70,7 @@ class Dispatcher:
         """
         warm_workers = [w for w in workers if task_type in w.get("hot_cache", [])]
 
-        target_pool = warm_workers
+        target_pool = warm_workers or workers
 
         # The `cost` field is deprecated but maintained for backward compatibility.
         min_cost = min(w.get("cost", float("inf")) for w in target_pool)
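
Why the `or` fallback above matters: when no worker has the task type in its hot cache, `warm_workers` is empty, and the subsequent `min()` call needs a non-empty pool. A small illustration (worker dicts simplified):

```python
workers = [{"worker_id": "w1", "hot_cache": [], "cost": 1.0}]

warm_workers = [w for w in workers if "file_analysis" in w.get("hot_cache", [])]
target_pool = warm_workers or workers  # [] is falsy, so fall back to all workers

# Safe now; running min() over an empty pool would raise ValueError.
min_cost = min(w.get("cost", float("inf")) for w in target_pool)
```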
|
95
91
|
self._round_robin_indices[task_type] = idx + 1
|
|
96
92
|
return selected_worker
|
|
97
93
|
|
|
94
|
+
@staticmethod
|
|
98
95
|
def _select_least_connections(
|
|
99
|
-
self,
|
|
100
96
|
workers: List[Dict[str, Any]],
|
|
101
97
|
task_type: str,
|
|
102
98
|
) -> Dict[str, Any]:
|
|
@@ -105,15 +101,16 @@ class Dispatcher:
|
|
|
105
101
|
"""
|
|
106
102
|
return min(workers, key=lambda w: w.get("load", 0.0))
|
|
107
103
|
|
|
104
|
+
@staticmethod
|
|
108
105
|
def _select_cheapest(
|
|
109
|
-
self,
|
|
110
106
|
workers: List[Dict[str, Any]],
|
|
111
107
|
task_type: str,
|
|
112
108
|
) -> Dict[str, Any]:
|
|
113
109
|
"""Selects the cheapest worker based on 'cost_per_second'."""
|
|
114
110
|
return min(workers, key=lambda w: w.get("cost_per_second", float("inf")))
|
|
115
111
|
|
|
116
|
-
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _get_best_value_score(worker: Dict[str, Any]) -> float:
|
|
117
114
|
"""Calculates a "score" for a worker using the formula cost / reputation.
|
|
118
115
|
The lower the score, the better.
|
|
119
116
|
"""
|
|
@@ -121,9 +118,7 @@ class Dispatcher:
|
|
|
121
118
|
# Default reputation is 1.0 if absent
|
|
122
119
|
reputation = worker.get("reputation", 1.0)
|
|
123
120
|
# Avoid division by zero
|
|
124
|
-
if reputation == 0
|
|
125
|
-
return float("inf")
|
|
126
|
-
return cost / reputation
|
|
121
|
+
return float("inf") if reputation == 0 else cost / reputation
|
|
127
122
|
|
|
128
123
|
def _select_best_value(
|
|
129
124
|
self,
|
|
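
A worked example of the cost/reputation score (a standalone re-statement of the method above; whether `cost` is read from `cost_per_second` is an assumption, since that line sits outside the hunk):

```python
def best_value_score(worker: dict) -> float:
    cost = worker.get("cost_per_second", float("inf"))
    reputation = worker.get("reputation", 1.0)  # default reputation is 1.0
    return float("inf") if reputation == 0 else cost / reputation  # lower is better

workers = [
    {"worker_id": "a", "cost_per_second": 0.10, "reputation": 1.0},  # score 0.10
    {"worker_id": "b", "cost_per_second": 0.06, "reputation": 0.5},  # score 0.12
    {"worker_id": "c", "cost_per_second": 0.08, "reputation": 0.0},  # score inf
]
print(min(workers, key=best_value_score)["worker_id"])  # -> "a"
```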
@@ -153,10 +148,9 @@ class Dispatcher:
         idle_workers = [w for w in all_workers if w.get("status", "idle") == "idle"]
         logger.debug(f"Idle workers: {[w['worker_id'] for w in idle_workers]}")
         if not idle_workers:
-            busy_mo_workers = [
-                w for w in all_workers if w.get("status") == "busy" and "multi_orchestrator_info" in w
-            ]
-            if busy_mo_workers:
+            if busy_mo_workers := [
+                w for w in all_workers if w.get("status") == "busy" and "multi_orchestrator_info" in w
+            ]:
                 logger.warning(
                     f"No idle workers. Found {len(busy_mo_workers)} busy workers "
                     f"in multi-orchestrator mode. They are likely performing tasks for other Orchestrators.",

--- avtomatika-1.0b1/src/avtomatika/engine.py
+++ avtomatika-1.0b3/src/avtomatika/engine.py
@@ -485,8 +485,7 @@ class OrchestratorEngine:
             await self.storage.save_job_state(job_id, job_state)
             # Optionally, trigger a specific 'cancelled' transition if defined in the blueprint
             transitions = job_state.get("current_task_transitions", {})
-            next_state = transitions.get("cancelled")
-            if next_state:
+            if next_state := transitions.get("cancelled"):
                 job_state["current_state"] = next_state
                 job_state["status"] = "running"  # It's running the cancellation handler now
                 await self.storage.save_job_state(job_id, job_state)
@@ -494,9 +493,7 @@ class OrchestratorEngine:
             return web.json_response({"status": "result_accepted_cancelled"}, status=200)
 
         transitions = job_state.get("current_task_transitions", {})
-        next_state = transitions.get(result_status)
-
-        if next_state:
+        if next_state := transitions.get(result_status):
             logging.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
 
             worker_data = result.get("data")
@@ -584,13 +581,26 @@ class OrchestratorEngine:
         jobs = await self.storage.get_quarantined_jobs()
         return web.json_response(jobs)
 
+    async def _reload_worker_configs_handler(self, request: web.Request) -> web.Response:
+        """Handles the dynamic reloading of worker configurations."""
+        logger.info("Received request to reload worker configurations.")
+        if not self.config.WORKERS_CONFIG_PATH:
+            return web.json_response(
+                {"error": "WORKERS_CONFIG_PATH is not set, cannot reload configs."},
+                status=400,
+            )
+
+        await load_worker_configs_to_redis(self.storage, self.config.WORKERS_CONFIG_PATH)
+        return web.json_response({"status": "worker_configs_reloaded"})
+
     async def _flush_db_handler(self, request: web.Request) -> web.Response:
         logger.warning("Received request to flush the database.")
         await self.storage.flush_all()
         await load_client_configs_to_redis(self.storage)
         return web.json_response({"status": "db_flushed"}, status=200)
 
-    async def _docs_handler(self, request: web.Request) -> web.Response:
+    @staticmethod
+    async def _docs_handler(request: web.Request) -> web.Response:
         from importlib import resources
 
         try:
@@ -635,15 +645,7 @@ class OrchestratorEngine:
             all_protected_apps.append(protected_app)
 
         for app in all_protected_apps:
-            app.router.add_get("/jobs/{job_id}", self._get_job_status_handler)
-            app.router.add_post("/jobs/{job_id}/cancel", self._cancel_job_handler)
-            if not isinstance(self.history_storage, NoOpHistoryStorage):
-                app.router.add_get("/jobs/{job_id}/history", self._get_job_history_handler)
-            app.router.add_get("/blueprints/{blueprint_name}/graph", self._get_blueprint_graph_handler)
-            app.router.add_get("/workers", self._get_workers_handler)
-            app.router.add_get("/jobs", self._get_jobs_handler)
-            app.router.add_get("/dashboard", self._get_dashboard_handler)
-
+            self._register_common_routes(app)
         if has_unversioned_routes:
             self.app.add_subapp("/api/", protected_app)
         for version, app in versioned_apps.items():
@@ -663,6 +665,17 @@ class OrchestratorEngine:
         worker_app.router.add_get("/ws/{worker_id}", self._websocket_handler)
         self.app.add_subapp("/_worker/", worker_app)
 
+    def _register_common_routes(self, app):
+        app.router.add_get("/jobs/{job_id}", self._get_job_status_handler)
+        app.router.add_post("/jobs/{job_id}/cancel", self._cancel_job_handler)
+        if not isinstance(self.history_storage, NoOpHistoryStorage):
+            app.router.add_get("/jobs/{job_id}/history", self._get_job_history_handler)
+        app.router.add_get("/blueprints/{blueprint_name}/graph", self._get_blueprint_graph_handler)
+        app.router.add_get("/workers", self._get_workers_handler)
+        app.router.add_get("/jobs", self._get_jobs_handler)
+        app.router.add_get("/dashboard", self._get_dashboard_handler)
+        app.router.add_post("/admin/reload-workers", self._reload_worker_configs_handler)
+
     async def _websocket_handler(self, request: web.Request) -> web.WebSocketResponse:
         worker_id = request.match_info.get("worker_id")
         if not worker_id:

--- avtomatika-1.0b1/src/avtomatika/executor.py
+++ avtomatika-1.0b3/src/avtomatika/executor.py
@@ -35,11 +35,13 @@ except ImportError:
         def inject(self, *args, **kwargs):
             pass
 
-        def extract(self, *args, **kwargs):
+        @staticmethod
+        def extract(*args, **kwargs):
             return None
 
     class NoOpTraceContextTextMapPropagator:
-        def extract(self, *args, **kwargs):
+        @staticmethod
+        def extract(*args, **kwargs):
             return None
 
     trace = NoOpTracer()
@@ -485,7 +487,8 @@ class JobExecutor:
         await self.storage.save_job_state(parent_job_id, parent_job_state)
         await self.storage.enqueue_job(parent_job_id)
 
-    def _handle_task_completion(self, task: Task):
+    @staticmethod
+    def _handle_task_completion(task: Task):
         """Callback to handle completion of a job processing task."""
         try:
             # This will re-raise any exception caught in the task

--- avtomatika-1.0b1/src/avtomatika/ratelimit.py
+++ avtomatika-1.0b3/src/avtomatika/ratelimit.py
@@ -1,3 +1,4 @@
+from contextlib import suppress
 from typing import Awaitable, Callable
 
 from aiohttp import web
@@ -23,23 +24,15 @@ def rate_limit_middleware_factory(
         """Rate-limiting middleware that uses the provided storage backend."""
         # Determine the key for rate limiting (e.g., by worker_id or IP)
        # For worker endpoints, we key by worker_id. For others, by IP.
-        key_identifier = request.match_info.get("worker_id", request.remote)
-        if not key_identifier:
-            # Fallback for cases where remote IP might not be available
-            key_identifier = "unknown"
+        key_identifier = request.match_info.get("worker_id", request.remote) or "unknown"
 
         # Key by identifier and path to have per-endpoint limits
         rate_limit_key = f"ratelimit:{key_identifier}:{request.path}"
 
-        try:
+        with suppress(Exception):
             count = await storage.increment_key_with_ttl(rate_limit_key, period)
             if count > limit:
                 return web.json_response({"error": "Too Many Requests"}, status=429)
-        except Exception:
-            # If the rate limiter fails for any reason (e.g., Redis down),
-            # it's safer to let the request through than to block everything.
-            pass
-
         return await handler(request)
 
     return rate_limit_middleware