agentexec 0.1.7__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. agentexec-0.2.0/.github/workflows/ci.yml +125 -0
  2. {agentexec-0.1.7 → agentexec-0.2.0}/CHANGELOG.md +92 -0
  3. {agentexec-0.1.7 → agentexec-0.2.0}/PKG-INFO +101 -54
  4. {agentexec-0.1.7 → agentexec-0.2.0}/README.md +97 -52
  5. agentexec-0.2.0/RELEASE.md +91 -0
  6. agentexec-0.2.0/docker-compose.kafka.yml +48 -0
  7. agentexec-0.2.0/docs/api-reference/activity.md +401 -0
  8. agentexec-0.2.0/docs/api-reference/core.md +532 -0
  9. {agentexec-0.1.7 → agentexec-0.2.0}/docs/api-reference/pipeline.md +8 -8
  10. {agentexec-0.1.7 → agentexec-0.2.0}/docs/api-reference/runner.md +5 -5
  11. agentexec-0.2.0/docs/concepts/activity-tracking.md +431 -0
  12. {agentexec-0.1.7 → agentexec-0.2.0}/docs/concepts/architecture.md +63 -44
  13. {agentexec-0.1.7 → agentexec-0.2.0}/docs/concepts/task-lifecycle.md +33 -27
  14. agentexec-0.2.0/docs/concepts/worker-pool.md +363 -0
  15. {agentexec-0.1.7 → agentexec-0.2.0}/docs/contributing.md +19 -9
  16. {agentexec-0.1.7 → agentexec-0.2.0}/docs/deployment/docker.md +23 -18
  17. {agentexec-0.1.7 → agentexec-0.2.0}/docs/deployment/production.md +49 -62
  18. {agentexec-0.1.7 → agentexec-0.2.0}/docs/getting-started/configuration.md +30 -22
  19. {agentexec-0.1.7 → agentexec-0.2.0}/docs/getting-started/installation.md +2 -2
  20. agentexec-0.2.0/docs/getting-started/quickstart.md +253 -0
  21. {agentexec-0.1.7 → agentexec-0.2.0}/docs/guides/basic-usage.md +112 -114
  22. {agentexec-0.1.7 → agentexec-0.2.0}/docs/guides/custom-runners.md +34 -34
  23. agentexec-0.2.0/docs/guides/fastapi-integration.md +296 -0
  24. {agentexec-0.1.7 → agentexec-0.2.0}/docs/guides/openai-runner.md +5 -5
  25. {agentexec-0.1.7 → agentexec-0.2.0}/docs/guides/pipelines.md +6 -6
  26. {agentexec-0.1.7 → agentexec-0.2.0}/docs/index.md +23 -10
  27. {agentexec-0.1.7 → agentexec-0.2.0}/pyproject.toml +22 -5
  28. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/__init__.py +4 -2
  29. agentexec-0.2.0/src/agentexec/activity/__init__.py +122 -0
  30. agentexec-0.2.0/src/agentexec/activity/events.py +30 -0
  31. agentexec-0.2.0/src/agentexec/activity/handlers.py +94 -0
  32. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/activity/models.py +107 -130
  33. agentexec-0.2.0/src/agentexec/activity/producer.py +170 -0
  34. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/activity/schemas.py +9 -5
  35. agentexec-0.2.0/src/agentexec/activity/status.py +11 -0
  36. agentexec-0.2.0/src/agentexec/cli.py +162 -0
  37. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/config.py +59 -10
  38. agentexec-0.2.0/src/agentexec/core/db.py +65 -0
  39. agentexec-0.2.0/src/agentexec/core/queue.py +65 -0
  40. agentexec-0.2.0/src/agentexec/core/results.py +41 -0
  41. agentexec-0.2.0/src/agentexec/core/task.py +213 -0
  42. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/pipeline.py +1 -1
  43. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/runners/base.py +2 -2
  44. agentexec-0.2.0/src/agentexec/schedule.py +102 -0
  45. agentexec-0.2.0/src/agentexec/state/__init__.py +38 -0
  46. agentexec-0.2.0/src/agentexec/state/base.py +106 -0
  47. agentexec-0.2.0/src/agentexec/state/kafka.py +304 -0
  48. agentexec-0.2.0/src/agentexec/state/redis.py +272 -0
  49. agentexec-0.2.0/src/agentexec/tracker.py +48 -0
  50. agentexec-0.2.0/src/agentexec/worker/event.py +31 -0
  51. agentexec-0.2.0/src/agentexec/worker/pool.py +580 -0
  52. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_activity_schemas.py +0 -2
  53. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_activity_tracking.py +136 -151
  54. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_config.py +7 -9
  55. agentexec-0.2.0/tests/test_db.py +39 -0
  56. agentexec-0.2.0/tests/test_kafka_integration.py +158 -0
  57. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_pipeline.py +0 -2
  58. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_pipeline_flow.py +0 -49
  59. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_public_api.py +2 -4
  60. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_queue.py +20 -60
  61. agentexec-0.2.0/tests/test_queue_partitions.py +174 -0
  62. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_results.py +29 -52
  63. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_runners.py +13 -16
  64. agentexec-0.2.0/tests/test_schedule.py +390 -0
  65. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_self_describing_results.py +17 -42
  66. agentexec-0.2.0/tests/test_state.py +60 -0
  67. agentexec-0.2.0/tests/test_state_backend.py +123 -0
  68. agentexec-0.2.0/tests/test_task.py +378 -0
  69. agentexec-0.2.0/tests/test_task_locking.py +153 -0
  70. agentexec-0.2.0/tests/test_worker_event.py +71 -0
  71. agentexec-0.2.0/tests/test_worker_logging.py +439 -0
  72. agentexec-0.2.0/tests/test_worker_pool.py +487 -0
  73. agentexec-0.2.0/tests/test_worker_resilience.py +198 -0
  74. agentexec-0.1.7/.claude/settings.local.json +0 -7
  75. agentexec-0.1.7/.claude/skills/prepare-release/SKILL.md +0 -57
  76. agentexec-0.1.7/docs/api-reference/activity.md +0 -455
  77. agentexec-0.1.7/docs/api-reference/core.md +0 -476
  78. agentexec-0.1.7/docs/concepts/activity-tracking.md +0 -520
  79. agentexec-0.1.7/docs/concepts/worker-pool.md +0 -499
  80. agentexec-0.1.7/docs/getting-started/quickstart.md +0 -262
  81. agentexec-0.1.7/docs/guides/fastapi-integration.md +0 -301
  82. agentexec-0.1.7/examples/multi-tenancy/README.md +0 -92
  83. agentexec-0.1.7/examples/multi-tenancy/example.py +0 -188
  84. agentexec-0.1.7/examples/openai-agents-fastapi/README.md +0 -147
  85. agentexec-0.1.7/examples/openai-agents-fastapi/alembic/README +0 -1
  86. agentexec-0.1.7/examples/openai-agents-fastapi/alembic/env.py +0 -82
  87. agentexec-0.1.7/examples/openai-agents-fastapi/alembic/script.py.mako +0 -28
  88. agentexec-0.1.7/examples/openai-agents-fastapi/alembic.ini +0 -148
  89. agentexec-0.1.7/examples/openai-agents-fastapi/compose.yml +0 -10
  90. agentexec-0.1.7/examples/openai-agents-fastapi/context.py +0 -21
  91. agentexec-0.1.7/examples/openai-agents-fastapi/db.py +0 -27
  92. agentexec-0.1.7/examples/openai-agents-fastapi/main.py +0 -45
  93. agentexec-0.1.7/examples/openai-agents-fastapi/models.py +0 -9
  94. agentexec-0.1.7/examples/openai-agents-fastapi/pipeline.py +0 -246
  95. agentexec-0.1.7/examples/openai-agents-fastapi/pyproject.toml +0 -35
  96. agentexec-0.1.7/examples/openai-agents-fastapi/tools.py +0 -46
  97. agentexec-0.1.7/examples/openai-agents-fastapi/ui/bun.lock +0 -422
  98. agentexec-0.1.7/examples/openai-agents-fastapi/ui/index.html +0 -13
  99. agentexec-0.1.7/examples/openai-agents-fastapi/ui/package.json +0 -27
  100. agentexec-0.1.7/examples/openai-agents-fastapi/ui/public/vite.svg +0 -4
  101. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/App.tsx +0 -35
  102. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/api/agents.ts +0 -37
  103. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/api/queries.ts +0 -51
  104. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/components/Layout.tsx +0 -38
  105. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/index.css +0 -263
  106. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/main.tsx +0 -10
  107. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/pages/AgentDetailPage.tsx +0 -45
  108. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/pages/AgentListPage.tsx +0 -68
  109. agentexec-0.1.7/examples/openai-agents-fastapi/ui/src/styles/github-dark.css +0 -617
  110. agentexec-0.1.7/examples/openai-agents-fastapi/ui/tsconfig.json +0 -21
  111. agentexec-0.1.7/examples/openai-agents-fastapi/ui/tsconfig.node.json +0 -11
  112. agentexec-0.1.7/examples/openai-agents-fastapi/ui/vite.config.ts +0 -19
  113. agentexec-0.1.7/examples/openai-agents-fastapi/views.py +0 -118
  114. agentexec-0.1.7/examples/openai-agents-fastapi/worker.py +0 -86
  115. agentexec-0.1.7/src/agentexec/activity/__init__.py +0 -39
  116. agentexec-0.1.7/src/agentexec/activity/tracker.py +0 -286
  117. agentexec-0.1.7/src/agentexec/core/db.py +0 -62
  118. agentexec-0.1.7/src/agentexec/core/logging.py +0 -33
  119. agentexec-0.1.7/src/agentexec/core/queue.py +0 -132
  120. agentexec-0.1.7/src/agentexec/core/results.py +0 -64
  121. agentexec-0.1.7/src/agentexec/core/task.py +0 -336
  122. agentexec-0.1.7/src/agentexec/schedule.py +0 -144
  123. agentexec-0.1.7/src/agentexec/state/__init__.py +0 -266
  124. agentexec-0.1.7/src/agentexec/state/backend.py +0 -363
  125. agentexec-0.1.7/src/agentexec/state/redis_backend.py +0 -491
  126. agentexec-0.1.7/src/agentexec/tracker.py +0 -67
  127. agentexec-0.1.7/src/agentexec/worker/event.py +0 -48
  128. agentexec-0.1.7/src/agentexec/worker/logging.py +0 -104
  129. agentexec-0.1.7/src/agentexec/worker/pool.py +0 -488
  130. agentexec-0.1.7/tests/test_activity_tracking.py.bak +0 -427
  131. agentexec-0.1.7/tests/test_db.py +0 -134
  132. agentexec-0.1.7/tests/test_schedule.py +0 -447
  133. agentexec-0.1.7/tests/test_state.py +0 -185
  134. agentexec-0.1.7/tests/test_state_backend.py +0 -292
  135. agentexec-0.1.7/tests/test_task.py +0 -316
  136. agentexec-0.1.7/tests/test_task_locking.py +0 -260
  137. agentexec-0.1.7/tests/test_worker_event.py +0 -133
  138. agentexec-0.1.7/tests/test_worker_logging.py +0 -280
  139. agentexec-0.1.7/tests/test_worker_pool.py +0 -284
  140. agentexec-0.1.7/ui/.gitignore +0 -3
  141. {agentexec-0.1.7 → agentexec-0.2.0}/.github/workflows/docker-publish.yml +0 -0
  142. {agentexec-0.1.7 → agentexec-0.2.0}/.github/workflows/npm-publish.yml +0 -0
  143. {agentexec-0.1.7 → agentexec-0.2.0}/.github/workflows/publish.yml +0 -0
  144. {agentexec-0.1.7 → agentexec-0.2.0}/.gitignore +0 -0
  145. {agentexec-0.1.7 → agentexec-0.2.0}/docker/Dockerfile +0 -0
  146. {agentexec-0.1.7 → agentexec-0.2.0}/docker/README.md +0 -0
  147. {agentexec-0.1.7 → agentexec-0.2.0}/docker/entrypoint.py +0 -0
  148. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/core/__init__.py +0 -0
  149. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/runners/__init__.py +0 -0
  150. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/runners/openai.py +0 -0
  151. {agentexec-0.1.7 → agentexec-0.2.0}/src/agentexec/worker/__init__.py +0 -0
  152. {agentexec-0.1.7 → agentexec-0.2.0}/tests/test_task_types.py +0 -0
  153. {agentexec-0.1.7/examples/openai-agents-fastapi → agentexec-0.2.0}/ui/.gitignore +0 -0
  154. {agentexec-0.1.7 → agentexec-0.2.0}/ui/README.md +0 -0
  155. {agentexec-0.1.7 → agentexec-0.2.0}/ui/bun.lock +0 -0
  156. {agentexec-0.1.7 → agentexec-0.2.0}/ui/package.json +0 -0
  157. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/ActiveAgentsBadge.tsx +0 -0
  158. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/ProgressBar.tsx +0 -0
  159. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/StatusBadge.tsx +0 -0
  160. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/TaskDetail.tsx +0 -0
  161. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/TaskList.tsx +0 -0
  162. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/components/index.ts +0 -0
  163. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/index.ts +0 -0
  164. {agentexec-0.1.7 → agentexec-0.2.0}/ui/src/types.ts +0 -0
  165. {agentexec-0.1.7 → agentexec-0.2.0}/ui/tsconfig.json +0 -0
  166. {agentexec-0.1.7 → agentexec-0.2.0}/ui/vite.config.ts +0 -0
@@ -0,0 +1,125 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ # -----------------------------------------------------------------------
11
+ # Unit tests — no external services (fakeredis + SQLite)
12
+ # -----------------------------------------------------------------------
13
+
14
+ test:
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ python-version: ["3.12", "3.13"]
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v6
26
+ with:
27
+ enable-cache: true
28
+
29
+ - name: Set up Python ${{ matrix.python-version }}
30
+ run: uv python install ${{ matrix.python-version }}
31
+
32
+ - name: Install dependencies
33
+ run: uv sync --dev
34
+
35
+ - name: Run unit tests
36
+ run: |
37
+ uv run pytest tests/ \
38
+ --ignore=tests/test_kafka_integration.py \
39
+ -o "addopts=" \
40
+ -v --tb=long
41
+
42
+ # -----------------------------------------------------------------------
43
+ # Kafka integration tests — real broker via docker run
44
+ # -----------------------------------------------------------------------
45
+ test-kafka:
46
+ runs-on: ubuntu-latest
47
+
48
+ steps:
49
+ - uses: actions/checkout@v4
50
+
51
+ - name: Start Kafka broker
52
+ run: |
53
+ docker run -d --name kafka \
54
+ -p 9092:9092 \
55
+ -e KAFKA_NODE_ID=1 \
56
+ -e KAFKA_PROCESS_ROLES=broker,controller \
57
+ -e KAFKA_CONTROLLER_QUORUM_VOTERS=1@localhost:9093 \
58
+ -e KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER \
59
+ -e KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 \
60
+ -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 \
61
+ -e KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT \
62
+ -e KAFKA_INTER_BROKER_LISTENER_NAME=PLAINTEXT \
63
+ -e KAFKA_LOG_CLEANER_MIN_COMPACTION_LAG_MS=0 \
64
+ -e KAFKA_LOG_CLEANER_MIN_CLEANABLE_RATIO=0.01 \
65
+ -e KAFKA_LOG_RETENTION_MS=60000 \
66
+ -e KAFKA_NUM_PARTITIONS=1 \
67
+ -e KAFKA_AUTO_CREATE_TOPICS_ENABLE=true \
68
+ -e KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 \
69
+ -e KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 \
70
+ -e CLUSTER_ID=ciTestCluster0001 \
71
+ apache/kafka:3.9.0
72
+
73
+ - name: Install uv
74
+ uses: astral-sh/setup-uv@v6
75
+ with:
76
+ enable-cache: true
77
+
78
+ - name: Set up Python
79
+ run: uv python install 3.12
80
+
81
+ - name: Install dependencies
82
+ run: uv sync --dev --extra kafka
83
+
84
+ - name: Wait for Kafka to be ready
85
+ run: |
86
+ echo "Waiting for Kafka..."
87
+ for i in $(seq 1 30); do
88
+ if nc -z localhost 9092 2>/dev/null; then
89
+ echo "Kafka port is open"
90
+ sleep 5
91
+ echo "Kafka is ready"
92
+ exit 0
93
+ fi
94
+ echo " attempt $i/30..."
95
+ sleep 2
96
+ done
97
+ echo "Kafka failed to start"
98
+ docker logs kafka
99
+ exit 1
100
+
101
+ - name: Run Kafka integration tests
102
+ timeout-minutes: 2
103
+ run: |
104
+ uv run pytest tests/test_kafka_integration.py \
105
+ -o "addopts=" \
106
+ -v --tb=long 2>&1 | tee /tmp/kafka_test_output.txt
107
+ exit ${PIPESTATUS[0]}
108
+ env:
109
+ AGENTEXEC_STATE_BACKEND: agentexec.state.kafka
110
+ KAFKA_BOOTSTRAP_SERVERS: localhost:9092
111
+ AGENTEXEC_KAFKA_DEFAULT_PARTITIONS: "2"
112
+ AGENTEXEC_KAFKA_REPLICATION_FACTOR: "1"
113
+
114
+ - name: Show Kafka logs on failure
115
+ if: failure()
116
+ run: docker logs kafka 2>&1 | tail -50
117
+
118
+ - name: Create failure check annotation with output
119
+ if: failure()
120
+ run: |
121
+ if [ -f /tmp/kafka_test_output.txt ]; then
122
+ grep -E '\[queue_|FAILED|ERROR|AssertionError|TIMEOUT|short test summary' /tmp/kafka_test_output.txt | tail -9 | while IFS= read -r line; do
123
+ echo "::warning::$line"
124
+ done
125
+ fi
@@ -1,5 +1,97 @@
1
1
  # Changelog
2
2
 
3
+ ## v0.2.0
4
+
5
+ Major refactor of the backend, queue, activity, worker, and database layers.
6
+ If you're upgrading from 0.1.x, read the **Breaking Changes** section closely.
7
+
8
+ ### Breaking Changes
9
+
10
+ **Fully async database layer**
11
+ - `configure_engine()` and `get_session()` now require an async SQLAlchemy engine (`AsyncEngine`) and return `AsyncSession`
12
+ - Database URLs must use async drivers (e.g. `sqlite+aiosqlite://`, `postgresql+asyncpg://`)
13
+ - `sqlalchemy[asyncio]` is now a core dependency
14
+
15
+ **Async activity API**
16
+ - All activity functions are async: `await ax.activity.create(...)`, `await ax.activity.update(...)`, `await ax.activity.complete(...)`, `await ax.activity.error(...)`
17
+ - `activity.list()`, `activity.detail()`, and `activity.count_active()` are async and accept `AsyncSession`
18
+ - Activity handlers are async (`async def __call__`)
19
+ - The `session` parameter was removed from activity mutations — the handler owns its own session lifecycle
20
+
21
+ **Pool entry point**
22
+ - `pool.run()` was removed. Use `await pool.start()` in an asyncio loop, or the new `agentexec run mymodule:pool` CLI
23
+ - `AGENTEXEC_QUEUE_NAME` renamed to `AGENTEXEC_QUEUE_PREFIX` (old name still accepted as alias)
24
+ - `agentexec.state.redis_backend` renamed to `agentexec.state.redis` — update `AGENTEXEC_STATE_BACKEND` if set explicitly
25
+
26
+ **Task context serialization**
27
+ - `Task.context` is now `Mapping[str, Any]` (raw dict), not a typed BaseModel — hydration happens at execution time
28
+ - `Task.create()` is now async
29
+
30
+ **Queue backend protocol**
31
+ - `BaseQueueBackend.push()` signature changed from `high_priority: bool` to `priority: Priority | None` — affects Redis, Kafka, and any custom queue backend
32
+
33
+ **Removed APIs**
34
+ - `set_global_session`/`get_global_session`/`remove_global_session` — use `configure_engine`/`get_session`
35
+ - `state.backend.publish`/`subscribe` (pubsub), `index_add`/`index_range`/`index_remove`, `clear`, `configure`
36
+ - `worker/logging.py` and `core/logging.py` — all modules use stdlib `logging.getLogger(__name__)` directly
37
+
38
+ ### New Features
39
+
40
+ **CLI entrypoint**
41
+ - New `agentexec` CLI command: `agentexec run mymodule:pool --create-tables --workers 4`
42
+
43
+ **Partitioned Redis queues**
44
+ - Tasks with `lock_key` route to dedicated partition queues with per-partition locking and SCAN-based fair dequeue
45
+
46
+ **Activity handler pattern**
47
+ - Pluggable persistence via `PostgresHandler` (default) and `IPCHandler` (worker processes)
48
+
49
+ **Task retry**
50
+ - Failed tasks requeue as high priority with `AGENTEXEC_MAX_TASK_RETRIES` (default 3)
51
+
52
+ **Kafka backend (experimental)**
53
+ - `pip install agentexec[kafka]` for queue and schedule via Kafka
54
+
55
+ **Typed worker IPC**
56
+ - `TaskFailed` and `ActivityEvent` messages flow over `multiprocessing.Queue` with pydantic validation
57
+
58
+ **Schedule composite keys**
59
+ - `{task_name}:{cron}:{context_hash}` for unique schedule identity
60
+
61
+ **Activity model `create()` classmethod**
62
+ - `Activity.create()` encapsulates record + initial log entry creation in one async call
63
+
64
+ **Async engine disposal**
65
+ - `dispose_engine()` ensures the async engine's background threads exit cleanly on shutdown
66
+
67
+ ### Architecture Changes
68
+
69
+ **Worker pool refactor**
70
+ - Workers use the `spawn` multiprocessing start method with explicit context — no inherited state
71
+ - Event handling and scheduling extracted into `_EventHandler` and `_Scheduler` classes
72
+ - `StateEvent` replaced with stdlib `multiprocessing.Event` — removes dependency on the state backend for shutdown coordination
73
+ - Class-based backend architecture with ABCs (`BaseStateBackend`, `BaseQueueBackend`, `BaseScheduleBackend`)
74
+ - `Task` is pure data, `TaskDefinition` owns behavior
75
+ - Status enum extracted to `activity/status.py` (no SQLAlchemy dependency)
76
+
77
+ **Logging**
78
+ - All modules use stdlib `logging.getLogger(__name__)`
79
+ - Spawned workers bootstrap a `StreamHandler` on the root logger so logs reach stderr
80
+ - Pool messages use `logger.info`/`logger.error` instead of `print()`
81
+
82
+ ### Bug Fixes
83
+
84
+ - **Orphaned worker processes on shutdown.** SIGTERM (systemd/docker stop), SIGKILL, and SIGHUP were leaving worker processes running. Fixed via an asyncio SIGTERM handler in the CLI and `prctl(PR_SET_PDEATHSIG)` in each worker so the kernel terminates workers when the pool dies
85
+ - **Worker and scheduler error loops throttled.** Infra failures (e.g. Redis unreachable) were producing 100k+ log lines per second. Added a 1s sleep after outer-loop exceptions
86
+ - **Unregistered task name crash.** Worker now logs an error and skips instead of crashing when it receives a task for an unknown name
87
+ - Failed tasks now log full tracebacks via `logger.exception` instead of `logger.error`
88
+ - Kafka consumer handles `None` message values without crashing
89
+ - `ActivityUpdated.status` is a `Status` enum instead of raw string
90
+
91
+ ### Documentation
92
+
93
+ - Full documentation sweep for the async API — connection strings, CLI usage, `await` on activity calls across all guides and API references
94
+
3
95
  ## v0.1.7
4
96
 
5
97
  ### New Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentexec
3
- Version: 0.1.7
3
+ Version: 0.2.0
4
4
  Summary: Production-ready orchestration for OpenAI Agents with Redis-backed coordination, activity tracking, and workflow management
5
5
  Project-URL: Homepage, https://github.com/Agent-CI/agentexec
6
6
  Project-URL: Documentation, https://github.com/Agent-CI/agentexec#readme
@@ -21,7 +21,9 @@ Requires-Dist: openai-agents>=0.1.0
21
21
  Requires-Dist: pydantic-settings>=2.5.0
22
22
  Requires-Dist: pydantic>=2.12.0
23
23
  Requires-Dist: redis>=7.0.1
24
- Requires-Dist: sqlalchemy>=2.0.44
24
+ Requires-Dist: sqlalchemy[asyncio]>=2.0.44
25
+ Provides-Extra: kafka
26
+ Requires-Dist: aiokafka>=0.11.0; extra == 'kafka'
25
27
  Description-Content-Type: text/markdown
26
28
 
27
29
  # `agentexec`
@@ -148,8 +150,8 @@ async def start_research(company: str) -> dict:
148
150
  return {"agent_id": str(task.agent_id), "status": "queued"} # Return agent_id for status polling
149
151
 
150
152
  @router.get("/research/{agent_id}")
151
- def get_status(agent_id: UUID, db: Session = Depends(get_db)) -> ax.activity.ActivityDetailSchema:
152
- return ax.activity.detail(db, agent_id=agent_id) # Query by agent_id
153
+ async def get_status(agent_id: UUID) -> ax.activity.ActivityDetailSchema:
154
+ return await ax.activity.detail(agent_id=agent_id)
153
155
  ```
154
156
 
155
157
  ### 4. Run Workers
@@ -176,8 +178,8 @@ task = await ax.enqueue(
176
178
  )
177
179
 
178
180
  # Filter activities by metadata
179
- activities = ax.activity.list(db, metadata_filter={"organization_id": "org-123"})
180
- detail = ax.activity.detail(db, agent_id, metadata_filter={"organization_id": "org-123"})
181
+ activities = await ax.activity.list(metadata_filter={"organization_id": "org-123"})
182
+ detail = await ax.activity.detail(agent_id=agent_id, metadata_filter={"organization_id": "org-123"})
181
183
 
182
184
  # Access metadata programmatically (excluded from API serialization by default)
183
185
  org_id = detail.metadata["organization_id"]
@@ -212,7 +214,7 @@ agent = Agent(
212
214
  Update progress explicitly from your task:
213
215
 
214
216
  ```python
215
- ax.activity.update(agent_id, "Processing batch 3 of 10", percentage=30)
217
+ await ax.activity.update(agent_id, "Processing batch 3 of 10", percentage=30)
216
218
  ```
217
219
 
218
220
  ### Task Locking
@@ -228,11 +230,9 @@ async def associate(agent_id: UUID, context: ObservationContext):
228
230
  pool.add_task("associate_observation", handler, lock_key="user:{user_id}")
229
231
  ```
230
232
 
231
- The `lock_key` is a string template evaluated against the task context fields. When a worker dequeues a task whose lock is held, it puts the task back at the end of the queue and moves on. The lock is released automatically when the task completes or errors.
233
+ The `lock_key` is a string template evaluated against the task context fields. Tasks with the same evaluated lock key are routed to a dedicated partition queue (`{prefix}:{lock_key}`) where they execute one at a time. Workers skip locked partitions and move on to the next available one — no requeuing, no wasted cycles.
232
234
 
233
- The lock TTL (`AGENTEXEC_LOCK_TTL`, default 1800s) is a safety net for worker process death — locks are always explicitly released on task completion or error. Set this higher than your longest expected task duration.
234
-
235
- **Note:** When a task is requeued due to a held lock, it goes to the back of the queue. This means strict FIFO ordering is not guaranteed between tasks sharing the same lock key — if tasks T2 and T3 are both waiting on T1's lock, either could run next after T1 completes.
235
+ The lock is released automatically when a task completes or errors. The lock TTL (`AGENTEXEC_LOCK_TTL`, default 1800s) is a safety net for worker process death (OOM, SIGKILL) — under normal operation, locks are always explicitly released. Set this higher than your longest expected task duration.
236
236
 
237
237
  ### Scheduled Tasks
238
238
 
@@ -392,8 +392,7 @@ if __name__ == "__main__":
392
392
  try:
393
393
  pool.run()
394
394
  except KeyboardInterrupt:
395
- with Session(engine) as db:
396
- ax.activity.cancel_pending(db)
395
+ asyncio.run(ax.activity.cancel_pending())
397
396
  ```
398
397
 
399
398
  ### Docker Deployment
@@ -422,11 +421,10 @@ import agentexec as ax
422
421
  engine = create_engine(os.environ["DATABASE_URL"])
423
422
  pool = ax.Pool(engine=engine)
424
423
 
425
- def cleanup() -> None:
426
- with Session(engine) as db:
427
- ax.activity.cancel_pending(db)
424
+ async def cleanup() -> None:
425
+ await ax.activity.cancel_pending()
428
426
 
429
- atexit.register(cleanup)
427
+ atexit.register(lambda: asyncio.run(cleanup()))
430
428
 
431
429
  @pool.task("my_task")
432
430
  async def my_task(agent_id: UUID, context: MyContext) -> None:
@@ -447,11 +445,13 @@ docker run -e DATABASE_URL=... -e REDIS_URL=... -e OPENAI_API_KEY=... my-worker
447
445
 
448
446
  ## Backend Architecture
449
447
 
450
- ### Redis
448
+ ### Redis (Default)
449
+
450
+ agentexec uses Redis for task queuing, result storage, and coordination between workers. The queue uses a partitioned design where tasks with a `lock_key` go to dedicated partition queues (`{prefix}:{lock_key}`) and are serialized by a lock, while tasks without a lock key go to the default queue for concurrent processing.
451
451
 
452
- agentexec uses Redis for task queuing, result storage, real-time log streaming, and coordination between workers. We chose Redis because it provides exactly the primitives we need (lists, pubsub, atomic counters) with minimal operational overhead.
452
+ Workers dequeue using Redis `SCAN`, which iterates keys in hash-table order — effectively random. This provides fair distribution across partitions without explicit round-robin. See `examples/queue-fairness/` for benchmarks showing uniform distribution at 1000+ partitions.
453
453
 
454
- **AWS Compatible:** Since we use standard Redis features, AWS ElastiCache works out of the box.
454
+ **AWS Compatible:** Standard Redis features only — AWS ElastiCache works out of the box.
455
455
 
456
456
  ```bash
457
457
  AGENTEXEC_REDIS_URL=redis://localhost:6379/0
@@ -459,18 +459,45 @@ AGENTEXEC_REDIS_URL=redis://localhost:6379/0
459
459
  AGENTEXEC_REDIS_URL=redis://my-cluster.abc123.use1.cache.amazonaws.com:6379
460
460
  ```
461
461
 
462
+ ### Kafka (Experimental)
463
+
464
+ Kafka can be used as an alternative backend for task queuing and schedule storage. Activity tracking always uses PostgreSQL regardless of backend — Kafka is not a KV store, so state operations (`get`/`set`, counters) are not supported and will raise `NotImplementedError`.
465
+
466
+ ```bash
467
+ pip install agentexec[kafka]
468
+
469
+ AGENTEXEC_STATE_BACKEND=agentexec.state.kafka
470
+ KAFKA_BOOTSTRAP_SERVERS=localhost:9092
471
+ ```
472
+
473
+ Kafka uses consumer groups for work distribution instead of Redis's scan-based dequeue. Topics are auto-created on first use. Schedule storage uses a compacted topic that is replayed on each poll.
474
+
475
+ **When to consider Kafka:**
476
+ - You already run Kafka and want to avoid adding Redis
477
+ - You need durable, replayable task queues with built-in replication
478
+ - You want partition-level ordering guarantees (tasks with the same key go to the same partition)
479
+
480
+ **Limitations:**
481
+ - No KV state — `backend.state.get/set/delete` and counters raise `NotImplementedError`
482
+ - No partition-level locking (Kafka partition assignment handles isolation instead)
483
+ - Schedule `get_due()` replays the entire compacted topic on every poll
484
+ - `lock_key` is used as a Kafka partition key (routing), not as a mutex
485
+
486
+ See [Kafka configuration](#kafka-settings) below for all available settings.
487
+
462
488
  ### Extensible State Backend
463
489
 
464
- The state backend is pluggable. We're adding support for additional backends (DynamoDB, PostgreSQL, in-memory for testing). You can also implement your own:
490
+ The state backend is pluggable. Implement `BaseBackend` with `state`, `queue`, and `schedule` sub-backends:
465
491
 
466
492
  ```bash
467
- AGENTEXEC_STATE_BACKEND=agentexec.state.redis_backend # Default
468
- AGENTEXEC_STATE_BACKEND=myapp.state.dynamodb_backend # Custom
493
+ AGENTEXEC_STATE_BACKEND=agentexec.state.redis # Default
494
+ AGENTEXEC_STATE_BACKEND=agentexec.state.kafka # Experimental
495
+ AGENTEXEC_STATE_BACKEND=myapp.state.custom # Custom (must export Backend class)
469
496
  ```
470
497
 
471
498
  ### Database
472
499
 
473
- Activity tracking uses SQLAlchemy with two tables:
500
+ Activity tracking uses SQLAlchemy with two tables (always PostgreSQL/SQLite, independent of the state backend):
474
501
 
475
502
  **`agentexec_activity`** - Main activity records
476
503
  - `agent_id` - Unique identifier (UUID)
@@ -504,25 +531,23 @@ from agentexec.activity.schemas import (
504
531
  **List activities:**
505
532
 
506
533
  ```python
507
- with Session(engine) as db:
508
- result = ax.activity.list(db, page=1, page_size=20)
509
- # Returns ActivityListSchema:
510
- # {
511
- # "items": [...], # List of ActivityListItemSchema
512
- # "total": 150,
513
- # "page": 1,
514
- # "page_size": 20,
515
- # "total_pages": 8
516
- # }
534
+ result = await ax.activity.list(page=1, page_size=20)
535
+ # Returns ActivityListSchema:
536
+ # {
537
+ # "items": [...], # List of ActivityListItemSchema
538
+ # "total": 150,
539
+ # "page": 1,
540
+ # "page_size": 20,
541
+ # "total_pages": 8
542
+ # }
517
543
  ```
518
544
 
519
545
  **Get activity detail:**
520
546
 
521
547
  ```python
522
- activity = ax.activity.detail(db, agent_id=agent_id)
548
+ activity = await ax.activity.detail(agent_id=agent_id)
523
549
  # Returns ActivityDetailSchema:
524
550
  # {
525
- # "id": "...",
526
551
  # "agent_id": "...",
527
552
  # "agent_type": "research_company",
528
553
  # "created_at": "2024-01-15T10:30:00Z",
@@ -538,7 +563,7 @@ activity = ax.activity.detail(db, agent_id=agent_id)
538
563
  **Count active agents:**
539
564
 
540
565
  ```python
541
- count = ax.activity.active_count(db)
566
+ count = await ax.activity.count_active()
542
567
  # Returns number of agents with status QUEUED or RUNNING
543
568
  ```
544
569
 
@@ -553,13 +578,15 @@ from sqlalchemy.orm import Session
553
578
  import agentexec as ax
554
579
 
555
580
  def build_table(db: Session) -> Table:
556
- table = Table(title=f"Active Agents: {ax.activity.active_count(db)}")
581
+ count = asyncio.run(ax.activity.count_active())
582
+ table = Table(title=f"Active Agents: {count}")
557
583
  table.add_column("Status")
558
584
  table.add_column("Task")
559
585
  table.add_column("Message")
560
586
  table.add_column("Progress")
561
587
 
562
- for item in ax.activity.list(db, page=1, page_size=10).items:
588
+ activities = asyncio.run(ax.activity.list(page=1, page_size=10))
589
+ for item in activities.items:
563
590
  table.add_row(
564
591
  item.status,
565
592
  item.agent_type,
@@ -673,7 +700,7 @@ async def scheduled(agent_id: UUID, context: MyContext) -> None: ...
673
700
 
674
701
  pool.add_schedule("name", "0 * * * *", MyContext(), repeat=3) # Schedule separately
675
702
 
676
- pool.run() # Blocking - runs workers + scheduler
703
+ pool.run() # Blocking - runs workers + scheduler + retry handling
677
704
  pool.start() # Non-blocking - starts workers in background
678
705
  pool.shutdown() # Graceful shutdown
679
706
  ```
@@ -684,20 +711,20 @@ pool.shutdown() # Graceful shutdown
684
711
  import agentexec as ax
685
712
 
686
713
  # Create activity (returns agent_id for tracking)
687
- agent_id = ax.activity.create(task_name, message="Starting...")
714
+ agent_id = await ax.activity.create(task_name, message="Starting...")
688
715
 
689
716
  # Update progress
690
- ax.activity.update(agent_id, message, percentage=50)
691
- ax.activity.complete(agent_id, message="Done")
692
- ax.activity.error(agent_id, error="Failed: ...")
717
+ await ax.activity.update(agent_id, message, percentage=50)
718
+ await ax.activity.complete(agent_id, message="Done")
719
+ await ax.activity.error(agent_id, message="Failed: ...")
693
720
 
694
- # Query activities
695
- activities = ax.activity.list(db, page=1, page_size=20)
696
- activity = ax.activity.detail(db, agent_id=agent_id)
697
- count = ax.activity.active_count(db)
721
+ # Query activities (uses database session)
722
+ activities = await ax.activity.list(page=1, page_size=20)
723
+ activity = await ax.activity.detail(agent_id=agent_id)
724
+ count = await ax.activity.count_active()
698
725
 
699
726
  # Cleanup
700
- canceled = ax.activity.cancel_pending(db)
727
+ canceled = await ax.activity.cancel_pending()
701
728
  ```
702
729
 
703
730
  ### Runners
@@ -759,13 +786,16 @@ ax.Base # SQLAlchemy declarative base for activity tables
759
786
  All settings via environment variables:
760
787
 
761
788
  ```bash
762
- # Redis (required)
763
- AGENTEXEC_REDIS_URL=redis://localhost:6379/0
789
+ # Redis
790
+ AGENTEXEC_REDIS_URL=redis://localhost:6379/0 # Also accepts REDIS_URL
791
+ AGENTEXEC_REDIS_POOL_SIZE=10
792
+ AGENTEXEC_REDIS_POOL_TIMEOUT=5
764
793
 
765
794
  # Workers
766
795
  AGENTEXEC_NUM_WORKERS=4
767
- AGENTEXEC_QUEUE_NAME=agentexec_tasks
796
+ AGENTEXEC_QUEUE_PREFIX=agentexec_tasks # Also accepts AGENTEXEC_QUEUE_NAME
768
797
  AGENTEXEC_GRACEFUL_SHUTDOWN_TIMEOUT=300
798
+ AGENTEXEC_MAX_TASK_RETRIES=3 # 0 to disable retries
769
799
 
770
800
  # Database
771
801
  AGENTEXEC_TABLE_PREFIX=agentexec_
@@ -773,14 +803,15 @@ AGENTEXEC_TABLE_PREFIX=agentexec_
773
803
  # Results
774
804
  AGENTEXEC_RESULT_TTL=3600
775
805
 
776
- # Task locking
806
+ # Task locking (Redis backend only)
777
807
  AGENTEXEC_LOCK_TTL=1800
778
808
 
779
809
  # Scheduling
780
810
  AGENTEXEC_SCHEDULER_TIMEZONE=UTC
811
+ AGENTEXEC_SCHEDULER_POLL_INTERVAL=10
781
812
 
782
813
  # State backend
783
- AGENTEXEC_STATE_BACKEND=agentexec.state.redis_backend
814
+ AGENTEXEC_STATE_BACKEND=agentexec.state.redis # or agentexec.state.kafka
784
815
  AGENTEXEC_KEY_PREFIX=agentexec
785
816
 
786
817
  # Activity messages (customizable)
@@ -790,6 +821,21 @@ AGENTEXEC_ACTIVITY_MESSAGE_COMPLETE="Task completed successfully."
790
821
  AGENTEXEC_ACTIVITY_MESSAGE_ERROR="Task failed with error: {error}"
791
822
  ```
792
823
 
824
+ ### Kafka Settings
825
+
826
+ These settings only apply when using the Kafka state backend (`AGENTEXEC_STATE_BACKEND=agentexec.state.kafka`):
827
+
828
+ ```bash
829
+ KAFKA_BOOTSTRAP_SERVERS=localhost:9092 # Also accepts AGENTEXEC_KAFKA_BOOTSTRAP_SERVERS
830
+ AGENTEXEC_KAFKA_DEFAULT_PARTITIONS=6 # Partitions for auto-created topics
831
+ AGENTEXEC_KAFKA_REPLICATION_FACTOR=1 # Replication factor for auto-created topics
832
+ AGENTEXEC_KAFKA_MAX_BATCH_SIZE=16384 # Producer max batch size (bytes)
833
+ AGENTEXEC_KAFKA_LINGER_MS=5 # Producer linger time (ms)
834
+ AGENTEXEC_KAFKA_RETENTION_MS=-1 # Retention for compacted topics (-1 = forever)
835
+ ```
836
+
837
+ For single-node development, set `KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1` on your broker; otherwise, consumer groups will hang.
838
+
793
839
  ---
794
840
 
795
841
  ## Development
@@ -838,4 +884,5 @@ MIT License - see [LICENSE](LICENSE) for details.
838
884
  - **Documentation**: [docs/](docs/)
839
885
  - **Example App**: [examples/openai-agents-fastapi/](examples/openai-agents-fastapi/)
840
886
  - **Multi-Tenancy Example**: [examples/multi-tenancy/](examples/multi-tenancy/)
887
+ - **Queue Fairness Benchmark**: [examples/queue-fairness/](examples/queue-fairness/)
841
888
  - **Issues**: [GitHub Issues](https://github.com/Agent-CI/agentexec/issues)