PyPI - agentexec - Versions diffs - 0.1.6__tar.gz → 0.2.0__tar.gz - Mend

agentexec 0.1.6__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165)

agentexec-0.2.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,125 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  # -----------------------------------------------------------------------
+  # Unit tests — no external services (fakeredis + SQLite)
+  # -----------------------------------------------------------------------
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Run unit tests
+        run: |
+          uv run pytest tests/ \
+            --ignore=tests/test_kafka_integration.py \
+            -o "addopts=" \
+            -v --tb=long
+  # -----------------------------------------------------------------------
+  # Kafka integration tests — real broker via docker run
+  # -----------------------------------------------------------------------
+  test-kafka:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Start Kafka broker
+        run: |
+          docker run -d --name kafka \
+            -p 9092:9092 \
+            -e KAFKA_NODE_ID=1 \
+            -e KAFKA_PROCESS_ROLES=broker,controller \
+            -e KAFKA_CONTROLLER_QUORUM_VOTERS=1@localhost:9093 \
+            -e KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER \
+            -e KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 \
+            -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 \
+            -e KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT \
+            -e KAFKA_INTER_BROKER_LISTENER_NAME=PLAINTEXT \
+            -e KAFKA_LOG_CLEANER_MIN_COMPACTION_LAG_MS=0 \
+            -e KAFKA_LOG_CLEANER_MIN_CLEANABLE_RATIO=0.01 \
+            -e KAFKA_LOG_RETENTION_MS=60000 \
+            -e KAFKA_NUM_PARTITIONS=1 \
+            -e KAFKA_AUTO_CREATE_TOPICS_ENABLE=true \
+            -e KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 \
+            -e KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 \
+            -e CLUSTER_ID=ciTestCluster0001 \
+            apache/kafka:3.9.0
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+      - name: Set up Python
+        run: uv python install 3.12
+      - name: Install dependencies
+        run: uv sync --dev --extra kafka
+      - name: Wait for Kafka to be ready
+        run: |
+          echo "Waiting for Kafka..."
+          for i in $(seq 1 30); do
+            if nc -z localhost 9092 2>/dev/null; then
+              echo "Kafka port is open"
+              sleep 5
+              echo "Kafka is ready"
+              exit 0
+            fi
+            echo "  attempt $i/30..."
+            sleep 2
+          done
+          echo "Kafka failed to start"
+          docker logs kafka
+          exit 1
+      - name: Run Kafka integration tests
+        timeout-minutes: 2
+        run: |
+          uv run pytest tests/test_kafka_integration.py \
+            -o "addopts=" \
+            -v --tb=long 2>&1 | tee /tmp/kafka_test_output.txt
+          exit ${PIPESTATUS[0]}
+        env:
+          AGENTEXEC_STATE_BACKEND: agentexec.state.kafka
+          KAFKA_BOOTSTRAP_SERVERS: localhost:9092
+          AGENTEXEC_KAFKA_DEFAULT_PARTITIONS: "2"
+          AGENTEXEC_KAFKA_REPLICATION_FACTOR: "1"
+      - name: Show Kafka logs on failure
+        if: failure()
+        run: docker logs kafka 2>&1 | tail -50
+      - name: Create failure check annotation with output
+        if: failure()
+        run: |
+          if [ -f /tmp/kafka_test_output.txt ]; then
+            grep -E '\[queue_|FAILED|ERROR|AssertionError|TIMEOUT|short test summary' /tmp/kafka_test_output.txt | tail -9 | while IFS= read -r line; do
+              echo "::warning::$line"
+            done
+          fi

{agentexec-0.1.6 → agentexec-0.2.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,118 @@
 # Changelog
+## v0.2.0
+Major refactor of the backend, queue, activity, worker, and database layers.
+If you're upgrading from 0.1.x, read the **Breaking Changes** section closely.
+### Breaking Changes
+**Fully async database layer**
+- `configure_engine()` and `get_session()` now require an async SQLAlchemy engine (`AsyncEngine`) and return `AsyncSession`
+- Database URLs must use async drivers (e.g. `sqlite+aiosqlite://`, `postgresql+asyncpg://`)
+- `sqlalchemy[asyncio]` is now a core dependency
+**Async activity API**
+- All activity functions are async: `await ax.activity.create(...)`, `await ax.activity.update(...)`, `await ax.activity.complete(...)`, `await ax.activity.error(...)`
+- `activity.list()`, `activity.detail()`, and `activity.count_active()` are async and accept `AsyncSession`
+- Activity handlers are async (`async def __call__`)
+- The `session` parameter was removed from activity mutations — the handler owns its own session lifecycle
+**Pool entry point**
+- `pool.run()` was removed. Use `await pool.start()` in an asyncio loop, or the new `agentexec run mymodule:pool` CLI
+- `AGENTEXEC_QUEUE_NAME` renamed to `AGENTEXEC_QUEUE_PREFIX` (old name still accepted as alias)
+- `agentexec.state.redis_backend` renamed to `agentexec.state.redis` — update `AGENTEXEC_STATE_BACKEND` if set explicitly
+**Task context serialization**
+- `Task.context` is now `Mapping[str, Any]` (raw dict), not a typed BaseModel — hydration happens at execution time
+- `Task.create()` is now async
+**Queue backend protocol**
+- `BaseQueueBackend.push()` signature changed from `high_priority: bool` to `priority: Priority | None` — affects Redis, Kafka, and any custom queue backend
+**Removed APIs**
+- `set_global_session`/`get_global_session`/`remove_global_session` — use `configure_engine`/`get_session`
+- `state.backend.publish`/`subscribe` (pubsub), `index_add`/`index_range`/`index_remove`, `clear`, `configure`
+- `worker/logging.py` and `core/logging.py` — all modules use stdlib `logging.getLogger(__name__)` directly
+### New Features
+**CLI entrypoint**
+- New `agentexec` CLI command: `agentexec run mymodule:pool --create-tables --workers 4`
+**Partitioned Redis queues**
+- Tasks with `lock_key` route to dedicated partition queues with per-partition locking and SCAN-based fair dequeue
+**Activity handler pattern**
+- Pluggable persistence via `PostgresHandler` (default) and `IPCHandler` (worker processes)
+**Task retry**
+- Failed tasks requeue as high priority with `AGENTEXEC_MAX_TASK_RETRIES` (default 3)
+**Kafka backend (experimental)**
+- `pip install agentexec[kafka]` for queue and schedule via Kafka
+**Typed worker IPC**
+- `TaskFailed` and `ActivityEvent` messages flow over `multiprocessing.Queue` with pydantic validation
+**Schedule composite keys**
+- `{task_name}:{cron}:{context_hash}` for unique schedule identity
+**Activity model `create()` classmethod**
+- `Activity.create()` encapsulates record + initial log entry creation in one async call
+**Async engine disposal**
+- `dispose_engine()` ensures the async engine's background threads exit cleanly on shutdown
+### Architecture Changes
+**Worker pool refactor**
+- Workers use the `spawn` multiprocessing start method with explicit context — no inherited state
+- Event handling and scheduling extracted into `_EventHandler` and `_Scheduler` classes
+- `StateEvent` replaced with stdlib `multiprocessing.Event` — removes dependency on the state backend for shutdown coordination
+- Class-based backend architecture with ABCs (`BaseStateBackend`, `BaseQueueBackend`, `BaseScheduleBackend`)
+- `Task` is pure data, `TaskDefinition` owns behavior
+- Status enum extracted to `activity/status.py` (no SQLAlchemy dependency)
+**Logging**
+- All modules use stdlib `logging.getLogger(__name__)`
+- Spawned workers bootstrap a `StreamHandler` on the root logger so logs reach stderr
+- Pool messages use `logger.info`/`logger.error` instead of `print()`
+### Bug Fixes
+- **Orphaned worker processes on shutdown.** SIGTERM (systemd/docker stop), SIGKILL, and SIGHUP were leaving worker processes running. Fixed via an asyncio SIGTERM handler in the CLI and `prctl(PR_SET_PDEATHSIG)` in each worker so the kernel terminates workers when the pool dies
+- **Worker and scheduler error loops throttled.** Infra failures (e.g. Redis unreachable) were producing 100k+ log lines per second. Added a 1s sleep after outer-loop exceptions
+- **Unregistered task name crash.** Worker now logs an error and skips instead of crashing when it receives a task for an unknown name
+- Failed tasks now log full tracebacks via `logger.exception` instead of `logger.error`
+- Kafka consumer handles `None` message values without crashing
+- `ActivityUpdated.status` is a `Status` enum instead of raw string
+### Documentation
+- Full documentation sweep for the async API — connection strings, CLI usage, `await` on activity calls across all guides and API references
+## v0.1.7
+### New Features
+**Scheduled tasks with cron expressions**
+- `@pool.schedule("task_name", "*/5 * * * *")` decorator registers and schedules a task in one step
+- `pool.add_schedule()` for imperative scheduling of already-registered tasks
+- Cron expressions evaluated in configurable timezone (`AGENTEXEC_SCHEDULER_TIMEZONE`, default UTC)
+- Repeat budget: `-1` for forever (default), `0` for one-shot, `N` for N more executions
+- Scheduler runs automatically inside `pool.run()` — no extra setup needed
+- Idempotent registration: keyed by task name, so restarts and multiple pool instances overwrite instead of duplicating
+- Clock-drift resilient: next run computed from intended anchor time, not wall clock
+- Skips missed intervals after downtime instead of enqueuing a burst of catch-up tasks
+- New `croniter` dependency for cron expression parsing
+### Improvements
+**State backend sorted set operations**
+- Added `zadd()`, `zrangebyscore()`, `zrem()` to `StateBackend` protocol and Redis implementation
+- Used internally by the scheduler for efficient due-task polling
 ## v0.1.6
 ### New Features

{agentexec-0.1.6 → agentexec-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agentexec
-Version: 0.1.6
+Version: 0.2.0
 Summary: Production-ready orchestration for OpenAI Agents with Redis-backed coordination, activity tracking, and workflow management
 Project-URL: Homepage, https://github.com/Agent-CI/agentexec
 Project-URL: Documentation, https://github.com/Agent-CI/agentexec#readme
@@ -16,11 +16,14 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.12
+Requires-Dist: croniter>=6.0.0
 Requires-Dist: openai-agents>=0.1.0
 Requires-Dist: pydantic-settings>=2.5.0
 Requires-Dist: pydantic>=2.12.0
 Requires-Dist: redis>=7.0.1
-Requires-Dist: sqlalchemy>=2.0.44
+Requires-Dist: sqlalchemy[asyncio]>=2.0.44
+Provides-Extra: kafka
+Requires-Dist: aiokafka>=0.11.0; extra == 'kafka'
 Description-Content-Type: text/markdown
 # `agentexec`
@@ -147,8 +150,8 @@ async def start_research(company: str) -> dict:
     return {"agent_id": str(task.agent_id), "status": "queued"}  # Return agent_id for status polling
 @router.get("/research/{agent_id}")
-def get_status(agent_id: UUID, db: Session = Depends(get_db)) -> ax.activity.ActivityDetailSchema:
-    return ax.activity.detail(db, agent_id=agent_id)  # Query by agent_id
+async def get_status(agent_id: UUID) -> ax.activity.ActivityDetailSchema:
+    return await ax.activity.detail(agent_id=agent_id)
 ```
 ### 4. Run Workers
@@ -175,8 +178,8 @@ task = await ax.enqueue(
 )
 # Filter activities by metadata
-activities = ax.activity.list(db, metadata_filter={"organization_id": "org-123"})
-detail = ax.activity.detail(db, agent_id, metadata_filter={"organization_id": "org-123"})
+activities = await ax.activity.list(metadata_filter={"organization_id": "org-123"})
+detail = await ax.activity.detail(agent_id=agent_id, metadata_filter={"organization_id": "org-123"})
 # Access metadata programmatically (excluded from API serialization by default)
 org_id = detail.metadata["organization_id"]
@@ -211,7 +214,7 @@ agent = Agent(
 Update progress explicitly from your task:
 ```python
-ax.activity.update(agent_id, "Processing batch 3 of 10", percentage=30)
+await ax.activity.update(agent_id, "Processing batch 3 of 10", percentage=30)
 ```
 ### Task Locking
@@ -227,11 +230,34 @@ async def associate(agent_id: UUID, context: ObservationContext):
 pool.add_task("associate_observation", handler, lock_key="user:{user_id}")
 ```
-The `lock_key` is a string template evaluated against the task context fields. When a worker dequeues a task whose lock is held, it puts the task back at the end of the queue and moves on. The lock is released automatically when the task completes or errors.
+The `lock_key` is a string template evaluated against the task context fields. Tasks with the same evaluated lock key are routed to a dedicated partition queue (`{prefix}:{lock_key}`) where they execute one at a time. Workers skip locked partitions and move on to the next available one — no requeuing, no wasted cycles.
-The lock TTL (`AGENTEXEC_LOCK_TTL`, default 1800s) is a safety net for worker process death — locks are always explicitly released on task completion or error. Set this higher than your longest expected task duration.
+The lock is released automatically when a task completes or errors. The lock TTL (`AGENTEXEC_LOCK_TTL`, default 1800s) is a safety net for worker process death (OOM, SIGKILL) — under normal operation, locks are always explicitly released. Set this higher than your longest expected task duration.
-**Note:** When a task is requeued due to a held lock, it goes to the back of the queue. This means strict FIFO ordering is not guaranteed between tasks sharing the same lock key — if tasks T2 and T3 are both waiting on T1's lock, either could run next after T1 completes.
+### Scheduled Tasks
+Run tasks on a recurring interval using cron expressions:
+```python
+# Decorator — registers the task and schedules it in one step
+@pool.schedule("refresh_cache", "*/5 * * * *")
+async def refresh(agent_id: UUID, context: RefreshContext):
+    ...
+# With context and repeat limit
+@pool.schedule("sync_users", "0 * * * *", context=SyncContext(full=True), repeat=3)
+async def sync(agent_id: UUID, context: SyncContext):
+    ...
+```
+For tasks registered separately, use `pool.add_schedule()`:
+```python
+pool.add_schedule("refresh_cache", "*/5 * * * *", RefreshContext(scope="all"))
+pool.add_schedule("refresh_cache", "0 * * * *", RefreshContext(scope="users"), repeat=3)
+```
+The scheduler runs automatically inside `pool.run()`. Cron expressions are evaluated in the configured timezone (`AGENTEXEC_SCHEDULER_TIMEZONE`, default UTC) so schedules read naturally regardless of server timezone. Next-run times are computed from the intended anchor time, not wall clock, to prevent cumulative drift.
 ### Priority Queue
@@ -366,8 +392,7 @@ if __name__ == "__main__":
     try:
         pool.run()
     except KeyboardInterrupt:
-        with Session(engine) as db:
-            ax.activity.cancel_pending(db)
+        asyncio.run(ax.activity.cancel_pending())
 ```
 ### Docker Deployment
@@ -396,11 +421,10 @@ import agentexec as ax
 engine = create_engine(os.environ["DATABASE_URL"])
 pool = ax.Pool(engine=engine)
-def cleanup() -> None:
-    with Session(engine) as db:
-        ax.activity.cancel_pending(db)
+async def cleanup() -> None:
+    await ax.activity.cancel_pending()
-atexit.register(cleanup)
+atexit.register(lambda: asyncio.run(cleanup()))
 @pool.task("my_task")
 async def my_task(agent_id: UUID, context: MyContext) -> None:
@@ -421,11 +445,13 @@ docker run -e DATABASE_URL=... -e REDIS_URL=... -e OPENAI_API_KEY=... my-worker
 ## Backend Architecture
-### Redis
+### Redis (Default)
+agentexec uses Redis for task queuing, result storage, and coordination between workers. The queue uses a partitioned design where tasks with a `lock_key` go to dedicated partition queues (`{prefix}:{lock_key}`) and are serialized by a lock, while tasks without a lock key go to the default queue for concurrent processing.
-agentexec uses Redis for task queuing, result storage, real-time log streaming, and coordination between workers. We chose Redis because it provides exactly the primitives we need (lists, pubsub, atomic counters) with minimal operational overhead.
+Workers dequeue using Redis `SCAN`, which iterates keys in hash-table order — effectively random. This provides fair distribution across partitions without explicit round-robin. See `examples/queue-fairness/` for benchmarks showing uniform distribution at 1000+ partitions.
-**AWS Compatible:** Since we use standard Redis features, AWS ElastiCache works out of the box.
+**AWS Compatible:** Standard Redis features only — AWS ElastiCache works out of the box.
 ```bash
 AGENTEXEC_REDIS_URL=redis://localhost:6379/0
@@ -433,18 +459,45 @@ AGENTEXEC_REDIS_URL=redis://localhost:6379/0
 AGENTEXEC_REDIS_URL=redis://my-cluster.abc123.use1.cache.amazonaws.com:6379
 ```
+### Kafka (Experimental)
+Kafka can be used as an alternative backend for task queuing and schedule storage. Activity tracking always uses PostgreSQL regardless of backend — Kafka is not a KV store, so state operations (`get`/`set`, counters) are not supported and will raise `NotImplementedError`.
+```bash
+pip install agentexec[kafka]
+AGENTEXEC_STATE_BACKEND=agentexec.state.kafka
+KAFKA_BOOTSTRAP_SERVERS=localhost:9092
+```
+Kafka uses consumer groups for work distribution instead of Redis's scan-based dequeue. Topics are auto-created on first use. Schedule storage uses a compacted topic that is replayed on each poll.
+**When to consider Kafka:**
+- You already run Kafka and want to avoid adding Redis
+- You need durable, replayable task queues with built-in replication
+- You want partition-level ordering guarantees (tasks with the same key go to the same partition)
+**Limitations:**
+- No KV state — `backend.state.get/set/delete` and counters raise `NotImplementedError`
+- No partition-level locking (Kafka partition assignment handles isolation instead)
+- Schedule `get_due()` replays the entire compacted topic on every poll
+- `lock_key` is used as a Kafka partition key (routing), not as a mutex
+See [Kafka configuration](#kafka-settings) below for all available settings.
 ### Extensible State Backend
-The state backend is pluggable. We're adding support for additional backends (DynamoDB, PostgreSQL, in-memory for testing). You can also implement your own:
+The state backend is pluggable. Implement `BaseBackend` with `state`, `queue`, and `schedule` sub-backends:
 ```bash
-AGENTEXEC_STATE_BACKEND=agentexec.state.redis_backend  # Default
-AGENTEXEC_STATE_BACKEND=myapp.state.dynamodb_backend   # Custom
+AGENTEXEC_STATE_BACKEND=agentexec.state.redis   # Default
+AGENTEXEC_STATE_BACKEND=agentexec.state.kafka    # Experimental
+AGENTEXEC_STATE_BACKEND=myapp.state.custom       # Custom (must export Backend class)
 ```
 ### Database
-Activity tracking uses SQLAlchemy with two tables:
+Activity tracking uses SQLAlchemy with two tables (always PostgreSQL/SQLite, independent of the state backend):
 **`agentexec_activity`** - Main activity records
 - `agent_id` - Unique identifier (UUID)
@@ -478,25 +531,23 @@ from agentexec.activity.schemas import (
 **List activities:**
 ```python
-with Session(engine) as db:
-    result = ax.activity.list(db, page=1, page_size=20)
-    # Returns ActivityListSchema:
-    # {
-    #   "items": [...],      # List of ActivityListItemSchema
-    #   "total": 150,
-    #   "page": 1,
-    #   "page_size": 20,
-    #   "total_pages": 8
-    # }
+result = await ax.activity.list(page=1, page_size=20)
+# Returns ActivityListSchema:
+# {
+#   "items": [...],      # List of ActivityListItemSchema
+#   "total": 150,
+#   "page": 1,
+#   "page_size": 20,
+#   "total_pages": 8
+# }
 ```
 **Get activity detail:**
 ```python
-activity = ax.activity.detail(db, agent_id=agent_id)
+activity = await ax.activity.detail(agent_id=agent_id)
 # Returns ActivityDetailSchema:
 # {
-#   "id": "...",
 #   "agent_id": "...",
 #   "agent_type": "research_company",
 #   "created_at": "2024-01-15T10:30:00Z",
@@ -512,7 +563,7 @@ activity = ax.activity.detail(db, agent_id=agent_id)
 **Count active agents:**
 ```python
-count = ax.activity.active_count(db)
+count = await ax.activity.count_active()
 # Returns number of agents with status QUEUED or RUNNING
 ```
@@ -527,13 +578,15 @@ from sqlalchemy.orm import Session
 import agentexec as ax
 def build_table(db: Session) -> Table:
-    table = Table(title=f"Active Agents: {ax.activity.active_count(db)}")
+    count = asyncio.run(ax.activity.count_active())
+    table = Table(title=f"Active Agents: {count}")
     table.add_column("Status")
     table.add_column("Task")
     table.add_column("Message")
     table.add_column("Progress")
-    for item in ax.activity.list(db, page=1, page_size=10).items:
+    activities = asyncio.run(ax.activity.list(page=1, page_size=10))
+    for item in activities.items:
         table.add_row(
             item.status,
             item.agent_type,
@@ -642,7 +695,12 @@ async def handler(agent_id: UUID, context: MyContext) -> None: ...
 @pool.task("name", lock_key="user:{user_id}")  # Sequential per user
 async def locked(agent_id: UUID, context: MyContext) -> None: ...
-pool.run()       # Blocking - runs workers
+@pool.schedule("name", "*/5 * * * *")  # Register + schedule in one step
+async def scheduled(agent_id: UUID, context: MyContext) -> None: ...
+pool.add_schedule("name", "0 * * * *", MyContext(), repeat=3)  # Schedule separately
+pool.run()       # Blocking - runs workers + scheduler + retry handling
 pool.start()     # Non-blocking - starts workers in background
 pool.shutdown()  # Graceful shutdown
 ```
@@ -653,20 +711,20 @@ pool.shutdown()  # Graceful shutdown
 import agentexec as ax
 # Create activity (returns agent_id for tracking)
-agent_id = ax.activity.create(task_name, message="Starting...")
+agent_id = await ax.activity.create(task_name, message="Starting...")
 # Update progress
-ax.activity.update(agent_id, message, percentage=50)
-ax.activity.complete(agent_id, message="Done")
-ax.activity.error(agent_id, error="Failed: ...")
+await ax.activity.update(agent_id, message, percentage=50)
+await ax.activity.complete(agent_id, message="Done")
+await ax.activity.error(agent_id, message="Failed: ...")
-# Query activities
-activities = ax.activity.list(db, page=1, page_size=20)
-activity = ax.activity.detail(db, agent_id=agent_id)
-count = ax.activity.active_count(db)
+# Query activities (uses database session)
+activities = await ax.activity.list(page=1, page_size=20)
+activity = await ax.activity.detail(agent_id=agent_id)
+count = await ax.activity.count_active()
 # Cleanup
-canceled = ax.activity.cancel_pending(db)
+canceled = await ax.activity.cancel_pending()
 ```
 ### Runners
@@ -728,13 +786,16 @@ ax.Base  # SQLAlchemy declarative base for activity tables
 All settings via environment variables:
 ```bash
-# Redis (required)
-AGENTEXEC_REDIS_URL=redis://localhost:6379/0
+# Redis
+AGENTEXEC_REDIS_URL=redis://localhost:6379/0    # Also accepts REDIS_URL
+AGENTEXEC_REDIS_POOL_SIZE=10
+AGENTEXEC_REDIS_POOL_TIMEOUT=5
 # Workers
 AGENTEXEC_NUM_WORKERS=4
-AGENTEXEC_QUEUE_NAME=agentexec_tasks
+AGENTEXEC_QUEUE_PREFIX=agentexec_tasks          # Also accepts AGENTEXEC_QUEUE_NAME
 AGENTEXEC_GRACEFUL_SHUTDOWN_TIMEOUT=300
+AGENTEXEC_MAX_TASK_RETRIES=3                    # 0 to disable retries
 # Database
 AGENTEXEC_TABLE_PREFIX=agentexec_
@@ -742,11 +803,15 @@ AGENTEXEC_TABLE_PREFIX=agentexec_
 # Results
 AGENTEXEC_RESULT_TTL=3600
-# Task locking
+# Task locking (Redis backend only)
 AGENTEXEC_LOCK_TTL=1800
+# Scheduling
+AGENTEXEC_SCHEDULER_TIMEZONE=UTC
+AGENTEXEC_SCHEDULER_POLL_INTERVAL=10
 # State backend
-AGENTEXEC_STATE_BACKEND=agentexec.state.redis_backend
+AGENTEXEC_STATE_BACKEND=agentexec.state.redis   # or agentexec.state.kafka
 AGENTEXEC_KEY_PREFIX=agentexec
 # Activity messages (customizable)
@@ -756,6 +821,21 @@ AGENTEXEC_ACTIVITY_MESSAGE_COMPLETE="Task completed successfully."
 AGENTEXEC_ACTIVITY_MESSAGE_ERROR="Task failed with error: {error}"
 ```
+### Kafka Settings
+These settings only apply when using the Kafka state backend (`AGENTEXEC_STATE_BACKEND=agentexec.state.kafka`):
+```bash
+KAFKA_BOOTSTRAP_SERVERS=localhost:9092          # Also accepts AGENTEXEC_KAFKA_BOOTSTRAP_SERVERS
+AGENTEXEC_KAFKA_DEFAULT_PARTITIONS=6            # Partitions for auto-created topics
+AGENTEXEC_KAFKA_REPLICATION_FACTOR=1            # Replication factor for auto-created topics
+AGENTEXEC_KAFKA_MAX_BATCH_SIZE=16384            # Producer max batch size (bytes)
+AGENTEXEC_KAFKA_LINGER_MS=5                     # Producer linger time (ms)
+AGENTEXEC_KAFKA_RETENTION_MS=-1                 # Retention for compacted topics (-1 = forever)
+```
+For single-node development, set `KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1` on your broker or consumer groups will hang.
 ---
 ## Development
@@ -804,4 +884,5 @@ MIT License - see [LICENSE](LICENSE) for details.
 - **Documentation**: [docs/](docs/)
 - **Example App**: [examples/openai-agents-fastapi/](examples/openai-agents-fastapi/)
 - **Multi-Tenancy Example**: [examples/multi-tenancy/](examples/multi-tenancy/)
+- **Queue Fairness Benchmark**: [examples/queue-fairness/](examples/queue-fairness/)
 - **Issues**: [GitHub Issues](https://github.com/Agent-CI/agentexec/issues)

agentexec 0.1.6__tar.gz → 0.2.0__tar.gz

agentexec 0.1.6__tar.gz → 0.2.0__tar.gz