django-agent-runtime 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_agent_runtime/__init__.py +25 -0
- django_agent_runtime/admin.py +155 -0
- django_agent_runtime/api/__init__.py +26 -0
- django_agent_runtime/api/permissions.py +109 -0
- django_agent_runtime/api/serializers.py +114 -0
- django_agent_runtime/api/views.py +472 -0
- django_agent_runtime/apps.py +26 -0
- django_agent_runtime/conf.py +241 -0
- django_agent_runtime/examples/__init__.py +10 -0
- django_agent_runtime/examples/langgraph_adapter.py +164 -0
- django_agent_runtime/examples/langgraph_tools.py +179 -0
- django_agent_runtime/examples/simple_chat.py +69 -0
- django_agent_runtime/examples/tool_agent.py +157 -0
- django_agent_runtime/management/__init__.py +2 -0
- django_agent_runtime/management/commands/__init__.py +2 -0
- django_agent_runtime/management/commands/runagent.py +419 -0
- django_agent_runtime/migrations/0001_initial.py +117 -0
- django_agent_runtime/migrations/0002_persistence_models.py +129 -0
- django_agent_runtime/migrations/0003_persistenceconversation_active_branch_id_and_more.py +212 -0
- django_agent_runtime/migrations/0004_add_anonymous_session_id.py +18 -0
- django_agent_runtime/migrations/__init__.py +2 -0
- django_agent_runtime/models/__init__.py +54 -0
- django_agent_runtime/models/base.py +450 -0
- django_agent_runtime/models/concrete.py +146 -0
- django_agent_runtime/persistence/__init__.py +60 -0
- django_agent_runtime/persistence/helpers.py +148 -0
- django_agent_runtime/persistence/models.py +506 -0
- django_agent_runtime/persistence/stores.py +1191 -0
- django_agent_runtime/runtime/__init__.py +23 -0
- django_agent_runtime/runtime/events/__init__.py +65 -0
- django_agent_runtime/runtime/events/base.py +135 -0
- django_agent_runtime/runtime/events/db.py +129 -0
- django_agent_runtime/runtime/events/redis.py +228 -0
- django_agent_runtime/runtime/events/sync.py +140 -0
- django_agent_runtime/runtime/interfaces.py +475 -0
- django_agent_runtime/runtime/llm/__init__.py +91 -0
- django_agent_runtime/runtime/llm/anthropic.py +249 -0
- django_agent_runtime/runtime/llm/litellm_adapter.py +173 -0
- django_agent_runtime/runtime/llm/openai.py +230 -0
- django_agent_runtime/runtime/queue/__init__.py +75 -0
- django_agent_runtime/runtime/queue/base.py +158 -0
- django_agent_runtime/runtime/queue/postgres.py +248 -0
- django_agent_runtime/runtime/queue/redis_streams.py +336 -0
- django_agent_runtime/runtime/queue/sync.py +277 -0
- django_agent_runtime/runtime/registry.py +186 -0
- django_agent_runtime/runtime/runner.py +540 -0
- django_agent_runtime/runtime/tracing/__init__.py +48 -0
- django_agent_runtime/runtime/tracing/langfuse.py +117 -0
- django_agent_runtime/runtime/tracing/noop.py +36 -0
- django_agent_runtime/urls.py +39 -0
- django_agent_runtime-0.3.6.dist-info/METADATA +723 -0
- django_agent_runtime-0.3.6.dist-info/RECORD +55 -0
- django_agent_runtime-0.3.6.dist-info/WHEEL +5 -0
- django_agent_runtime-0.3.6.dist-info/licenses/LICENSE +22 -0
- django_agent_runtime-0.3.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tool-using agent example.
|
|
3
|
+
|
|
4
|
+
This demonstrates an agent that:
|
|
5
|
+
- Has access to tools
|
|
6
|
+
- Calls tools when the LLM requests them
|
|
7
|
+
- Handles the tool call loop
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
1. Add to RUNTIME_REGISTRY in settings:
|
|
11
|
+
'RUNTIME_REGISTRY': ['django_agent_runtime.examples.tool_agent:register']
|
|
12
|
+
|
|
13
|
+
2. Create a run with agent_key="tool-agent"
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from django_agent_runtime.runtime.interfaces import (
|
|
18
|
+
AgentRuntime,
|
|
19
|
+
RunContext,
|
|
20
|
+
RunResult,
|
|
21
|
+
EventType,
|
|
22
|
+
ToolDefinition,
|
|
23
|
+
)
|
|
24
|
+
from django_agent_runtime.runtime.registry import register_runtime
|
|
25
|
+
from django_agent_runtime.runtime.llm import get_llm_client
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ToolAgentRuntime(AgentRuntime):
    """
    An agent that can use tools.

    Demonstrates the tool calling loop pattern: call the LLM with the
    registered tools, execute any tool calls it requests, feed the results
    back, and repeat until the model answers without tool calls (or the
    iteration cap / cancellation stops the loop).
    """

    # Upper bound on LLM round-trips so a confused model cannot loop forever.
    MAX_ITERATIONS = 10

    @property
    def key(self) -> str:
        """Unique agent key used to route runs to this runtime."""
        return "tool-agent"

    async def run(self, ctx: RunContext) -> RunResult:
        """Execute the tool agent.

        Args:
            ctx: Run context supplying input messages, LLM params, the tool
                registry, cancellation checks, checkpointing and event emission.

        Returns:
            RunResult with the final response text, the full message
            transcript, and token usage accumulated across all iterations.
        """
        llm = get_llm_client()
        messages = list(ctx.input_messages)
        total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

        # Register example tools
        self._register_tools(ctx.tool_registry)

        for iteration in range(self.MAX_ITERATIONS):
            # Stop promptly if the run was cancelled.
            if ctx.cancelled():
                break

            # Checkpoint state so a restarted worker can resume mid-loop.
            await ctx.checkpoint({
                "iteration": iteration,
                "messages": messages,
            })

            # Call LLM with the registered tools advertised in OpenAI format.
            response = await llm.generate(
                messages=messages,
                tools=ctx.tool_registry.to_openai_format(),
                **ctx.params,
            )

            # Accumulate usage across iterations.
            for key in total_usage:
                total_usage[key] += response.usage.get(key, 0)

            assistant_message = response.message
            messages.append(assistant_message)

            # Check for tool calls
            tool_calls = assistant_message.get("tool_calls", [])

            if not tool_calls:
                # No tool calls - we're done
                await ctx.emit(EventType.ASSISTANT_MESSAGE, {
                    "content": assistant_message.get("content", ""),
                    "role": "assistant",
                })
                break

            # Execute tool calls
            for tool_call in tool_calls:
                func = tool_call.get("function", {})
                tool_name = func.get("name", "")
                # Robustness fix: models can emit malformed JSON arguments.
                # Don't let a decode error abort the whole run; surface it
                # to the model as the tool result so it can retry.
                try:
                    tool_args = json.loads(func.get("arguments", "{}") or "{}")
                    arg_error = None
                except (json.JSONDecodeError, TypeError) as e:
                    tool_args = {}
                    arg_error = f"Error: invalid tool arguments: {e}"

                # Emit tool call event
                await ctx.emit(EventType.TOOL_CALL, {
                    "id": tool_call.get("id"),
                    "name": tool_name,
                    "arguments": tool_args,
                })

                # Execute tool (tool failures are reported, never raised).
                if arg_error is not None:
                    result_str = arg_error
                else:
                    try:
                        result = await ctx.tool_registry.execute(tool_name, tool_args)
                        result_str = json.dumps(result) if not isinstance(result, str) else result
                    except Exception as e:
                        result_str = f"Error: {e}"

                # Emit tool result event
                await ctx.emit(EventType.TOOL_RESULT, {
                    "id": tool_call.get("id"),
                    "name": tool_name,
                    "result": result_str,
                })

                # Add tool result to messages for the next LLM iteration.
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.get("id"),
                    "content": result_str,
                })

        # NOTE(review): if the loop ends via MAX_ITERATIONS, the last message
        # may be a tool message rather than an assistant message.
        # Guard against an empty transcript (e.g. immediate cancellation with
        # no input messages) instead of raising IndexError.
        last_content = messages[-1].get("content", "") if messages else ""
        return RunResult(
            final_output={"response": last_content},
            final_messages=messages,
            usage=total_usage,
        )

    def _register_tools(self, registry):
        """Register example tools on the run's tool registry."""
        # Example: Calculator tool
        async def calculate(expression: str) -> str:
            """Evaluate a math expression."""
            try:
                # SECURITY: eval() with empty __builtins__ is NOT a sandbox —
                # crafted expressions can still reach dangerous attributes or
                # burn unbounded CPU/memory (e.g. huge exponents). Acceptable
                # for an example agent; use an ast-based evaluator for any
                # untrusted deployment.
                result = eval(expression, {"__builtins__": {}}, {})
                return str(result)
            except Exception as e:
                return f"Error: {e}"

        registry.register(ToolDefinition(
            name="calculate",
            description="Evaluate a mathematical expression",
            parameters={
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "The math expression to evaluate",
                    },
                },
                "required": ["expression"],
            },
            handler=calculate,
        ))
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def register():
    """Entry point for RUNTIME_REGISTRY: install the tool agent runtime."""
    runtime = ToolAgentRuntime()
    register_runtime(runtime)
|
|
157
|
+
|
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Management command to run agent workers.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
./manage.py runagent
|
|
6
|
+
./manage.py runagent --processes 4 --concurrency 20
|
|
7
|
+
./manage.py runagent --queue redis --agent-keys my-agent,other-agent
|
|
8
|
+
./manage.py runagent --noreload # Disable auto-reload in DEBUG mode
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
import multiprocessing
|
|
14
|
+
import os
|
|
15
|
+
import signal
|
|
16
|
+
import sys
|
|
17
|
+
import uuid
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from django.conf import settings as django_settings
|
|
22
|
+
from django.core.management.base import BaseCommand
|
|
23
|
+
from django.utils import autoreload
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
# Check DEBUG mode
|
|
28
|
+
DEBUG = getattr(django_settings, 'DEBUG', False)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def debug_print(msg: str):
    """Write a worker-prefixed diagnostic line to stdout when Django DEBUG is on."""
    if not DEBUG:
        return
    print(f"[agent-worker] {msg}", flush=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Command(BaseCommand):
    """Management command that runs agent worker processes.

    Supports single-process mode (optionally under Django's autoreloader in
    DEBUG) and multi-process mode via multiprocessing. Each worker process
    runs an asyncio loop that claims runs from the queue backend, executes
    them through AgentRunner with bounded concurrency, and periodically
    recovers expired leases.
    """

    help = "Run agent workers to process agent runs"

    # Validation is called explicitly each time the worker restarts with autoreload
    requires_system_checks = []
    suppressed_base_arguments = {"--verbosity", "--traceback"}

    def add_arguments(self, parser):
        """Declare CLI options; None defaults fall back to runtime settings."""
        parser.add_argument(
            "--processes",
            type=int,
            default=None,
            help="Number of worker processes (default: from settings or 1)",
        )
        parser.add_argument(
            "--concurrency",
            type=int,
            default=None,
            help="Async concurrency per process (default: from settings or 10)",
        )
        parser.add_argument(
            "--queue",
            type=str,
            default=None,
            choices=["postgres", "redis_streams"],
            help="Queue backend (default: from settings)",
        )
        parser.add_argument(
            "--agent-keys",
            type=str,
            default=None,
            help="Comma-separated list of agent keys to process (default: all)",
        )
        parser.add_argument(
            "--lease-ttl-seconds",
            type=int,
            default=None,
            help="Lease TTL in seconds (default: from settings)",
        )
        parser.add_argument(
            "--run-timeout-seconds",
            type=int,
            default=None,
            help="Run timeout in seconds (default: from settings)",
        )
        parser.add_argument(
            "--max-attempts",
            type=int,
            default=None,
            help="Max retry attempts (default: from settings)",
        )
        parser.add_argument(
            "--worker-id",
            type=str,
            default=None,
            help="Worker ID (default: auto-generated)",
        )
        parser.add_argument(
            "--noreload",
            action="store_true",
            help="Disable auto-reload when code changes (only applies in DEBUG mode)",
        )
        parser.add_argument(
            "--skip-checks",
            action="store_true",
            help="Skip system checks.",
        )

    def handle(self, *args, **options):
        """Command entry point: choose between direct run and autoreloader."""
        # In DEBUG mode with autoreload enabled, use Django's autoreloader
        use_reloader = DEBUG and not options.get("noreload", False)

        if use_reloader:
            # Note: autoreload only works well with single process mode
            if options.get("processes") and options["processes"] > 1:
                # Explicit multi-process request: warn and run without reloader.
                self.stdout.write(
                    self.style.WARNING(
                        "Auto-reload is not compatible with multi-process mode. "
                        "Using --noreload or set processes=1 for auto-reload."
                    )
                )
                self._run_inner(*args, **options)
            else:
                # Force single process for autoreload
                options["processes"] = 1
                autoreload.run_with_reloader(self._run_inner, *args, **options)
        else:
            self._run_inner(*args, **options)

    def _run_inner(self, *args, **options):
        """Inner run method - called directly or via autoreloader."""
        # If an exception was silenced in ManagementUtility.execute in order
        # to be raised in the child process, raise it now.
        if DEBUG and not options.get("noreload", False):
            autoreload.raise_last_exception()

        # System checks are run here (not by BaseCommand) so they re-run on
        # each autoreload restart; see requires_system_checks above.
        if not options.get("skip_checks", False):
            self.check(display_num_errors=True)

        # Imported lazily so app/config loading happens after Django setup.
        from django_agent_runtime.conf import runtime_settings

        settings = runtime_settings()

        # Get configuration
        processes = options["processes"] or settings.DEFAULT_PROCESSES
        concurrency = options["concurrency"] or settings.DEFAULT_CONCURRENCY
        queue_backend = options["queue"] or settings.QUEUE_BACKEND
        agent_keys = options["agent_keys"]
        if agent_keys:
            # "--agent-keys a, b" -> ["a", "b"]
            agent_keys = [k.strip() for k in agent_keys.split(",")]

        # Print startup info
        now = datetime.now().strftime("%B %d, %Y - %X")
        quit_command = "CTRL-BREAK" if sys.platform == "win32" else "CONTROL-C"

        self.stdout.write(f"{now}")
        self.stdout.write(
            self.style.SUCCESS(
                f"Starting agent runtime with {processes} process(es), "
                f"{concurrency} concurrent tasks each"
            )
        )
        self.stdout.write(f"Queue backend: {queue_backend}")
        if agent_keys:
            self.stdout.write(f"Agent keys: {agent_keys}")

        if DEBUG:
            if not options.get("noreload", False):
                self.stdout.write(
                    self.style.WARNING("DEBUG mode: auto-reload enabled (use --noreload to disable)")
                )
            else:
                self.stdout.write(self.style.WARNING("DEBUG mode: verbose logging enabled"))

        self.stdout.write(f"Quit with {quit_command}.")

        if processes == 1:
            # Single process mode - run directly
            self._run_worker(
                worker_num=0,
                concurrency=concurrency,
                queue_backend=queue_backend,
                agent_keys=agent_keys,
                options=options,
            )
        else:
            # Multi-process mode
            self._run_multiprocess(
                processes=processes,
                concurrency=concurrency,
                queue_backend=queue_backend,
                agent_keys=agent_keys,
                options=options,
            )

    def _run_multiprocess(
        self,
        processes: int,
        concurrency: int,
        queue_backend: str,
        agent_keys: Optional[list[str]],
        options: dict,
    ):
        """Run multiple worker processes."""
        workers = []

        def signal_handler(signum, frame):
            # Parent-process handler: terminate children, wait, then exit.
            self.stdout.write("\nShutting down workers...")
            for p in workers:
                p.terminate()
            for p in workers:
                # Bounded join so a stuck child cannot hang shutdown forever.
                p.join(timeout=30)
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        for i in range(processes):
            # Positional args must match _run_worker's signature (after self).
            p = multiprocessing.Process(
                target=self._run_worker,
                args=(i, concurrency, queue_backend, agent_keys, options),
            )
            p.start()
            workers.append(p)
            self.stdout.write(f"Started worker process {i} (PID: {p.pid})")

        # Wait for all workers
        for p in workers:
            p.join()

    def _run_worker(
        self,
        worker_num: int,
        concurrency: int,
        queue_backend: str,
        agent_keys: Optional[list[str]],
        options: dict,
    ):
        """Run a single worker process."""
        # Generate worker ID
        # Random suffix keeps IDs unique across restarts of the same slot.
        worker_id = options.get("worker_id") or f"worker-{worker_num}-{uuid.uuid4().hex[:8]}"

        debug_print(f"Worker {worker_id} starting...")

        # Run the async worker loop
        asyncio.run(
            self._async_worker_loop(
                worker_id=worker_id,
                concurrency=concurrency,
                queue_backend=queue_backend,
                agent_keys=agent_keys,
                options=options,
            )
        )

    async def _async_worker_loop(
        self,
        worker_id: str,
        concurrency: int,
        queue_backend: str,
        agent_keys: Optional[list[str]],
        options: dict,
    ):
        """Main async worker loop.

        Claims runs from the queue with at most `concurrency` in flight,
        processes each in its own task, and drains in-flight work plus
        background tasks on shutdown.
        """
        # Lazy imports: these touch Django models/config, so defer them to
        # inside the (possibly forked) worker process.
        from django_agent_runtime.conf import runtime_settings
        from django_agent_runtime.runtime.queue import get_queue
        from django_agent_runtime.runtime.events import get_event_bus
        from django_agent_runtime.runtime.runner import AgentRunner
        from django_agent_runtime.runtime.tracing import get_trace_sink

        settings = runtime_settings()

        # Initialize queue
        queue_kwargs = {"lease_ttl_seconds": options.get("lease_ttl_seconds") or settings.LEASE_TTL_SECONDS}
        if queue_backend == "redis_streams":
            queue_kwargs["redis_url"] = settings.REDIS_URL

        queue = get_queue(queue_backend, **queue_kwargs)
        debug_print(f"Queue initialized: {queue_backend}")

        # Initialize event bus
        event_bus_kwargs = {}
        if settings.EVENT_BUS_BACKEND == "redis":
            event_bus_kwargs["redis_url"] = settings.REDIS_URL
            event_bus_kwargs["persist_to_db"] = True
            event_bus_kwargs["persist_token_deltas"] = settings.PERSIST_TOKEN_DELTAS

        event_bus = get_event_bus(settings.EVENT_BUS_BACKEND, **event_bus_kwargs)
        debug_print(f"Event bus initialized: {settings.EVENT_BUS_BACKEND}")

        # Initialize trace sink
        trace_sink = get_trace_sink()

        # Create runner
        runner = AgentRunner(
            worker_id=worker_id,
            queue=queue,
            event_bus=event_bus,
            trace_sink=trace_sink,
        )

        print(f"[agent-worker] Worker {worker_id} ready, polling for runs...", flush=True)

        # Semaphore for concurrency control
        semaphore = asyncio.Semaphore(concurrency)

        # Shutdown event
        shutdown_event = asyncio.Event()

        # Handle signals
        # NOTE(review): asyncio.get_event_loop() inside a running coroutine is
        # deprecated in modern Python; get_running_loop() is the current form.
        loop = asyncio.get_event_loop()

        def handle_shutdown():
            print(f"[agent-worker] Worker {worker_id} shutting down...", flush=True)
            shutdown_event.set()

        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                loop.add_signal_handler(sig, handle_shutdown)
            except NotImplementedError:
                # Windows doesn't support add_signal_handler
                pass

        # Background task for lease recovery
        recovery_task = asyncio.create_task(
            self._lease_recovery_loop(queue, shutdown_event)
        )

        # Main processing loop
        active_tasks: set[asyncio.Task] = set()
        poll_count = 0

        try:
            while not shutdown_event.is_set():
                # Wait for semaphore slot
                await semaphore.acquire()

                # Re-check after a potentially long wait on the semaphore.
                if shutdown_event.is_set():
                    semaphore.release()
                    break

                # Try to claim a run
                runs = await queue.claim(
                    worker_id=worker_id,
                    agent_keys=agent_keys,
                    batch_size=1,
                )

                if not runs:
                    # Nothing claimed: give the slot back before sleeping.
                    semaphore.release()
                    poll_count += 1
                    # Log every 60 polls (roughly every minute at 1s interval)
                    if DEBUG and poll_count % 60 == 0:
                        debug_print(f"Polling... (no runs in queue)")
                    # No work available, wait a bit
                    # (wait on the shutdown event so shutdown interrupts the sleep)
                    try:
                        await asyncio.wait_for(
                            shutdown_event.wait(),
                            timeout=1.0,
                        )
                    except asyncio.TimeoutError:
                        pass
                    continue

                # Reset poll count when we get work
                poll_count = 0

                # Process the run
                run = runs[0]
                print(f"[agent-worker] Claimed run {run.run_id} (agent={run.agent_key})", flush=True)

                async def process_run(r):
                    # The semaphore slot acquired above is released here, once
                    # the run finishes (success or failure).
                    try:
                        await runner.run_once(r)
                        debug_print(f"Run {r.run_id} completed")
                    except Exception as e:
                        print(f"[agent-worker] ERROR processing run {r.run_id}: {e}", flush=True)
                    finally:
                        semaphore.release()

                task = asyncio.create_task(process_run(run))
                active_tasks.add(task)
                # Discard finished tasks so the set doesn't grow unboundedly.
                task.add_done_callback(active_tasks.discard)

        finally:
            # Wait for active tasks to complete
            if active_tasks:
                print(f"[agent-worker] Waiting for {len(active_tasks)} active tasks...", flush=True)
                await asyncio.gather(*active_tasks, return_exceptions=True)

            # Cancel recovery task
            recovery_task.cancel()
            try:
                await recovery_task
            except asyncio.CancelledError:
                pass

            # Cleanup
            await queue.close()
            await event_bus.close()

            print(f"[agent-worker] Worker {worker_id} stopped", flush=True)

    async def _lease_recovery_loop(self, queue, shutdown_event: asyncio.Event):
        """Periodically recover expired leases."""
        while not shutdown_event.is_set():
            # Sleep by waiting on the shutdown event so shutdown wakes us early.
            try:
                await asyncio.wait_for(
                    shutdown_event.wait(),
                    timeout=60.0,  # Check every minute
                )
            except asyncio.TimeoutError:
                pass

            if shutdown_event.is_set():
                break

            # Best-effort: recovery errors are logged, never fatal to the worker.
            try:
                recovered = await queue.recover_expired_leases()
                if recovered:
                    print(f"[agent-worker] Recovered {recovered} expired leases", flush=True)
            except Exception as e:
                print(f"[agent-worker] Error recovering leases: {e}", flush=True)
|