django-agent-runtime 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. django_agent_runtime/__init__.py +25 -0
  2. django_agent_runtime/admin.py +155 -0
  3. django_agent_runtime/api/__init__.py +26 -0
  4. django_agent_runtime/api/permissions.py +109 -0
  5. django_agent_runtime/api/serializers.py +114 -0
  6. django_agent_runtime/api/views.py +472 -0
  7. django_agent_runtime/apps.py +26 -0
  8. django_agent_runtime/conf.py +241 -0
  9. django_agent_runtime/examples/__init__.py +10 -0
  10. django_agent_runtime/examples/langgraph_adapter.py +164 -0
  11. django_agent_runtime/examples/langgraph_tools.py +179 -0
  12. django_agent_runtime/examples/simple_chat.py +69 -0
  13. django_agent_runtime/examples/tool_agent.py +157 -0
  14. django_agent_runtime/management/__init__.py +2 -0
  15. django_agent_runtime/management/commands/__init__.py +2 -0
  16. django_agent_runtime/management/commands/runagent.py +419 -0
  17. django_agent_runtime/migrations/0001_initial.py +117 -0
  18. django_agent_runtime/migrations/0002_persistence_models.py +129 -0
  19. django_agent_runtime/migrations/0003_persistenceconversation_active_branch_id_and_more.py +212 -0
  20. django_agent_runtime/migrations/0004_add_anonymous_session_id.py +18 -0
  21. django_agent_runtime/migrations/__init__.py +2 -0
  22. django_agent_runtime/models/__init__.py +54 -0
  23. django_agent_runtime/models/base.py +450 -0
  24. django_agent_runtime/models/concrete.py +146 -0
  25. django_agent_runtime/persistence/__init__.py +60 -0
  26. django_agent_runtime/persistence/helpers.py +148 -0
  27. django_agent_runtime/persistence/models.py +506 -0
  28. django_agent_runtime/persistence/stores.py +1191 -0
  29. django_agent_runtime/runtime/__init__.py +23 -0
  30. django_agent_runtime/runtime/events/__init__.py +65 -0
  31. django_agent_runtime/runtime/events/base.py +135 -0
  32. django_agent_runtime/runtime/events/db.py +129 -0
  33. django_agent_runtime/runtime/events/redis.py +228 -0
  34. django_agent_runtime/runtime/events/sync.py +140 -0
  35. django_agent_runtime/runtime/interfaces.py +475 -0
  36. django_agent_runtime/runtime/llm/__init__.py +91 -0
  37. django_agent_runtime/runtime/llm/anthropic.py +249 -0
  38. django_agent_runtime/runtime/llm/litellm_adapter.py +173 -0
  39. django_agent_runtime/runtime/llm/openai.py +230 -0
  40. django_agent_runtime/runtime/queue/__init__.py +75 -0
  41. django_agent_runtime/runtime/queue/base.py +158 -0
  42. django_agent_runtime/runtime/queue/postgres.py +248 -0
  43. django_agent_runtime/runtime/queue/redis_streams.py +336 -0
  44. django_agent_runtime/runtime/queue/sync.py +277 -0
  45. django_agent_runtime/runtime/registry.py +186 -0
  46. django_agent_runtime/runtime/runner.py +540 -0
  47. django_agent_runtime/runtime/tracing/__init__.py +48 -0
  48. django_agent_runtime/runtime/tracing/langfuse.py +117 -0
  49. django_agent_runtime/runtime/tracing/noop.py +36 -0
  50. django_agent_runtime/urls.py +39 -0
  51. django_agent_runtime-0.3.6.dist-info/METADATA +723 -0
  52. django_agent_runtime-0.3.6.dist-info/RECORD +55 -0
  53. django_agent_runtime-0.3.6.dist-info/WHEEL +5 -0
  54. django_agent_runtime-0.3.6.dist-info/licenses/LICENSE +22 -0
  55. django_agent_runtime-0.3.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,336 @@
1
+ """
2
+ Redis Streams-backed queue with consumer groups.
3
+
4
+ Higher throughput than Postgres queue, recommended for production.
5
+ Database remains authoritative - Redis is used for distribution only.
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timedelta, timezone
10
+ from typing import Optional
11
+ from uuid import UUID
12
+
13
+ from asgiref.sync import sync_to_async
14
+ from django.db import transaction
15
+
16
+ from django_agent_runtime.models import AgentRun
17
+ from django_agent_runtime.models.base import RunStatus
18
+ from django_agent_runtime.runtime.queue.base import RunQueue, QueuedRun
19
+
20
+ try:
21
+ import redis.asyncio as aioredis
22
+ except ImportError:
23
+ aioredis = None
24
+
25
+
26
class RedisStreamsQueue(RunQueue):
    """
    Redis Streams-backed queue implementation.

    Run ids are distributed to workers through a Redis stream that is read
    via a consumer group. All run state (status, lease owner, lease expiry,
    attempt counter, cancellation flag) is read from and written to the
    ``AgentRun`` table, so the database stays the source of truth.
    """

    STREAM_KEY = "agent_runtime:runs"
    GROUP_NAME = "agent_workers"

    def __init__(
        self,
        redis_url: str,
        lease_ttl_seconds: int = 30,
        stream_key: Optional[str] = None,
        group_name: Optional[str] = None,
    ):
        """
        Configure the queue; no connection is opened here.

        Args:
            redis_url: URL passed to ``redis.asyncio.from_url``.
            lease_ttl_seconds: Lease length applied when a run is claimed.
            stream_key: Stream name; falls back to ``STREAM_KEY``.
            group_name: Consumer group name; falls back to ``GROUP_NAME``.

        Raises:
            ImportError: If the ``redis`` package could not be imported.
        """
        if aioredis is None:
            raise ImportError("redis package is required for RedisStreamsQueue")

        self.redis_url = redis_url
        self.lease_ttl_seconds = lease_ttl_seconds
        self.stream_key = stream_key or self.STREAM_KEY
        self.group_name = group_name or self.GROUP_NAME
        self._redis: Optional[aioredis.Redis] = None

    async def _get_redis(self) -> "aioredis.Redis":
        """
        Return the cached Redis client, creating it on first use.

        The consumer group (and the stream, via ``mkstream=True``) is created
        before the client is cached. If group creation fails with anything
        other than ``BUSYGROUP``, nothing is cached and the next call retries
        the whole setup.
        """
        if self._redis is not None:
            return self._redis

        client = aioredis.from_url(self.redis_url)
        try:
            await client.xgroup_create(
                self.stream_key, self.group_name, id="0", mkstream=True
            )
        except aioredis.ResponseError as exc:
            # BUSYGROUP means the group already exists, which is fine.
            if "BUSYGROUP" not in str(exc):
                raise

        # Cache only after the group is known to exist.
        self._redis = client
        return client

    async def enqueue(self, run_id: UUID, agent_key: str) -> None:
        """
        Add a run to the stream.

        Appends one entry with the fields ``run_id`` and ``agent_key``.
        """
        redis = await self._get_redis()
        await redis.xadd(
            self.stream_key,
            {"run_id": str(run_id), "agent_key": agent_key},
        )

    async def claim(
        self,
        worker_id: str,
        agent_keys: Optional[list[str]] = None,
        batch_size: int = 1,
    ) -> list[QueuedRun]:
        """
        Claim runs from the stream using the consumer group.

        Reads up to ``batch_size`` new entries (blocking for at most one
        second), tries to take a lease in the database for each, and returns
        the runs that were leased to ``worker_id``.

        Every entry read is acknowledged, including entries filtered out by
        ``agent_keys`` and entries whose database claim did not succeed.
        """
        redis = await self._get_redis()

        messages = await redis.xreadgroup(
            self.group_name,
            worker_id,
            {self.stream_key: ">"},
            count=batch_size,
            block=1000,  # 1 second block
        )
        if not messages:
            return []

        # Start the lease clock after the blocking read, so the wait does not
        # eat into the lease.
        lease_expires = datetime.now(timezone.utc) + timedelta(
            seconds=self.lease_ttl_seconds
        )

        claimed = []
        for _stream, entries in messages:
            for msg_id, data in entries:
                run_id = UUID(data[b"run_id"].decode())
                agent_key = data[b"agent_key"].decode()

                if not agent_keys or agent_key in agent_keys:
                    run = await self._claim_in_db(run_id, worker_id, lease_expires)
                    if run is not None:
                        claimed.append(run)

                # NOTE(review): the entry is acknowledged even when it was
                # filtered out or the DB claim failed, so it will not be
                # delivered to another consumer. Confirm this is intended for
                # workers restricted to a subset of agent keys.
                await redis.xack(self.stream_key, self.group_name, msg_id)

        return claimed

    @sync_to_async
    def _claim_in_db(
        self, run_id: UUID, worker_id: str, lease_expires: datetime
    ) -> Optional[QueuedRun]:
        """
        Take the lease on a run in the database.

        Returns a ``QueuedRun`` when the row was QUEUED, or RUNNING without a
        live lease. Returns ``None`` when the row does not exist, is in
        another status, could not be locked without waiting, or is RUNNING
        with an unexpired lease.
        """
        # Local import: DatabaseError is not among the module-level imports.
        from django.db import DatabaseError

        now = datetime.now(timezone.utc)

        with transaction.atomic():
            try:
                run = AgentRun.objects.select_for_update(nowait=True).get(
                    id=run_id,
                    status__in=[RunStatus.QUEUED, RunStatus.RUNNING],
                )
            except (AgentRun.DoesNotExist, DatabaseError):
                # Row is missing or not claimable, or the row lock is held
                # elsewhere (nowait=True raises instead of blocking).
                return None

            # A RUNNING row with no expiry has no live lease to respect.
            lease_is_live = (
                run.status == RunStatus.RUNNING
                and run.lease_expires_at is not None
                and run.lease_expires_at > now
            )
            if lease_is_live:
                return None

            run.status = RunStatus.RUNNING
            run.lease_owner = worker_id
            run.lease_expires_at = lease_expires
            if run.started_at is None:
                run.started_at = now
            run.save()

            return QueuedRun(
                run_id=run.id,
                agent_key=run.agent_key,
                attempt=run.attempt,
                lease_expires_at=lease_expires,
                input=run.input,
                metadata=run.metadata,
            )

    async def extend_lease(self, run_id: UUID, worker_id: str, seconds: int) -> bool:
        """
        Push the lease expiry to ``seconds`` from now.

        Only applies when the run is RUNNING and owned by ``worker_id``.
        Returns ``True`` if a row was updated.
        """

        @sync_to_async
        def _extend():
            new_expires = datetime.now(timezone.utc) + timedelta(seconds=seconds)
            updated = AgentRun.objects.filter(
                id=run_id,
                lease_owner=worker_id,
                status=RunStatus.RUNNING,
            ).update(lease_expires_at=new_expires)
            return updated > 0

        return await _extend()

    async def release(
        self,
        run_id: UUID,
        worker_id: str,
        success: bool,
        output: Optional[dict] = None,
        error: Optional[dict] = None,
    ) -> None:
        """
        Release a run after completion.

        Sets the status to SUCCEEDED or FAILED, stamps ``finished_at`` and
        clears the lease. ``output`` and ``error`` are written only when
        truthy. Nothing is updated if ``worker_id`` no longer owns the run.
        """

        @sync_to_async
        def _release():
            updates = {
                "status": RunStatus.SUCCEEDED if success else RunStatus.FAILED,
                "finished_at": datetime.now(timezone.utc),
                "lease_owner": "",
                "lease_expires_at": None,
            }
            if output:
                updates["output"] = output
            if error:
                updates["error"] = error

            AgentRun.objects.filter(
                id=run_id,
                lease_owner=worker_id,
            ).update(**updates)

        await _release()

    async def requeue_for_retry(
        self,
        run_id: UUID,
        worker_id: str,
        error: dict,
        delay_seconds: int = 0,
    ) -> bool:
        """
        Put a failed run back on the stream if attempts remain.

        When ``attempt`` has reached ``max_attempts`` the run is marked FAILED
        instead. ``delay_seconds`` is accepted for interface compatibility but
        is not used; the run is re-added to the stream immediately.

        Returns:
            ``True`` if the run was requeued. ``False`` if it was not owned by
            ``worker_id``, not found, or out of attempts.
        """

        @sync_to_async
        def _check_and_update():
            with transaction.atomic():
                try:
                    run = AgentRun.objects.select_for_update().get(
                        id=run_id, lease_owner=worker_id
                    )
                except AgentRun.DoesNotExist:
                    return None

                if run.attempt >= run.max_attempts:
                    run.status = RunStatus.FAILED
                    run.error = error
                    run.finished_at = datetime.now(timezone.utc)
                    run.lease_owner = ""
                    run.lease_expires_at = None
                    run.save()
                    return None

                run.status = RunStatus.QUEUED
                run.attempt += 1
                run.error = error
                run.lease_owner = ""
                run.lease_expires_at = None
                run.save()
                return run.agent_key

        agent_key = await _check_and_update()
        if agent_key is None:
            return False

        # The DB row is already QUEUED; publish it so a worker can pick it up.
        await self.enqueue(run_id, agent_key)
        return True

    async def cancel(self, run_id: UUID) -> bool:
        """
        Mark a run for cancellation.

        Stamps ``cancel_requested_at`` on a QUEUED or RUNNING run. Returns
        ``True`` if a row was updated.
        """

        @sync_to_async
        def _cancel():
            updated = AgentRun.objects.filter(
                id=run_id,
                status__in=[RunStatus.QUEUED, RunStatus.RUNNING],
            ).update(cancel_requested_at=datetime.now(timezone.utc))
            return updated > 0

        return await _cancel()

    async def is_cancelled(self, run_id: UUID) -> bool:
        """
        Check if cancellation was requested.

        Returns ``True`` when the run exists and ``cancel_requested_at`` is
        set, and ``False`` otherwise (including when the run does not exist).
        """

        @sync_to_async
        def _is_cancelled():
            # EXISTS avoids loading the whole row for a single flag.
            return AgentRun.objects.filter(
                id=run_id, cancel_requested_at__isnull=False
            ).exists()

        return await _is_cancelled()

    async def recover_expired_leases(self) -> int:
        """
        Recover runs with expired leases and re-add them to the stream.

        A RUNNING run whose lease has expired is either requeued with its
        attempt counter incremented, or marked TIMED_OUT once ``attempt`` has
        reached ``max_attempts``.

        Returns:
            The number of runs that were requeued. Runs marked TIMED_OUT are
            not counted.
        """
        # Set up Redis first so a connection problem surfaces before any row
        # is flipped back to QUEUED.
        await self._get_redis()

        @sync_to_async
        def _get_expired():
            now = datetime.now(timezone.utc)
            return list(
                AgentRun.objects.filter(
                    status=RunStatus.RUNNING,
                    lease_expires_at__lt=now,
                ).values("id", "agent_key")
            )

        @sync_to_async
        def _recover(run_id):
            """Return True if the run was moved back to QUEUED."""
            now = datetime.now(timezone.utc)
            with transaction.atomic():
                # Re-check under a row lock: the run may have finished or had
                # its lease renewed since the snapshot was taken.
                run = (
                    AgentRun.objects.select_for_update()
                    .filter(
                        id=run_id,
                        status=RunStatus.RUNNING,
                        lease_expires_at__lt=now,
                    )
                    .first()
                )
                if run is None:
                    return False

                if run.attempt >= run.max_attempts:
                    run.status = RunStatus.TIMED_OUT
                    run.finished_at = now
                    run.error = {
                        "type": "LeaseExpired",
                        "message": "Worker lease expired without completion",
                        "retriable": False,
                    }
                    requeue = False
                else:
                    run.status = RunStatus.QUEUED
                    run.attempt += 1
                    requeue = True

                run.lease_owner = ""
                run.lease_expires_at = None
                run.save()
                return requeue

        count = 0
        for run_data in await _get_expired():
            if await _recover(run_data["id"]):
                # NOTE(review): if this enqueue fails the row stays QUEUED in
                # the database without a stream entry.
                await self.enqueue(run_data["id"], run_data["agent_key"])
                count += 1

        return count

    async def close(self) -> None:
        """
        Close the Redis connection, if one was opened.

        The cached client is dropped even if closing raises, so a later call
        creates a fresh connection.
        """
        client, self._redis = self._redis, None
        if client is None:
            return

        # Newer redis-py releases provide aclose() and deprecate close().
        closer = getattr(client, "aclose", None) or client.close
        await closer()
@@ -0,0 +1,277 @@
1
+ """
2
+ Synchronous queue implementations.
3
+
4
+ These are for use in sync contexts like management commands, Celery tasks,
5
+ and traditional Django views.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from datetime import datetime, timedelta, timezone
10
+ from typing import Optional
11
+ from uuid import UUID
12
+
13
+ from django.db import transaction
14
+ from django.db.models import F, Q
15
+
16
+ from django_agent_runtime.models import AgentRun
17
+ from django_agent_runtime.models.base import RunStatus
18
+ from django_agent_runtime.runtime.queue.base import QueuedRun
19
+
20
+
21
class SyncRunQueue(ABC):
    """
    Blocking counterpart of the run queue interface.

    Intended for synchronous callers such as management commands, Celery
    tasks and traditional Django views. Subclasses must implement every
    abstract method; ``close`` is optional.
    """

    @abstractmethod
    def claim(
        self,
        worker_id: str,
        agent_keys: Optional[list[str]] = None,
        batch_size: int = 1,
    ) -> list[QueuedRun]:
        """Claim up to ``batch_size`` runs for ``worker_id``, optionally limited to ``agent_keys``."""

    @abstractmethod
    def extend_lease(self, run_id: UUID, worker_id: str, seconds: int) -> bool:
        """Heartbeat: extend the lease ``worker_id`` holds on ``run_id``."""

    @abstractmethod
    def release(
        self,
        run_id: UUID,
        worker_id: str,
        success: bool,
        output: Optional[dict] = None,
        error: Optional[dict] = None,
    ) -> None:
        """Release ``run_id`` once the worker has finished with it."""

    @abstractmethod
    def requeue_for_retry(
        self,
        run_id: UUID,
        worker_id: str,
        error: dict,
        delay_seconds: int = 0,
    ) -> bool:
        """Put ``run_id`` back on the queue for another attempt."""

    @abstractmethod
    def cancel(self, run_id: UUID) -> bool:
        """Flag ``run_id`` for cancellation."""

    @abstractmethod
    def is_cancelled(self, run_id: UUID) -> bool:
        """Report whether ``run_id`` has been cancelled."""

    @abstractmethod
    def recover_expired_leases(self) -> int:
        """Recover runs whose lease has expired."""

    def close(self) -> None:
        """Release any held connections. No-op by default; override if needed."""
85
+
86
+
87
class SyncPostgresQueue(SyncRunQueue):
    """
    Synchronous PostgreSQL-backed queue implementation.

    The ``AgentRun`` table is the queue. Rows are claimed with
    ``SELECT ... FOR UPDATE SKIP LOCKED`` so concurrent workers never pick
    the same run.
    """

    def __init__(self, lease_ttl_seconds: int = 30):
        """Store the lease length, in seconds, applied when a run is claimed."""
        self.lease_ttl_seconds = lease_ttl_seconds

    def claim(
        self,
        worker_id: str,
        agent_keys: Optional[list[str]] = None,
        batch_size: int = 1,
    ) -> list[QueuedRun]:
        """
        Claim runs using SELECT FOR UPDATE SKIP LOCKED.

        A run is claimable when it is QUEUED, or RUNNING with an expired
        lease. Claimed rows become RUNNING and are leased to ``worker_id``
        for ``lease_ttl_seconds``.

        Args:
            worker_id: Identifier stored as the lease owner.
            agent_keys: If given and non-empty, only runs with one of these
                agent keys are considered.
            batch_size: Maximum number of runs to claim.

        Returns:
            One ``QueuedRun`` per claimed row (possibly an empty list).
        """
        now = datetime.now(timezone.utc)
        lease_expires = now + timedelta(seconds=self.lease_ttl_seconds)

        claimable = Q(status=RunStatus.QUEUED) | Q(
            status=RunStatus.RUNNING,
            lease_expires_at__lt=now,  # Expired lease
        )

        with transaction.atomic():
            queryset = AgentRun.objects.filter(claimable)
            if agent_keys:
                queryset = queryset.filter(agent_key__in=agent_keys)

            # Rows locked by another transaction are skipped, not waited on.
            runs = list(queryset.select_for_update(skip_locked=True)[:batch_size])

            claimed = []
            for run in runs:
                run.status = RunStatus.RUNNING
                run.lease_owner = worker_id
                run.lease_expires_at = lease_expires
                if run.started_at is None:
                    run.started_at = now
                run.save(
                    update_fields=[
                        "status",
                        "lease_owner",
                        "lease_expires_at",
                        "started_at",
                    ]
                )

                claimed.append(
                    QueuedRun(
                        run_id=run.id,
                        agent_key=run.agent_key,
                        attempt=run.attempt,
                        lease_expires_at=lease_expires,
                        input=run.input,
                        metadata=run.metadata,
                    )
                )

            return claimed

    def extend_lease(self, run_id: UUID, worker_id: str, seconds: int) -> bool:
        """
        Extend the lease if ``worker_id`` still owns it.

        Sets the expiry to ``seconds`` from now on a RUNNING run owned by
        ``worker_id``. Returns ``True`` if a row was updated.
        """
        new_expires = datetime.now(timezone.utc) + timedelta(seconds=seconds)

        updated = AgentRun.objects.filter(
            id=run_id,
            lease_owner=worker_id,
            status=RunStatus.RUNNING,
        ).update(lease_expires_at=new_expires)

        return updated > 0

    def release(
        self,
        run_id: UUID,
        worker_id: str,
        success: bool,
        output: Optional[dict] = None,
        error: Optional[dict] = None,
    ) -> None:
        """
        Release a run after completion.

        Sets the status to SUCCEEDED or FAILED, stamps ``finished_at`` and
        clears the lease. ``output`` and ``error`` are written only when
        truthy. Nothing is updated if ``worker_id`` no longer owns the run.
        """
        updates = {
            "status": RunStatus.SUCCEEDED if success else RunStatus.FAILED,
            "finished_at": datetime.now(timezone.utc),
            "lease_owner": "",
            "lease_expires_at": None,
        }

        if output:
            updates["output"] = output
        if error:
            updates["error"] = error

        AgentRun.objects.filter(
            id=run_id,
            lease_owner=worker_id,
        ).update(**updates)

    def requeue_for_retry(
        self,
        run_id: UUID,
        worker_id: str,
        error: dict,
        delay_seconds: int = 0,
    ) -> bool:
        """
        Requeue a run for retry if attempts remain.

        When ``attempt`` has reached ``max_attempts`` the run is marked FAILED
        instead. ``delay_seconds`` is accepted for interface compatibility but
        is not used; the run becomes claimable immediately.

        Returns:
            ``True`` if the run was set back to QUEUED. ``False`` if it was
            not owned by ``worker_id``, not found, or out of attempts.
        """
        with transaction.atomic():
            try:
                run = AgentRun.objects.select_for_update().get(
                    id=run_id, lease_owner=worker_id
                )
            except AgentRun.DoesNotExist:
                return False

            if run.attempt >= run.max_attempts:
                # Max attempts reached
                run.status = RunStatus.FAILED
                run.error = error
                run.finished_at = datetime.now(timezone.utc)
                run.lease_owner = ""
                run.lease_expires_at = None
                run.save()
                return False

            # Requeue with incremented attempt (incremented in the database).
            run.status = RunStatus.QUEUED
            run.attempt = F("attempt") + 1
            run.error = error
            run.lease_owner = ""
            run.lease_expires_at = None
            run.save()
            return True

    def cancel(self, run_id: UUID) -> bool:
        """
        Mark a run for cancellation.

        Stamps ``cancel_requested_at`` on a QUEUED or RUNNING run. Returns
        ``True`` if a row was updated.
        """
        updated = AgentRun.objects.filter(
            id=run_id,
            status__in=[RunStatus.QUEUED, RunStatus.RUNNING],
        ).update(cancel_requested_at=datetime.now(timezone.utc))
        return updated > 0

    def is_cancelled(self, run_id: UUID) -> bool:
        """
        Check if cancellation was requested.

        Returns ``True`` when the run exists and ``cancel_requested_at`` is
        set, and ``False`` otherwise (including when the run does not exist).
        """
        # EXISTS avoids loading the whole row for a single flag.
        return AgentRun.objects.filter(
            id=run_id, cancel_requested_at__isnull=False
        ).exists()

    def recover_expired_leases(self) -> int:
        """
        Recover runs with expired leases.

        A RUNNING run whose lease has expired is either set back to QUEUED
        with its attempt counter incremented, or marked TIMED_OUT once
        ``attempt`` has reached ``max_attempts``.

        Returns:
            The number of runs processed, counting both requeued and
            timed-out runs.
        """
        now = datetime.now(timezone.utc)
        count = 0

        with transaction.atomic():
            # Lock the rows so a worker releasing or renewing a lease at the
            # same moment cannot be overwritten by the save() below. Rows
            # already locked by another transaction are skipped.
            expired = AgentRun.objects.select_for_update(skip_locked=True).filter(
                status=RunStatus.RUNNING,
                lease_expires_at__lt=now,
            )

            for run in expired:
                if run.attempt >= run.max_attempts:
                    # Mark as timed out
                    run.status = RunStatus.TIMED_OUT
                    run.finished_at = now
                    run.error = {
                        "type": "LeaseExpired",
                        "message": "Worker lease expired without completion",
                        "retriable": False,
                    }
                else:
                    # Requeue for retry
                    run.status = RunStatus.QUEUED
                    run.attempt += 1

                run.lease_owner = ""
                run.lease_expires_at = None
                run.save()
                count += 1

        return count
277
+