django-agent-runtime 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. django_agent_runtime/__init__.py +25 -0
  2. django_agent_runtime/admin.py +155 -0
  3. django_agent_runtime/api/__init__.py +26 -0
  4. django_agent_runtime/api/permissions.py +109 -0
  5. django_agent_runtime/api/serializers.py +114 -0
  6. django_agent_runtime/api/views.py +472 -0
  7. django_agent_runtime/apps.py +26 -0
  8. django_agent_runtime/conf.py +241 -0
  9. django_agent_runtime/examples/__init__.py +10 -0
  10. django_agent_runtime/examples/langgraph_adapter.py +164 -0
  11. django_agent_runtime/examples/langgraph_tools.py +179 -0
  12. django_agent_runtime/examples/simple_chat.py +69 -0
  13. django_agent_runtime/examples/tool_agent.py +157 -0
  14. django_agent_runtime/management/__init__.py +2 -0
  15. django_agent_runtime/management/commands/__init__.py +2 -0
  16. django_agent_runtime/management/commands/runagent.py +419 -0
  17. django_agent_runtime/migrations/0001_initial.py +117 -0
  18. django_agent_runtime/migrations/0002_persistence_models.py +129 -0
  19. django_agent_runtime/migrations/0003_persistenceconversation_active_branch_id_and_more.py +212 -0
  20. django_agent_runtime/migrations/0004_add_anonymous_session_id.py +18 -0
  21. django_agent_runtime/migrations/__init__.py +2 -0
  22. django_agent_runtime/models/__init__.py +54 -0
  23. django_agent_runtime/models/base.py +450 -0
  24. django_agent_runtime/models/concrete.py +146 -0
  25. django_agent_runtime/persistence/__init__.py +60 -0
  26. django_agent_runtime/persistence/helpers.py +148 -0
  27. django_agent_runtime/persistence/models.py +506 -0
  28. django_agent_runtime/persistence/stores.py +1191 -0
  29. django_agent_runtime/runtime/__init__.py +23 -0
  30. django_agent_runtime/runtime/events/__init__.py +65 -0
  31. django_agent_runtime/runtime/events/base.py +135 -0
  32. django_agent_runtime/runtime/events/db.py +129 -0
  33. django_agent_runtime/runtime/events/redis.py +228 -0
  34. django_agent_runtime/runtime/events/sync.py +140 -0
  35. django_agent_runtime/runtime/interfaces.py +475 -0
  36. django_agent_runtime/runtime/llm/__init__.py +91 -0
  37. django_agent_runtime/runtime/llm/anthropic.py +249 -0
  38. django_agent_runtime/runtime/llm/litellm_adapter.py +173 -0
  39. django_agent_runtime/runtime/llm/openai.py +230 -0
  40. django_agent_runtime/runtime/queue/__init__.py +75 -0
  41. django_agent_runtime/runtime/queue/base.py +158 -0
  42. django_agent_runtime/runtime/queue/postgres.py +248 -0
  43. django_agent_runtime/runtime/queue/redis_streams.py +336 -0
  44. django_agent_runtime/runtime/queue/sync.py +277 -0
  45. django_agent_runtime/runtime/registry.py +186 -0
  46. django_agent_runtime/runtime/runner.py +540 -0
  47. django_agent_runtime/runtime/tracing/__init__.py +48 -0
  48. django_agent_runtime/runtime/tracing/langfuse.py +117 -0
  49. django_agent_runtime/runtime/tracing/noop.py +36 -0
  50. django_agent_runtime/urls.py +39 -0
  51. django_agent_runtime-0.3.6.dist-info/METADATA +723 -0
  52. django_agent_runtime-0.3.6.dist-info/RECORD +55 -0
  53. django_agent_runtime-0.3.6.dist-info/WHEEL +5 -0
  54. django_agent_runtime-0.3.6.dist-info/licenses/LICENSE +22 -0
  55. django_agent_runtime-0.3.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,540 @@
1
+ """
2
+ Core runner for executing agent runs.
3
+
4
+ Handles:
5
+ - Claiming runs from queue
6
+ - Executing agent runtimes
7
+ - Heartbeats and lease management
8
+ - Retries and error handling
9
+ - Cancellation
10
+ - Event emission
11
+ """
12
+
13
+ import asyncio
14
+ import logging
15
+ import traceback
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime, timezone
18
+ from typing import Optional
19
+ from uuid import UUID
20
+
21
+ from django.conf import settings as django_settings
22
+
23
+ from django_agent_runtime.conf import runtime_settings, get_event_visibility
24
+ from django_agent_runtime.runtime.interfaces import (
25
+ AgentRuntime,
26
+ EventType,
27
+ Message,
28
+ RunContext,
29
+ RunResult,
30
+ ToolRegistry,
31
+ ErrorInfo,
32
+ )
33
+ from django_agent_runtime.runtime.registry import get_runtime
34
+ from django_agent_runtime.runtime.queue.base import RunQueue, QueuedRun
35
+ from django_agent_runtime.runtime.events.base import EventBus, Event
36
+
37
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Snapshot of Django's DEBUG setting taken once at import time; defaults to
# False when the setting is absent. debug_print() consults this flag.
DEBUG = getattr(django_settings, 'DEBUG', False)
41
+
42
+
43
def debug_print(msg: str):
    """
    Write a runner diagnostic line to stdout when Django DEBUG is on.

    Args:
        msg: Text to print; it is prefixed with "[agent-runner] ".
    """
    if not DEBUG:
        return
    print(f"[agent-runner] {msg}", flush=True)
47
+
48
+
49
@dataclass
class RunContextImpl:
    """
    Concrete implementation of RunContext.

    Provided to agent runtimes during execution. It carries the run's inputs
    and gives the runtime a way to emit events, save/load checkpoints and
    poll for cancellation.
    """

    run_id: UUID
    conversation_id: Optional[UUID]
    input_messages: list[Message]
    params: dict
    metadata: dict
    tool_registry: ToolRegistry

    # Internal state
    _event_bus: EventBus = field(repr=False)
    _queue: RunQueue = field(repr=False)
    _worker_id: str = field(repr=False)
    # Sequence number that the next emitted event will carry.
    _seq: int = field(default=0, repr=False)
    # Cached copy of the most recently saved/loaded checkpoint state.
    _state: Optional[dict] = field(default=None, repr=False)
    # Minimum number of seconds between two cancellation queries.
    _cancel_check_interval: float = field(default=1.0, repr=False)
    # Event-loop time of the last cancellation query.
    _last_cancel_check: float = field(default=0.0, repr=False)
    _is_cancelled: bool = field(default=False, repr=False)

    async def emit(self, event_type: EventType | str, payload: dict) -> None:
        """
        Emit an event to the event bus.

        Args:
            event_type: An EventType member or a raw event-type string.
            payload: Event payload.
        """
        event_type_str = event_type.value if isinstance(event_type, EventType) else event_type

        # Get visibility for this event type
        visibility_level, ui_visible = get_event_visibility(event_type_str)

        # Reserve the sequence number BEFORE the first await. The heartbeat
        # task and the runtime both emit through this context concurrently;
        # incrementing only after publish() returned allowed two in-flight
        # events to share the same seq. A publish failure now leaves a gap
        # in the numbering instead of a duplicate.
        seq = self._seq
        self._seq += 1

        event = Event(
            run_id=self.run_id,
            seq=seq,
            event_type=event_type_str,
            payload=payload,
            timestamp=datetime.now(timezone.utc),
            visibility_level=visibility_level,
            ui_visible=ui_visible,
        )

        # Add detail for specific event types (debug output only)
        detail = ""
        if event_type == EventType.TOOL_CALL:
            tool_name = payload.get("name", "unknown")
            tool_args = str(payload.get("arguments", {}))[:80]
            detail = f" -> {tool_name}({tool_args})"
        elif event_type == EventType.ASSISTANT_MESSAGE:
            content = str(payload.get("content", ""))
            suffix = "..." if len(content) > 80 else ""
            detail = f" -> {content[:80]}{suffix}"

        debug_print(f"Emitting event: type={event_type_str}, seq={seq}, visible={ui_visible}{detail}")
        await self._event_bus.publish(event)

    async def emit_user_message(self, content: str) -> None:
        """
        Emit a message that will always be shown to the user.

        This is a convenience method for emitting assistant messages.

        Args:
            content: The message content to display
        """
        await self.emit(EventType.ASSISTANT_MESSAGE, {"content": content})

    async def emit_error(self, error: str, details: Optional[dict] = None) -> None:
        """
        Emit an error that will be shown to the user.

        This is for runtime errors that should be displayed to users,
        distinct from run.failed which is the final failure event.

        Args:
            error: The error message
            details: Optional additional error details
        """
        await self.emit(EventType.ERROR, {
            "message": error,
            "details": details or {},
        })

    async def checkpoint(self, state: dict) -> None:
        """
        Save a state checkpoint.

        Stores a new AgentCheckpoint row for this run, caches the state on
        the context and emits a STATE_CHECKPOINT event.

        Args:
            state: The state to persist.
        """
        from asgiref.sync import sync_to_async
        from django_agent_runtime.models import AgentCheckpoint

        self._state = state

        @sync_to_async
        def _save():
            # Get next checkpoint seq
            last = AgentCheckpoint.objects.filter(run_id=self.run_id).order_by("-seq").first()
            next_seq = (last.seq + 1) if last else 0

            AgentCheckpoint.objects.create(
                run_id=self.run_id,
                seq=next_seq,
                state=state,
            )

        await _save()

        # Also emit checkpoint event.
        # NOTE(review): the payload carries the seq of the most recently
        # reserved *event*, not the checkpoint row's seq - confirm which one
        # consumers expect before changing it.
        await self.emit(EventType.STATE_CHECKPOINT, {"seq": self._seq - 1})

    async def get_state(self) -> Optional[dict]:
        """
        Get the last checkpointed state.

        Returns:
            The cached state if one is set, otherwise the state of the
            checkpoint with the highest seq for this run, or None when no
            checkpoint exists.
        """
        if self._state is not None:
            return self._state

        from asgiref.sync import sync_to_async
        from django_agent_runtime.models import AgentCheckpoint

        @sync_to_async
        def _get():
            checkpoint = (
                AgentCheckpoint.objects.filter(run_id=self.run_id)
                .order_by("-seq")
                .first()
            )
            return checkpoint.state if checkpoint else None

        self._state = await _get()
        return self._state

    def cancelled(self) -> bool:
        """Return the cancellation flag recorded by the last check_cancelled() query."""
        return self._is_cancelled

    async def check_cancelled(self) -> bool:
        """
        Async check for cancellation (asks the run queue).

        Call this periodically in long-running operations. Calls made less
        than `_cancel_check_interval` seconds after the previous query
        return the cached flag without querying again.

        Returns:
            True if cancellation has been requested.
        """
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        now = asyncio.get_running_loop().time()
        if now - self._last_cancel_check < self._cancel_check_interval:
            return self._is_cancelled

        self._last_cancel_check = now

        self._is_cancelled = await self._queue.is_cancelled(self.run_id)
        return self._is_cancelled
194
+
195
+
196
class AgentRunner:
    """
    Main runner for executing agent runs.

    Manages the lifecycle of runs including:
    - Claiming from queue
    - Executing with timeout
    - Heartbeat management
    - Error handling and retries
    - Cancellation
    """

    def __init__(
        self,
        worker_id: str,
        queue: RunQueue,
        event_bus: EventBus,
        trace_sink: Optional["TraceSink"] = None,
    ):
        """
        Args:
            worker_id: Identifier passed to the queue as the lease owner.
            queue: Queue used to extend leases, release and requeue runs.
            event_bus: Bus that run events are published to.
            trace_sink: Optional tracing sink notified at run start/end.
        """
        self.worker_id = worker_id
        self.queue = queue
        self.event_bus = event_bus
        self.trace_sink = trace_sink
        self.settings = runtime_settings()

        self._running = False
        self._current_runs: dict[UUID, asyncio.Task] = {}

    async def run_once(self, queued_run: QueuedRun) -> None:
        """
        Execute a single run.

        Resolves the runtime, builds the context, runs the agent under a
        timeout with a background heartbeat, and dispatches to the matching
        success / timeout / cancellation / error handler. The trace sink, if
        any, is told the actual outcome ("succeeded", "failed", "cancelled"
        or "timed_out").

        Args:
            queued_run: The claimed run to execute.
        """
        run_id = queued_run.run_id
        agent_key = queued_run.agent_key

        print(f"[agent-runner] Starting run {run_id} (agent={agent_key}, attempt={queued_run.attempt})", flush=True)

        # Start tracing
        if self.trace_sink:
            self.trace_sink.start_run(run_id, {"agent_key": agent_key})

        # Outcome reported to the trace sink; stays "failed" unless a handler
        # for another outcome completes.
        outcome = "failed"

        try:
            # Get the runtime
            debug_print(f"Getting runtime for agent_key={agent_key}")
            runtime = get_runtime(agent_key)
            debug_print(f"Got runtime: {runtime.__class__.__name__}")

            # Build context
            ctx = await self._build_context(queued_run, runtime)
            debug_print(f"Context built: {len(ctx.input_messages)} messages")
            if DEBUG:
                # Only format messages when they will actually be printed.
                # Content may be None or non-string, so stringify before
                # truncating; slicing it directly used to abort the run.
                for i, msg in enumerate(ctx.input_messages):
                    role = msg.get("role", "unknown")
                    content = str(msg.get("content") or "")
                    suffix = "..." if len(content) > 100 else ""
                    debug_print(f"  [{i}] {role}: {content[:100]}{suffix}")

            # Emit started event
            await ctx.emit(EventType.RUN_STARTED, {
                "agent_key": agent_key,
                "attempt": queued_run.attempt,
            })

            # Start heartbeat task
            heartbeat_task = asyncio.create_task(
                self._heartbeat_loop(run_id, ctx)
            )

            try:
                # Execute with timeout
                debug_print(f"Calling runtime.run() with timeout={self.settings.RUN_TIMEOUT_SECONDS}s")
                result = await asyncio.wait_for(
                    runtime.run(ctx),
                    timeout=self.settings.RUN_TIMEOUT_SECONDS,
                )

                # Check for cancellation
                if ctx.cancelled():
                    await self._handle_cancellation(run_id, ctx)
                    outcome = "cancelled"
                    return

                # Success!
                await self._handle_success(run_id, ctx, result)
                outcome = "succeeded"

            except asyncio.TimeoutError:
                await self._handle_timeout(run_id, ctx)
                outcome = "timed_out"

            except asyncio.CancelledError:
                await self._handle_cancellation(run_id, ctx)
                outcome = "cancelled"

            except Exception as e:
                print(f"[agent-runner] Runtime error in run {run_id}: {e}", flush=True)
                traceback.print_exc()
                await self._handle_error(
                    run_id, ctx, runtime, e,
                    attempt=queued_run.attempt,
                    max_attempts=self.settings.DEFAULT_MAX_ATTEMPTS,
                )

            finally:
                heartbeat_task.cancel()
                try:
                    await heartbeat_task
                except asyncio.CancelledError:
                    pass
                except Exception as hb_error:
                    # The heartbeat loop may already have died with its own
                    # error. Letting that propagate would reach the outer
                    # handler and release the run as failed even though it
                    # has already been finalised above.
                    print(f"[agent-runner] Heartbeat task for run {run_id} failed: {hb_error}", flush=True)

        except Exception as e:
            # Error before run started (e.g., runtime not found)
            outcome = "failed"
            print(f"[agent-runner] Failed to start run {run_id}: {e}", flush=True)
            traceback.print_exc()
            await self.queue.release(
                run_id,
                self.worker_id,
                success=False,
                error={
                    "type": type(e).__name__,
                    "message": str(e),
                    "stack": traceback.format_exc(),
                    "retriable": False,
                },
            )

        finally:
            if self.trace_sink:
                self.trace_sink.end_run(run_id, outcome)

    async def _build_context(
        self, queued_run: QueuedRun, runtime: AgentRuntime
    ) -> RunContextImpl:
        """
        Build the run context.

        Args:
            queued_run: Run whose input and metadata populate the context.
            runtime: The resolved runtime (currently unused here).

        Returns:
            A RunContextImpl whose event sequence continues from the event
            bus's next sequence number for this run.
        """
        input_data = queued_run.input
        messages = input_data.get("messages", [])
        params = input_data.get("params", {})

        # Get conversation_id from metadata
        conversation_id = queued_run.metadata.get("conversation_id")
        if conversation_id:
            conversation_id = UUID(conversation_id) if isinstance(conversation_id, str) else conversation_id

        # Build tool registry (could be customized per agent)
        tool_registry = ToolRegistry()

        # Get next sequence number
        seq = await self.event_bus.get_next_seq(queued_run.run_id)

        return RunContextImpl(
            run_id=queued_run.run_id,
            conversation_id=conversation_id,
            input_messages=messages,
            params=params,
            metadata=queued_run.metadata,
            tool_registry=tool_registry,
            _event_bus=self.event_bus,
            _queue=self.queue,
            _worker_id=self.worker_id,
            _seq=seq,
        )

    async def _heartbeat_loop(self, run_id: UUID, ctx: RunContextImpl) -> None:
        """
        Send periodic heartbeats to extend lease.

        Runs until cancelled or until the queue reports that the lease could
        not be extended. Each cycle also emits a heartbeat event and
        refreshes the context's cancellation flag.
        """
        while True:
            await asyncio.sleep(self.settings.HEARTBEAT_INTERVAL_SECONDS)

            # Extend lease
            extended = await self.queue.extend_lease(
                run_id,
                self.worker_id,
                self.settings.LEASE_TTL_SECONDS,
            )

            if not extended:
                print(f"[agent-runner] Lost lease on run {run_id}", flush=True)
                break

            # Emit heartbeat event
            await ctx.emit(EventType.RUN_HEARTBEAT, {})

            # Check for cancellation
            await ctx.check_cancelled()

    async def _handle_success(
        self, run_id: UUID, ctx: RunContextImpl, result: RunResult
    ) -> None:
        """
        Handle successful run completion.

        Emits the success event, releases the run with its output and then
        invokes the completion hook.
        """
        print(f"[agent-runner] Run {run_id} succeeded", flush=True)

        output = {
            "final_output": result.final_output,
            "final_messages": result.final_messages,
            "usage": result.usage,
            "artifacts": result.artifacts,
        }

        # Emit success event
        await ctx.emit(EventType.RUN_SUCCEEDED, {
            "output": result.final_output,
            "usage": result.usage,
        })

        # Release with success
        await self.queue.release(
            run_id,
            self.worker_id,
            success=True,
            output=output,
        )

        # Call completion hook if configured
        await self._call_completion_hook(run_id, output)

    async def _call_completion_hook(self, run_id: UUID, output: dict) -> None:
        """
        Call the configured completion hook if any.

        Hook errors are reported and swallowed so they cannot turn a
        finished run into a failure.
        """
        from django_agent_runtime.conf import get_hook

        hook = get_hook(self.settings.RUN_COMPLETED_HOOK)
        if not hook:
            return

        try:
            # Run hook in thread pool since it may do sync I/O
            from asgiref.sync import sync_to_async
            await sync_to_async(hook)(str(run_id), output)
        except Exception as e:
            print(f"[agent-runner] Error in completion hook for run {run_id}: {e}", flush=True)

    async def _handle_timeout(self, run_id: UUID, ctx: RunContextImpl) -> None:
        """Handle run timeout: emit the timed-out event and release as a non-retriable failure."""
        print(f"[agent-runner] Run {run_id} timed out after {self.settings.RUN_TIMEOUT_SECONDS}s", flush=True)

        await ctx.emit(EventType.RUN_TIMED_OUT, {
            "timeout_seconds": self.settings.RUN_TIMEOUT_SECONDS,
        })

        await self.queue.release(
            run_id,
            self.worker_id,
            success=False,
            error={
                "type": "TimeoutError",
                "message": f"Run exceeded {self.settings.RUN_TIMEOUT_SECONDS}s timeout",
                "retriable": False,
            },
        )

    async def _handle_cancellation(self, run_id: UUID, ctx: RunContextImpl) -> None:
        """Handle run cancellation: emit the cancelled event and mark the run row cancelled."""
        print(f"[agent-runner] Run {run_id} cancelled", flush=True)

        await ctx.emit(EventType.RUN_CANCELLED, {})

        # Update status directly (not through queue.release)
        from asgiref.sync import sync_to_async
        from django_agent_runtime.models import AgentRun
        from django_agent_runtime.models.base import RunStatus

        @sync_to_async
        def _update():
            AgentRun.objects.filter(id=run_id).update(
                status=RunStatus.CANCELLED,
                finished_at=datetime.now(timezone.utc),
                lease_owner="",
                lease_expires_at=None,
            )

        await _update()

    async def _handle_error(
        self,
        run_id: UUID,
        ctx: RunContextImpl,
        runtime: AgentRuntime,
        error: Exception,
        attempt: int = 1,
        max_attempts: Optional[int] = None,
    ) -> None:
        """
        Handle run error with retry logic.

        Must be called from inside the `except` block that caught `error`,
        because the fallback stack trace is taken from the active exception.

        Args:
            run_id: The failing run.
            ctx: The run's context, used to emit events.
            runtime: Runtime asked to classify the error via on_error().
            error: The exception that was raised.
            attempt: Number of the attempt that just failed.
            max_attempts: Attempt limit; defaults to DEFAULT_MAX_ATTEMPTS.
        """
        if max_attempts is None:
            max_attempts = self.settings.DEFAULT_MAX_ATTEMPTS

        print(f"[agent-runner] Run {run_id} failed (attempt {attempt}/{max_attempts}): {error}", flush=True)

        # Let runtime classify the error
        error_info = await runtime.on_error(ctx, error)
        if error_info is None:
            # Unclassified errors are treated as retriable.
            error_info = ErrorInfo(
                type=type(error).__name__,
                message=str(error),
                stack=traceback.format_exc(),
                retriable=True,
            )

        # Build comprehensive error dict for events and storage
        error_dict = {
            "type": error_info.type,
            "message": error_info.message,
            "stack": error_info.stack,
            "retriable": error_info.retriable,
            "details": error_info.details,
        }

        # Check if we should retry
        can_retry = error_info.retriable and attempt < max_attempts

        if can_retry:
            # Try to requeue
            requeued = await self.queue.requeue_for_retry(
                run_id,
                self.worker_id,
                error_dict,
                delay_seconds=self._calculate_backoff(ctx, attempt),
            )

            if requeued:
                print(f"[agent-runner] Run {run_id} requeued for retry (attempt {attempt + 1})", flush=True)
                # Emit an error event so UI knows about the retry
                await ctx.emit(EventType.ERROR, {
                    "message": f"Error occurred, retrying... (attempt {attempt}/{max_attempts})",
                    "error": error_info.message,
                    "error_type": error_info.type,
                    "attempt": attempt,
                    "max_attempts": max_attempts,
                    "retriable": True,
                })
                return

        # Final failure - emit detailed run.failed event
        await ctx.emit(EventType.RUN_FAILED, {
            "error": error_dict["message"],
            "error_type": error_dict["type"],
            "error_details": error_dict,
            "attempt": attempt,
            "max_attempts": max_attempts,
            "retriable": False,  # No more retries
        })

        await self.queue.release(
            run_id,
            self.worker_id,
            success=False,
            error=error_dict,
        )

    def _calculate_backoff(self, ctx: RunContextImpl, attempt: int = 1) -> int:
        """
        Calculate exponential backoff delay.

        Args:
            ctx: The run context (unused; kept for interface stability).
            attempt: Number of the attempt that just failed.

        Returns:
            min(RETRY_BACKOFF_BASE ** attempt, RETRY_BACKOFF_MAX), truncated
            to an int.
        """
        base = self.settings.RETRY_BACKOFF_BASE
        max_backoff = self.settings.RETRY_BACKOFF_MAX

        try:
            delay = min(base ** attempt, max_backoff)
        except OverflowError:
            # A float base raised to a large attempt can overflow; that is
            # certainly above the cap.
            delay = max_backoff
        return int(delay)
@@ -0,0 +1,48 @@
1
+ """
2
+ Tracing/observability layer for agent runs.
3
+
4
+ Provides:
5
+ - TraceSink: Abstract interface (from interfaces.py)
6
+ - NoopTraceSink: Default no-op implementation
7
+ - LangfuseTraceSink: Langfuse integration (optional)
8
+ """
9
+
10
+ from django_agent_runtime.runtime.interfaces import TraceSink
11
+ from django_agent_runtime.runtime.tracing.noop import NoopTraceSink
12
+
13
# Public API of the tracing package. LangfuseTraceSink is intentionally not
# listed: it is imported lazily inside get_trace_sink().
__all__ = [
    "TraceSink",
    "NoopTraceSink",
    "get_trace_sink",
]
18
+
19
+
20
def get_trace_sink() -> TraceSink:
    """
    Build the trace sink selected by the runtime settings.

    Returns:
        A LangfuseTraceSink when LANGFUSE_ENABLED is set and the sink can be
        constructed; otherwise a NoopTraceSink (also used, with a warning,
        when the langfuse package is missing).
    """
    from django_agent_runtime.conf import runtime_settings

    config = runtime_settings()

    if not config.LANGFUSE_ENABLED:
        return NoopTraceSink()

    try:
        from django_agent_runtime.runtime.tracing.langfuse import LangfuseTraceSink

        # The constructor itself raises ImportError when langfuse is not
        # installed, so it has to stay inside this try block.
        sink = LangfuseTraceSink(
            public_key=config.LANGFUSE_PUBLIC_KEY,
            secret_key=config.LANGFUSE_SECRET_KEY,
            host=config.LANGFUSE_HOST,
        )
    except ImportError:
        import logging

        log = logging.getLogger(__name__)
        log.warning(
            "Langfuse enabled but langfuse package not installed. Using NoopTraceSink."
        )
        return NoopTraceSink()

    return sink
48
+
@@ -0,0 +1,117 @@
1
+ """
2
+ Langfuse trace sink implementation.
3
+
4
+ Langfuse is an open-source LLM observability platform.
5
+ This is an OPTIONAL integration - the core runtime doesn't depend on it.
6
+
7
+ See: https://langfuse.com/
8
+ """
9
+
10
import logging
from typing import Any, Optional
from uuid import UUID

from django_agent_runtime.runtime.interfaces import TraceSink
15
+
16
# langfuse is an optional dependency: when it is missing, Langfuse is set to
# None and LangfuseTraceSink.__init__ raises ImportError instead.
try:
    from langfuse import Langfuse
except ImportError:
    Langfuse = None

# Module-level logger used to report tracing failures as warnings.
logger = logging.getLogger(__name__)
22
+
23
+
24
class LangfuseTraceSink(TraceSink):
    """
    Langfuse trace sink for LLM observability.

    Sends traces to Langfuse for monitoring, debugging, and analytics.
    Every call into the Langfuse client is wrapped so that a tracing failure
    is logged as a warning and never propagates to the caller.
    """

    def __init__(
        self,
        public_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        host: Optional[str] = None,
    ):
        """
        Args:
            public_key: Langfuse public key, passed through to the client.
            secret_key: Langfuse secret key, passed through to the client.
            host: Langfuse host, passed through to the client.

        Raises:
            ImportError: If the langfuse package is not installed.
        """
        if Langfuse is None:
            raise ImportError("langfuse package is required for LangfuseTraceSink")

        self._client = Langfuse(
            public_key=public_key,
            secret_key=secret_key,
            host=host,
        )
        # Open trace handles keyed by run id; entries are removed in end_run().
        self._traces: dict[UUID, Any] = {}

    def start_run(self, run_id: UUID, metadata: dict) -> None:
        """
        Start a new trace in Langfuse.

        Args:
            run_id: Run identifier; its string form is used as the trace id.
            metadata: Trace metadata; "agent_key" (if present) names the trace.
        """
        try:
            trace = self._client.trace(
                id=str(run_id),
                name=metadata.get("agent_key", "agent_run"),
                metadata=metadata,
            )
            self._traces[run_id] = trace
        except Exception as e:
            logger.warning(f"Failed to start Langfuse trace: {e}")

    def log_event(self, run_id: UUID, event_type: str, payload: dict) -> None:
        """
        Log an event to the trace.

        Does nothing when no trace was started for `run_id`.

        Args:
            run_id: Run whose trace receives the event.
            event_type: Event type string (e.g. "assistant.message").
            payload: Event payload.
        """
        trace = self._traces.get(run_id)
        if trace is None:
            return

        try:
            # Map event types to Langfuse concepts
            if event_type == "assistant.message":
                trace.generation(
                    name="assistant_message",
                    output=payload.get("content", ""),
                    metadata=payload,
                )
            elif event_type == "tool.call":
                trace.span(
                    name=f"tool:{payload.get('name', 'unknown')}",
                    input=payload.get("arguments", {}),
                )
            elif event_type == "tool.result":
                # Tool results are currently not forwarded: the span created
                # for the matching tool.call is not retained, so there is
                # nothing to attach the result to.
                pass
            else:
                # Generic event
                trace.event(
                    name=event_type,
                    metadata=payload,
                )
        except Exception as e:
            logger.warning(f"Failed to log Langfuse event: {e}")

    def end_run(self, run_id: UUID, outcome: str, metadata: Optional[dict] = None) -> None:
        """
        End the trace.

        Removes the trace handle for `run_id` and updates the trace with a
        final status. Does nothing when no trace was started for `run_id`.

        Args:
            run_id: Run whose trace is being closed.
            outcome: One of "succeeded", "failed", "cancelled" or
                "timed_out"; any other value is reported as "UNKNOWN".
            metadata: Optional metadata attached to the final update.
        """
        trace = self._traces.pop(run_id, None)
        if trace is None:
            return

        try:
            # Update trace with final status
            status_map = {
                "succeeded": "SUCCESS",
                "failed": "ERROR",
                "cancelled": "CANCELLED",
                "timed_out": "ERROR",
            }
            trace.update(
                status=status_map.get(outcome, "UNKNOWN"),
                metadata=metadata or {},
            )
        except Exception as e:
            logger.warning(f"Failed to end Langfuse trace: {e}")

    def flush(self) -> None:
        """Flush any buffered traces to Langfuse."""
        try:
            self._client.flush()
        except Exception as e:
            logger.warning(f"Failed to flush Langfuse traces: {e}")
117
+