dispatch_agents 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. agentservice/__init__.py +0 -0
  2. agentservice/py.typed +0 -0
  3. agentservice/v1/__init__.py +0 -0
  4. agentservice/v1/message_pb2.py +41 -0
  5. agentservice/v1/message_pb2.pyi +22 -0
  6. agentservice/v1/message_pb2_grpc.py +4 -0
  7. agentservice/v1/request_response_pb2.py +46 -0
  8. agentservice/v1/request_response_pb2.pyi +54 -0
  9. agentservice/v1/request_response_pb2_grpc.py +4 -0
  10. agentservice/v1/service_pb2.py +43 -0
  11. agentservice/v1/service_pb2.pyi +6 -0
  12. agentservice/v1/service_pb2_grpc.py +129 -0
  13. dispatch_agents/__init__.py +281 -0
  14. dispatch_agents/agent_service.py +135 -0
  15. dispatch_agents/config.py +490 -0
  16. dispatch_agents/contrib/__init__.py +1 -0
  17. dispatch_agents/contrib/claude/__init__.py +246 -0
  18. dispatch_agents/contrib/openai/__init__.py +167 -0
  19. dispatch_agents/events.py +986 -0
  20. dispatch_agents/grpc_server.py +565 -0
  21. dispatch_agents/instrument.py +217 -0
  22. dispatch_agents/integrations/__init__.py +1 -0
  23. dispatch_agents/integrations/github/README.md +9 -0
  24. dispatch_agents/integrations/github/__init__.py +4268 -0
  25. dispatch_agents/invocation.py +25 -0
  26. dispatch_agents/llm.py +1017 -0
  27. dispatch_agents/llm_langchain.py +394 -0
  28. dispatch_agents/logging_config.py +133 -0
  29. dispatch_agents/mcp.py +266 -0
  30. dispatch_agents/memory.py +264 -0
  31. dispatch_agents/models.py +748 -0
  32. dispatch_agents/proxy/__init__.py +6 -0
  33. dispatch_agents/proxy/server.py +1137 -0
  34. dispatch_agents/proxy/sse_utils.py +76 -0
  35. dispatch_agents/py.typed +0 -0
  36. dispatch_agents/resources.py +68 -0
  37. dispatch_agents/version.py +19 -0
  38. dispatch_agents-0.9.0.dist-info/METADATA +20 -0
  39. dispatch_agents-0.9.0.dist-info/RECORD +43 -0
  40. dispatch_agents-0.9.0.dist-info/WHEEL +4 -0
  41. dispatch_agents-0.9.0.dist-info/licenses/LICENSE +191 -0
  42. dispatch_agents-0.9.0.dist-info/licenses/LICENSE-3rdparty.csv +12 -0
  43. dispatch_agents-0.9.0.dist-info/licenses/NOTICE +5 -0
dispatch_agents/llm.py ADDED
@@ -0,0 +1,1017 @@
1
+ """LLM inference client for Dispatch agents.
2
+
3
+ Provides easy access to LLM inference via the Dispatch proxy with automatic
4
+ trace correlation. LLM calls made inside handler functions (@fn() or @on())
5
+ are automatically correlated with the invocation trace.
6
+
7
+ IMPORTANT: LLM calls should be made inside handler functions, not at module level.
8
+ Calls made outside handlers won't be associated with any trace.
9
+
10
+ Example:
11
+ from dispatch_agents import fn, llm
12
+
13
+ @fn()
14
+ async def my_handler(payload):
15
+ # Simple chat (one-off message)
16
+ response = await llm.chat("What is 2+2?")
17
+ print(response.content) # "4"
18
+
19
+ # With system prompt
20
+ response = await llm.chat(
21
+ "Summarize this document",
22
+ system="You are a helpful assistant that summarizes text concisely."
23
+ )
24
+
25
+ # Full conversation with message history
26
+ response = await llm.inference([
27
+ {"role": "system", "content": "You are helpful."},
28
+ {"role": "user", "content": "Hello!"},
29
+ {"role": "assistant", "content": "Hi there!"},
30
+ {"role": "user", "content": "What's the weather?"}
31
+ ])
32
+ return response.content
33
+
34
+ # With structured output (JSON mode)
35
+ from pydantic import BaseModel
36
+
37
+ class Analysis(BaseModel):
38
+ sentiment: str
39
+ confidence: float
40
+
41
+ @fn()
42
+ async def analyze_sentiment(payload):
43
+ response = await llm.chat(
44
+ f"Analyze: {payload.text}",
45
+ response_format=Analysis
46
+ )
47
+ return response.parse_json(Analysis)
48
+
49
+ # With tool calling
50
+ @fn()
51
+ async def agent_with_tools(payload):
52
+ tools = [{"type": "function", "function": {"name": "get_weather", ...}}]
53
+ response = await llm.inference([{"role": "user", "content": payload.query}], tools=tools)
54
+ if response.tool_calls:
55
+ for call in response.tool_calls:
56
+ print(f"Call {call.function.name} with {call.function.arguments}")
57
+ return response.content
58
+ """
59
+
60
+ import os
61
+ from collections.abc import Generator, Sequence
62
+ from contextlib import contextmanager
63
+ from contextvars import ContextVar
64
+ from typing import Any, TypeVar, overload
65
+
66
+ import httpx
67
+ from pydantic import BaseModel
68
+
69
# Type variable bound to pydantic's BaseModel. It is used by the
# LLMResponse.parse_json() overloads so that passing a model class yields
# an instance of that same class to type checkers.
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
70
+
71
+ from .events import (
72
+ _get_api_base_url,
73
+ _get_auth_headers,
74
+ get_current_invocation_id,
75
+ get_current_trace_id,
76
+ )
77
+
78
# ContextVar holding per-request extra headers to forward to LLM providers.
# It is written by the extra_headers() context manager and read through
# get_extra_llm_headers(). A ContextVar is used because it is async-safe:
# concurrent handler invocations each get their own value. The default is
# None, meaning "no extra headers set".
_extra_llm_headers: ContextVar[dict[str, str] | None] = ContextVar(
    "extra_llm_headers", default=None
)
84
+
85
+
86
@contextmanager
def extra_headers(headers: dict[str, str]) -> Generator[None, None, None]:
    """Attach extra headers to LLM provider requests inside a ``with`` block.

    The headers are forwarded through the Dispatch proxy to the underlying
    LLM provider (e.g., an internal OpenAI-compatible gateway). Contexts may
    be nested: the inner context sees the outer headers as well, and when
    both define the same key the inner value wins. On exit the previous
    header set is restored.

    Args:
        headers: Header names and values to add for the duration of the block.

    Example:
        from dispatch_agents import extra_headers

        @fn()
        async def my_handler(payload):
            with extra_headers({"X-Dataset-Id": "team-ml"}):
                response = await llm.chat("Hello!")  # X-Dataset-Id sent to provider
    """
    inherited = _extra_llm_headers.get()
    # Layer the new headers on top of whatever an enclosing context set.
    layered = {**inherited, **headers} if inherited else {**headers}
    restore_token = _extra_llm_headers.set(layered)
    try:
        yield
    finally:
        # Restore the previous value even when the body raised.
        _extra_llm_headers.reset(restore_token)
109
+
110
+
111
def get_extra_llm_headers() -> dict[str, str]:
    """Return the extra LLM headers active in the current context.

    Returns:
        The headers stored by the enclosing extra_headers() context(s), or
        an empty dict when none are set.
    """
    active = _extra_llm_headers.get()
    if active:
        return active
    return {}
114
+
115
+
116
class LLMMessage(BaseModel):
    """A message in an LLM conversation.

    Instances passed to inference() or log_llm_call() are converted to plain
    dicts with ``model_dump(exclude_none=True)``, so optional fields left as
    None are omitted from the request.
    """

    # Role of the speaker: system, user, assistant, or tool.
    role: str
    # Message body: either plain text or a list of dict content parts.
    content: str | list[dict[str, Any]]
    # Optional; omitted from the serialized message when None.
    name: str | None = None
    # Optional; presumably the ID of the tool call that a "tool" message
    # answers — TODO confirm against the proxy API.
    tool_call_id: str | None = None
123
+
124
+
125
class LLMFunctionCall(BaseModel):
    """A function call within an LLM tool call."""

    # Name of the function the model wants to call.
    name: str
    # "arguments" is a JSON-encoded string per the OpenAI chat completions API
    # (e.g. '{"location": "NYC"}'), not a collection. The singular concept is
    # "the arguments blob"; the plural name mirrors the upstream API field name.
    # Callers must decode it (e.g. with json.loads) before use.
    arguments: str
133
+
134
+
135
class LLMToolCall(BaseModel):
    """A tool call from the LLM response.

    inference() builds these from each entry of the response's "tool_calls"
    list.
    """

    # Identifier of this tool call as returned in the response.
    id: str
    # Kind of tool call; defaults to "function" when the response omits it.
    type: str = "function"
    # The function name and JSON-encoded arguments for the call.
    function: LLMFunctionCall
141
+
142
+
143
class LLMResponse(BaseModel):
    """Result of an LLM inference call made through the Dispatch proxy.

    Carries the generated content and tool calls together with the usage,
    cost and latency figures reported by the proxy.
    """

    llm_call_id: str
    content: str | None
    tool_calls: list[LLMToolCall] | None
    finish_reason: str
    model: str
    provider: str
    variant_name: str | None
    input_tokens: int
    output_tokens: int
    cost_usd: float
    latency_ms: int

    def __str__(self) -> str:
        """Return the text content, or an empty string when there is none."""
        if self.content:
            return self.content
        return ""

    @property
    def total_tokens(self) -> int:
        """Total tokens used (input + output)."""
        return sum((self.input_tokens, self.output_tokens))

    # The two @overload signatures let type checkers narrow the return type
    # depending on whether a model class is supplied:
    #   response.parse_json(MyModel)  -> MyModel
    #   response.parse_json()         -> dict[str, Any]
    #
    # Overloads are used rather than a generic LLMResponse[T] because the
    # response object is built inside inference() from raw HTTP data; the
    # target model type only becomes known later, at parse time. Pydantic
    # generics need the type parameter at instantiation, which does not fit
    # this deferred-parsing pattern.
    @overload
    def parse_json(self, model: type[BaseModelT]) -> BaseModelT: ...

    @overload
    def parse_json(self, model: None = None) -> dict[str, Any]: ...

    def parse_json(
        self, model: type[BaseModel] | None = None
    ) -> dict[str, Any] | BaseModel:
        """Decode the response content as JSON.

        Args:
            model: Optional Pydantic model class to validate the decoded
                data against.

        Returns:
            The decoded JSON when no model is given, otherwise the result of
            ``model.model_validate()`` on the decoded data.

        Raises:
            ValueError: If the response has no content, or the content is
                not valid JSON.
        """
        import json

        # Guard clause: None and "" both count as "nothing to parse".
        if not self.content:
            raise ValueError("Response has no content to parse")

        decoded = json.loads(self.content)
        return decoded if model is None else model.model_validate(decoded)
207
+
208
+
209
class LLMClient:
    """Client for LLM inference via the Dispatch proxy.

    Trace context (trace ID and invocation ID) is picked up automatically
    from the current handler execution so that LLM calls are correlated with
    agent invocations.

    Example:
        from dispatch_agents import llm

        # Simple one-liner
        response = await llm.chat("What is Python?")

        # With system prompt
        response = await llm.chat(
            "Explain quantum computing",
            system="You explain complex topics simply."
        )

        # Full conversation history
        response = await llm.inference([
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"}
        ])

        # With structured output
        response = await llm.chat("List 3 colors", response_format={"type": "json_object"})
        colors = response.parse_json()
    """

    def __init__(
        self,
        *,
        model: str | None = None,
        provider: str | None = None,
        temperature: float = 1.0,
        max_tokens: int | None = None,
    ) -> None:
        """Initialize the client with optional per-client defaults.

        Args:
            model: Default model to use (e.g., "gpt-4o", "claude-3-5-sonnet")
            provider: Default provider (e.g., "openai", "anthropic")
            temperature: Default sampling temperature (0-2)
            max_tokens: Default maximum tokens in response
        """
        # Resolved lazily on first use; see _ensure_api_base_url().
        self._api_base_url: str | None = None
        self._default_model = model
        self._default_provider = provider
        self._default_temperature = temperature
        self._default_max_tokens = max_tokens

    def _ensure_api_base_url(self) -> str:
        """Return the API base URL, looking it up on first use and caching it."""
        if self._api_base_url is None:
            self._api_base_url = _get_api_base_url()
        return self._api_base_url

    @staticmethod
    def _normalize_response_format(
        response_format: dict[str, Any] | type[BaseModel] | None,
    ) -> dict[str, Any] | None:
        """Convert a chat() ``response_format`` argument into a request dict.

        A dict is passed through unchanged; a Pydantic model class becomes a
        ``json_schema`` format built from its JSON schema; None stays None.

        Raises:
            TypeError: If the value is none of the supported kinds (for
                example a model *instance* instead of the model class).
        """
        if response_format is None:
            return None
        if isinstance(response_format, dict):
            return response_format
        if isinstance(response_format, type) and issubclass(
            response_format, BaseModel
        ):
            return {
                "type": "json_schema",
                "json_schema": {
                    "name": response_format.__name__,
                    "schema": response_format.model_json_schema(),
                },
            }
        # Previously such values were silently ignored, so the caller asked
        # for structured output and got an unstructured reply with no hint why.
        raise TypeError(
            "response_format must be a dict or a pydantic BaseModel subclass, "
            f"got {type(response_format).__name__}"
        )

    async def chat(
        self,
        message: str,
        *,
        system: str | None = None,
        model: str | None = None,
        provider: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict[str, Any] | type[BaseModel] | None = None,
    ) -> LLMResponse:
        """Simple chat interface for one-off messages.

        Builds a one- or two-message conversation (optional system prompt
        followed by the user message) and delegates to inference().

        Args:
            message: The user message to send
            system: Optional system prompt
            model: Model to use (uses client default or org default if not specified)
            provider: Provider to use (uses client default or org default if not specified)
            temperature: Sampling temperature (0-2)
            max_tokens: Maximum tokens in response
            response_format: Request structured output. Can be:
                - {"type": "json_object"} for JSON mode
                - A Pydantic model class for schema-guided generation

        Returns:
            LLMResponse with content, usage metrics, and cost

        Raises:
            TypeError: If ``response_format`` is neither a dict nor a
                Pydantic model class.

        Example:
            # Basic
            response = await llm.chat("What is 2+2?")
            print(response.content)

            # With system prompt
            response = await llm.chat(
                "Summarize this text",
                system="You summarize text in exactly 3 bullet points."
            )

            # Structured output with Pydantic model
            class Colors(BaseModel):
                colors: list[str]

            response = await llm.chat(
                "List 3 primary colors",
                response_format=Colors
            )
            result = response.parse_json(Colors)
            print(result.colors)  # ['red', 'blue', 'yellow']
        """
        # Validate/convert first so a bad argument fails before any work.
        format_dict = self._normalize_response_format(response_format)

        messages: list[dict[str, Any]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": message})

        return await self.inference(
            messages,
            model=model,
            provider=provider,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=format_dict,
        )

    async def inference(
        self,
        messages: Sequence[dict[str, Any] | LLMMessage],
        *,
        model: str | None = None,
        provider: str | None = None,
        tools: list[dict[str, Any]] | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict[str, Any] | None = None,
        trace_id: str | None = None,
        invocation_id: str | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> LLMResponse:
        """Execute LLM inference via Dispatch proxy.

        Automatically includes trace context from the current execution for
        correlation with agent invocations in observability tools.

        Args:
            messages: Conversation messages (dicts with role/content, or
                LLMMessage objects, which are dumped with None fields omitted)
            model: Model to use (e.g., "gpt-4o", "claude-sonnet-4-5").
                If omitted, falls back to the provider's configured default_model.
            provider: Provider to route the request to (e.g., "openai", "anthropic").
                If omitted, falls back to the org's ``default_provider``.
                If no default is configured, the request will fail with an error.
                **Tip:** always pass ``provider=`` explicitly when you pass
                ``model=`` to avoid accidentally sending a model name to the
                wrong provider.
            tools: Tool definitions for function calling
            temperature: Sampling temperature (0-2). Uses client default if not specified.
            max_tokens: Maximum tokens in response. Uses client default if not specified.
            response_format: Request structured output format (e.g., {"type": "json_object"})
            trace_id: Override trace ID (auto-detected from handler context if not provided)
            invocation_id: Override invocation ID (auto-detected from handler context if not provided).
                This links the LLM call to its parent invocation in the trace tree.
            extra_headers: Extra headers for this call only. They are merged
                over any headers set with the ``extra_headers()`` context
                manager (explicit keys win) and sent in the request payload
                under ``"extra_headers"``.

        Returns:
            LLMResponse with content, usage metrics, and cost

        Raises:
            httpx.HTTPStatusError: If the request fails
            RuntimeError: If DISPATCH_NAMESPACE is not set
            KeyError: If the proxy response lacks a required field

        Example:
            response = await llm_client.inference([
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "What is 2+2?"}
            ])
            print(f"Answer: {response.content}")
            print(f"Cost: ${response.cost_usd:.4f}")
        """
        api_base_url = self._ensure_api_base_url()

        # Convert LLMMessage objects to dicts; plain dicts pass through as-is.
        message_dicts = [
            msg.model_dump(exclude_none=True) if isinstance(msg, LLMMessage) else msg
            for msg in messages
        ]

        # Auto-detect context from current execution if not provided.
        # This enables automatic trace correlation when called from within a handler.
        if trace_id is None:
            trace_id = get_current_trace_id()
        if invocation_id is None:
            invocation_id = get_current_invocation_id()

        # Per-call arguments take precedence over the client defaults.
        effective_model = model if model is not None else self._default_model
        effective_provider = (
            provider if provider is not None else self._default_provider
        )
        effective_temperature = (
            temperature if temperature is not None else self._default_temperature
        )
        effective_max_tokens = (
            max_tokens if max_tokens is not None else self._default_max_tokens
        )

        # Build request payload; optional fields are sent only when set.
        payload: dict[str, Any] = {"messages": message_dicts}
        optional_fields: dict[str, Any] = {
            "temperature": effective_temperature,
            "model": effective_model,
            "provider": effective_provider,
            "tools": tools,
            "max_tokens": effective_max_tokens,
            "response_format": response_format,
            "trace_id": trace_id,
            "invocation_id": invocation_id,
        }
        payload.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        # Include agent name for cost tracking and budget enforcement
        agent_name = os.environ.get("DISPATCH_AGENT_NAME")
        if agent_name:
            payload["agent_name"] = agent_name

        # Merge extra headers: ContextVar first, then explicit param overrides
        merged_headers = {**get_extra_llm_headers()}
        if extra_headers:
            merged_headers.update(extra_headers)
        if merged_headers:
            payload["extra_headers"] = merged_headers

        url = f"{api_base_url}/llm/inference"
        auth_headers = _get_auth_headers()

        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                json=payload,
                headers=auth_headers,
                timeout=600.0,  # 10min — matches ALB idle timeout for long-context LLM calls
            )
            response.raise_for_status()
            data = response.json()

        # Parse tool calls if present (missing, None and [] all map to None).
        raw_tool_calls = data.get("tool_calls")
        tool_calls = (
            [LLMToolCall(**tc) for tc in raw_tool_calls] if raw_tool_calls else None
        )

        return LLMResponse(
            llm_call_id=data["llm_call_id"],
            content=data.get("content"),
            tool_calls=tool_calls,
            finish_reason=data["finish_reason"],
            model=data["model"],
            provider=data["provider"],
            variant_name=data.get("variant_name"),
            input_tokens=data["input_tokens"],
            output_tokens=data["output_tokens"],
            cost_usd=data["cost_usd"],
            latency_ms=data["latency_ms"],
        )
494
+
495
+
496
# Module-level singleton for convenient access. It is constructed with no
# arguments, so it uses LLMClient's constructor defaults. The module-level
# chat() and inference() functions delegate to this instance.
llm = LLMClient()
498
+
499
+
500
+ # Convenience functions for direct usage
501
async def chat(
    message: str,
    *,
    system: str | None = None,
    model: str | None = None,
    provider: str | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    response_format: dict[str, Any] | type[BaseModel] | None = None,
) -> LLMResponse:
    """Send a one-off chat message using the module-level ``llm`` client.

    Thin convenience wrapper: every argument is forwarded unchanged to
    LLMClient.chat(); see that method for the full documentation.

    Example:
        from dispatch_agents.llm import chat

        response = await chat("What is 2+2?")
        print(response.content)

        # With system prompt
        response = await chat(
            "Explain quantum computing",
            system="You explain complex topics simply."
        )
    """
    forwarded: dict[str, Any] = {
        "system": system,
        "model": model,
        "provider": provider,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": response_format,
    }
    return await llm.chat(message, **forwarded)
537
+
538
+
539
async def inference(
    messages: Sequence[dict[str, Any] | LLMMessage],
    *,
    model: str | None = None,
    provider: str | None = None,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    response_format: dict[str, Any] | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
    extra_headers: dict[str, str] | None = None,
) -> LLMResponse:
    """Run LLM inference using the module-level ``llm`` client.

    Thin convenience wrapper: every argument is forwarded unchanged to
    LLMClient.inference(); see that method for the full documentation.

    Example:
        from dispatch_agents.llm import inference

        response = await inference([
            {"role": "user", "content": "Hello!"}
        ])
        print(response.content)
    """
    forwarded: dict[str, Any] = {
        "model": model,
        "provider": provider,
        "tools": tools,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": response_format,
        "trace_id": trace_id,
        "invocation_id": invocation_id,
        "extra_headers": extra_headers,
    }
    return await llm.inference(messages, **forwarded)
577
+
578
+
579
async def log_llm_call(
    input_messages: Sequence[dict[str, Any] | LLMMessage],
    response_content: str | None = None,
    *,
    model: str,
    provider: str,
    input_tokens: int,
    output_tokens: int,
    tool_calls: list[dict[str, Any]] | None = None,
    finish_reason: str = "stop",
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an LLM call made to an external service so it shows up in traces.

    IMPORTANT: You do NOT need this function if you use Dispatch's built-in
    LLM client! llm.chat() and llm.inference() log their calls automatically.

    Use this ONLY when you call an LLM provider directly through its own SDK
    (OpenAI, Anthropic, etc.) instead of going through the Dispatch proxy,
    and you want those calls to appear in Dispatch traces alongside other
    agent activity.

    When to use this function:
        - You're using the OpenAI SDK directly for streaming or advanced features
        - You have existing code using provider SDKs that you don't want to migrate
        - You need features not yet supported by Dispatch's LLM client

    When NOT to use this function:
        - You're using llm.chat() or llm.inference() - they log automatically!

    Args:
        input_messages: The conversation messages sent to the LLM (full context, not deltas)
        response_content: The text content of the LLM's response
        model: Model used (e.g., "gpt-4o", "claude-3-5-sonnet-20241022")
        provider: Provider name (e.g., "openai", "anthropic")
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens
        tool_calls: Tool/function calls returned by the LLM (optional)
        finish_reason: Reason the generation stopped (default: "stop")
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context if not provided)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Raises:
        httpx.HTTPStatusError: If the logging request returns an error status

    Example:
        # Using OpenAI client directly (only do this if you need features
        # not available in llm.chat(), otherwise just use llm.chat()!)
        from openai import AsyncOpenAI
        from dispatch_agents import llm

        client = AsyncOpenAI()
        messages = [{"role": "user", "content": "Hello!"}]

        # Make the call directly to OpenAI
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )

        # Log it to Dispatch for trace visibility
        await llm.log_llm_call(
            input_messages=messages,
            response_content=response.choices[0].message.content,
            model="gpt-4o-mini",
            provider="openai",
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            finish_reason=response.choices[0].finish_reason,
        )
    """
    base_url = _get_api_base_url()

    # LLMMessage objects become dicts (None fields dropped); dicts pass through.
    serialized_messages = [
        entry.model_dump(exclude_none=True) if isinstance(entry, LLMMessage) else entry
        for entry in input_messages
    ]

    # Fall back to the ambient handler context for any ID not given explicitly.
    resolved_trace_id = get_current_trace_id() if trace_id is None else trace_id
    resolved_invocation_id = (
        get_current_invocation_id() if invocation_id is None else invocation_id
    )

    # Fields that are always sent.
    payload: dict[str, Any] = {
        "input_messages": serialized_messages,
        "response_content": response_content,
        "model": model,
        "provider": provider,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "finish_reason": finish_reason,
    }

    # Fields that are sent only when they carry a value.
    optional_fields: dict[str, Any] = {
        "tool_calls": tool_calls,
        "latency_ms": latency_ms,
        "trace_id": resolved_trace_id,
        "invocation_id": resolved_invocation_id,
    }
    for key, value in optional_fields.items():
        if value is not None:
            payload[key] = value

    # Include agent name for cost tracking
    agent_name = os.environ.get("DISPATCH_AGENT_NAME")
    if agent_name:
        payload["agent_name"] = agent_name

    endpoint = f"{base_url}/llm/log"
    auth_headers = _get_auth_headers()

    async with httpx.AsyncClient() as http:
        reply = await http.post(
            endpoint,
            json=payload,
            headers=auth_headers,
            timeout=10.0,
        )
        reply.raise_for_status()
        body = reply.json()

    return body["llm_call_id"]
708
+
709
+
710
+ # =============================================================================
711
+ # Ergonomic helpers for popular SDKs
712
+ # =============================================================================
713
+ # These functions auto-extract fields from SDK response objects so users
714
+ # don't have to manually pull out tokens, content, etc.
715
+
716
+
717
def _extract_openai_response(response: Any) -> dict[str, Any]:
    """Pull the log_llm_call() fields out of an OpenAI ChatCompletion.

    Works with both sync and async OpenAI SDK responses. Only the first
    choice is inspected; a response with no choices yields no content, no
    tool calls and a "stop" finish reason. Missing usage yields zero tokens.

    Args:
        response: OpenAI ChatCompletion object

    Returns:
        Dict with extracted fields for log_llm_call()
    """
    choices = response.choices
    first_choice = choices[0] if choices else None
    reply = first_choice.message if first_choice else None

    text = reply.content if reply else None

    # Re-express the SDK tool-call objects as plain dicts (OpenAI format).
    serialized_calls = None
    if reply and reply.tool_calls:
        serialized_calls = []
        for call in reply.tool_calls:
            serialized_calls.append(
                {
                    "id": call.id,
                    "type": call.type,
                    "function": {
                        "name": call.function.name,
                        "arguments": call.function.arguments,
                    },
                }
            )

    usage = response.usage
    if usage:
        prompt_count = usage.prompt_tokens
        completion_count = usage.completion_tokens
    else:
        prompt_count = 0
        completion_count = 0

    return {
        "response_content": text,
        "model": response.model,
        "provider": "openai",
        "input_tokens": prompt_count,
        "output_tokens": completion_count,
        "tool_calls": serialized_calls,
        "finish_reason": first_choice.finish_reason if first_choice else "stop",
    }
758
+
759
+
760
def _extract_anthropic_response(response: Any) -> dict[str, Any]:
    """Extract fields from an Anthropic Message response.

    Text blocks are joined with newlines into ``response_content``.
    ``tool_use`` blocks are converted to OpenAI-style tool-call dicts whose
    ``function.arguments`` is a JSON-encoded string. Anthropic's
    ``stop_reason`` is mapped to the standard ``finish_reason`` values;
    unknown reasons are passed through unchanged. Missing usage yields zero
    tokens.

    Args:
        response: Anthropic Message object

    Returns:
        Dict with extracted fields for log_llm_call()
    """
    import json

    def _encode_arguments(raw_input: Any) -> str:
        """Return the tool input as the JSON string ``arguments`` must hold."""
        if isinstance(raw_input, str):
            return raw_input
        try:
            # str() on a dict gives a Python repr ({'a': 1}), which is not
            # valid JSON and cannot be decoded by consumers of "arguments".
            return json.dumps(raw_input)
        except (TypeError, ValueError):
            # Best effort for inputs that are not JSON-serializable.
            return str(raw_input)

    text_blocks: list[str] = []
    tool_use_blocks: list[dict[str, Any]] = []

    # Anthropic returns content as a list of blocks; may be empty or None.
    for part in response.content or ():
        # Duck type check for text block
        if hasattr(part, "text"):
            text_blocks.append(part.text)
        # Duck type check for tool_use block
        elif hasattr(part, "type") and part.type == "tool_use":
            tool_use_blocks.append(
                {
                    "id": part.id,
                    "type": "function",
                    "function": {
                        "name": part.name,
                        "arguments": _encode_arguments(part.input),
                    },
                }
            )

    content = "\n".join(text_blocks) if text_blocks else None
    tool_calls = tool_use_blocks or None

    # Map Anthropic stop_reason to standard finish_reason
    finish_reason_map = {
        "end_turn": "stop",
        "stop_sequence": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
    }
    finish_reason = finish_reason_map.get(response.stop_reason, response.stop_reason)

    usage = response.usage
    return {
        "response_content": content,
        "model": response.model,
        "provider": "anthropic",
        "input_tokens": usage.input_tokens if usage else 0,
        "output_tokens": usage.output_tokens if usage else 0,
        "tool_calls": tool_calls,
        "finish_reason": finish_reason,
    }
821
+
822
+
823
def _is_openai_response(response: Any) -> bool:
    """Report whether ``response`` looks like an OpenAI ChatCompletion.

    The check is duck-typed: the object must expose ``choices``, ``usage``
    and ``model``, and its ``usage`` must expose ``prompt_tokens``.
    """
    required_attrs = ("choices", "usage", "model")
    # all() stops at the first missing attribute, so response.usage is only
    # read once its presence has been confirmed.
    if not all(hasattr(response, attr) for attr in required_attrs):
        return False
    return hasattr(response.usage, "prompt_tokens")
831
+
832
+
833
def _is_anthropic_response(response: Any) -> bool:
    """Report whether ``response`` looks like an Anthropic Message.

    The check is duck-typed: the object must expose ``content``, ``usage``
    and ``stop_reason``, and its ``usage`` must expose ``input_tokens``.
    """
    required_attrs = ("content", "usage", "stop_reason")
    # all() stops at the first missing attribute, so response.usage is only
    # read once its presence has been confirmed.
    if not all(hasattr(response, attr) for attr in required_attrs):
        return False
    return hasattr(response.usage, "input_tokens")
841
+
842
+
843
async def log_openai_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Log an OpenAI ChatCompletion response for trace correlation.

    Convenience wrapper around log_llm_call(): the content, model, token
    counts, tool calls and finish reason are extracted from the OpenAI
    response object so the caller does not have to.

    IMPORTANT: You do NOT need this if you use llm.chat() - it logs automatically!

    Args:
        input_messages: The messages array you sent to OpenAI
        response: The ChatCompletion response from OpenAI
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Example:
        from openai import AsyncOpenAI
        from dispatch_agents import llm

        client = AsyncOpenAI()
        messages = [{"role": "user", "content": "Hello!"}]

        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )

        # One line to log - no manual field extraction!
        await llm.log_openai_response(messages, response)
    """
    fields = _extract_openai_response(response)

    # Forward exactly the fields log_llm_call() accepts, picked by name.
    forwarded_keys = (
        "response_content",
        "model",
        "provider",
        "input_tokens",
        "output_tokens",
        "tool_calls",
        "finish_reason",
    )
    forwarded = {key: fields[key] for key in forwarded_keys}

    return await log_llm_call(
        input_messages=input_messages,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
        **forwarded,
    )
898
+
899
+
900
async def log_anthropic_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an Anthropic Message response so it can be tied to a trace.

    Thin convenience layer over log_llm_call(): the response object is run
    through _extract_anthropic_response() and the extracted fields are
    forwarded, so callers do not have to pick them out by hand.

    IMPORTANT: This is unnecessary when you call llm.chat() - that path
    logs automatically!

    Args:
        input_messages: The messages array that was sent to Anthropic
        response: The Message object returned by Anthropic
        latency_ms: Elapsed time in milliseconds (optional)
        trace_id: Explicit trace ID (auto-detected from handler context
            when omitted)
        invocation_id: Explicit invocation ID (auto-detected from handler
            context when omitted)

    Returns:
        The llm_call_id assigned to this logged call

    Example:
        import anthropic
        from dispatch_agents import llm

        client = anthropic.AsyncAnthropic()
        messages = [{"role": "user", "content": "Hello!"}]

        response = await client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            messages=messages,
        )

        # A single call logs everything - no manual field extraction.
        await llm.log_anthropic_response(messages, response)
    """
    fields = _extract_anthropic_response(response)

    # Unpack the extracted values in the same order they are handed to
    # log_llm_call(); a missing key surfaces as KeyError before any logging.
    content = fields["response_content"]
    model_name = fields["model"]
    provider_name = fields["provider"]
    tokens_in = fields["input_tokens"]
    tokens_out = fields["output_tokens"]
    calls = fields["tool_calls"]
    reason = fields["finish_reason"]

    llm_call_id = await log_llm_call(
        input_messages=input_messages,
        response_content=content,
        model=model_name,
        provider=provider_name,
        input_tokens=tokens_in,
        output_tokens=tokens_out,
        tool_calls=calls,
        finish_reason=reason,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
    )
    return llm_call_id
956
+
957
+
958
async def log_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an LLM response for trace correlation, picking the provider.

    The response is checked with _is_openai_response() first and then with
    _is_anthropic_response(); the first match decides whether the call is
    handed to log_openai_response() or log_anthropic_response().

    IMPORTANT: This is unnecessary when you call llm.chat() - that path
    logs automatically!

    Args:
        input_messages: The messages array that was sent to the LLM
        response: The response object from OpenAI or Anthropic
        latency_ms: Elapsed time in milliseconds (optional)
        trace_id: Explicit trace ID (auto-detected from handler context
            when omitted)
        invocation_id: Explicit invocation ID (auto-detected from handler
            context when omitted)

    Returns:
        The llm_call_id assigned to this logged call

    Raises:
        ValueError: If neither provider check recognizes the response

    Example:
        from dispatch_agents import llm

        # OpenAI response
        response = await openai_client.chat.completions.create(...)
        await llm.log_response(messages, response)

        # Anthropic response
        response = await anthropic_client.messages.create(...)
        await llm.log_response(messages, response)
    """
    # Choose the provider-specific logger first (OpenAI is tested before
    # Anthropic), then forward the arguments through a single call site.
    if _is_openai_response(response):
        delegate = log_openai_response
    elif _is_anthropic_response(response):
        delegate = log_anthropic_response
    else:
        raise ValueError(
            "Unrecognized response type. Use log_openai_response(), "
            "log_anthropic_response(), or log_llm_call() with manual fields."
        )

    return await delegate(
        input_messages,
        response,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
    )