dispatch_agents 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentservice/__init__.py +0 -0
- agentservice/py.typed +0 -0
- agentservice/v1/__init__.py +0 -0
- agentservice/v1/message_pb2.py +41 -0
- agentservice/v1/message_pb2.pyi +22 -0
- agentservice/v1/message_pb2_grpc.py +4 -0
- agentservice/v1/request_response_pb2.py +46 -0
- agentservice/v1/request_response_pb2.pyi +54 -0
- agentservice/v1/request_response_pb2_grpc.py +4 -0
- agentservice/v1/service_pb2.py +43 -0
- agentservice/v1/service_pb2.pyi +6 -0
- agentservice/v1/service_pb2_grpc.py +129 -0
- dispatch_agents/__init__.py +281 -0
- dispatch_agents/agent_service.py +135 -0
- dispatch_agents/config.py +490 -0
- dispatch_agents/contrib/__init__.py +1 -0
- dispatch_agents/contrib/claude/__init__.py +246 -0
- dispatch_agents/contrib/openai/__init__.py +167 -0
- dispatch_agents/events.py +986 -0
- dispatch_agents/grpc_server.py +565 -0
- dispatch_agents/instrument.py +217 -0
- dispatch_agents/integrations/__init__.py +1 -0
- dispatch_agents/integrations/github/README.md +9 -0
- dispatch_agents/integrations/github/__init__.py +4268 -0
- dispatch_agents/invocation.py +25 -0
- dispatch_agents/llm.py +1017 -0
- dispatch_agents/llm_langchain.py +394 -0
- dispatch_agents/logging_config.py +133 -0
- dispatch_agents/mcp.py +266 -0
- dispatch_agents/memory.py +264 -0
- dispatch_agents/models.py +748 -0
- dispatch_agents/proxy/__init__.py +6 -0
- dispatch_agents/proxy/server.py +1137 -0
- dispatch_agents/proxy/sse_utils.py +76 -0
- dispatch_agents/py.typed +0 -0
- dispatch_agents/resources.py +68 -0
- dispatch_agents/version.py +19 -0
- dispatch_agents-0.9.0.dist-info/METADATA +20 -0
- dispatch_agents-0.9.0.dist-info/RECORD +43 -0
- dispatch_agents-0.9.0.dist-info/WHEEL +4 -0
- dispatch_agents-0.9.0.dist-info/licenses/LICENSE +191 -0
- dispatch_agents-0.9.0.dist-info/licenses/LICENSE-3rdparty.csv +12 -0
- dispatch_agents-0.9.0.dist-info/licenses/NOTICE +5 -0
dispatch_agents/llm.py
ADDED
|
@@ -0,0 +1,1017 @@
|
|
|
1
|
+
"""LLM inference client for Dispatch agents.
|
|
2
|
+
|
|
3
|
+
Provides easy access to LLM inference via the Dispatch proxy with automatic
|
|
4
|
+
trace correlation. LLM calls made inside handler functions (@fn() or @on())
|
|
5
|
+
are automatically correlated with the invocation trace.
|
|
6
|
+
|
|
7
|
+
IMPORTANT: LLM calls should be made inside handler functions, not at module level.
|
|
8
|
+
Calls made outside handlers won't be associated with any trace.
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
from dispatch_agents import fn, llm
|
|
12
|
+
|
|
13
|
+
@fn()
|
|
14
|
+
async def my_handler(payload):
|
|
15
|
+
# Simple chat (one-off message)
|
|
16
|
+
response = await llm.chat("What is 2+2?")
|
|
17
|
+
print(response.content) # "4"
|
|
18
|
+
|
|
19
|
+
# With system prompt
|
|
20
|
+
response = await llm.chat(
|
|
21
|
+
"Summarize this document",
|
|
22
|
+
system="You are a helpful assistant that summarizes text concisely."
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Full conversation with message history
|
|
26
|
+
response = await llm.inference([
|
|
27
|
+
{"role": "system", "content": "You are helpful."},
|
|
28
|
+
{"role": "user", "content": "Hello!"},
|
|
29
|
+
{"role": "assistant", "content": "Hi there!"},
|
|
30
|
+
{"role": "user", "content": "What's the weather?"}
|
|
31
|
+
])
|
|
32
|
+
return response.content
|
|
33
|
+
|
|
34
|
+
# With structured output (JSON mode)
|
|
35
|
+
from pydantic import BaseModel
|
|
36
|
+
|
|
37
|
+
class Analysis(BaseModel):
|
|
38
|
+
sentiment: str
|
|
39
|
+
confidence: float
|
|
40
|
+
|
|
41
|
+
@fn()
|
|
42
|
+
async def analyze_sentiment(payload):
|
|
43
|
+
response = await llm.chat(
|
|
44
|
+
f"Analyze: {payload.text}",
|
|
45
|
+
response_format=Analysis
|
|
46
|
+
)
|
|
47
|
+
return response.parse_json(Analysis)
|
|
48
|
+
|
|
49
|
+
# With tool calling
|
|
50
|
+
@fn()
|
|
51
|
+
async def agent_with_tools(payload):
|
|
52
|
+
tools = [{"type": "function", "function": {"name": "get_weather", ...}}]
|
|
53
|
+
response = await llm.inference([{"role": "user", "content": payload.query}], tools=tools)
|
|
54
|
+
if response.tool_calls:
|
|
55
|
+
for call in response.tool_calls:
|
|
56
|
+
print(f"Call {call.function.name} with {call.function.arguments}")
|
|
57
|
+
return response.content
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
import os
|
|
61
|
+
from collections.abc import Generator, Sequence
|
|
62
|
+
from contextlib import contextmanager
|
|
63
|
+
from contextvars import ContextVar
|
|
64
|
+
from typing import Any, TypeVar, overload
|
|
65
|
+
|
|
66
|
+
import httpx
|
|
67
|
+
from pydantic import BaseModel
|
|
68
|
+
|
|
69
|
+
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
|
|
70
|
+
|
|
71
|
+
from .events import (
|
|
72
|
+
_get_api_base_url,
|
|
73
|
+
_get_auth_headers,
|
|
74
|
+
get_current_invocation_id,
|
|
75
|
+
get_current_trace_id,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# ContextVar for per-request extra headers to forward to LLM providers.
# Used by the extra_headers() context manager — async-safe so concurrent
# handler invocations each get their own copy.
# The default of None means "no extra headers set"; readers in this module
# treat None and an empty dict the same way.
_extra_llm_headers: ContextVar[dict[str, str] | None] = ContextVar(
    "extra_llm_headers", default=None
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@contextmanager
def extra_headers(headers: dict[str, str]) -> Generator[None, None, None]:
    """Temporarily attach extra headers to LLM provider requests.

    While the ``with`` block is active, the given headers are stored in a
    context variable that the LLM client reads when building a request, so
    they are forwarded through the Dispatch proxy to the underlying LLM
    provider (e.g., an internal OpenAI-compatible gateway).

    Contexts nest: an inner block sees the outer block's headers merged
    with its own, and inner keys win on conflict. The previous value is
    restored when the block exits, even if it exits with an exception.

    Args:
        headers: Header names mapped to values to add for the duration of
            the block.

    Example:
        from dispatch_agents import extra_headers

        @fn()
        async def my_handler(payload):
            with extra_headers({"X-Dataset-Id": "team-ml"}):
                response = await llm.chat("Hello!")  # X-Dataset-Id sent to provider
    """
    inherited = _extra_llm_headers.get()
    # Later keys override earlier ones, so the caller's headers take precedence.
    combined = {**inherited, **headers} if inherited else {**headers}
    reset_token = _extra_llm_headers.set(combined)
    try:
        yield
    finally:
        # Restore whatever was active before this block was entered.
        _extra_llm_headers.reset(reset_token)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_extra_llm_headers() -> dict[str, str]:
    """Return the extra LLM headers active in the current context.

    Returns:
        The headers currently stored in the context variable, or a new
        empty dict when none are set.
    """
    active = _extra_llm_headers.get()
    return active if active else {}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class LLMMessage(BaseModel):
    """A message in an LLM conversation.

    Can be passed to the inference helpers in this module in place of a
    plain dict; it is serialized with ``model_dump(exclude_none=True)``, so
    optional fields left as None are omitted from the request.
    """

    role: str  # system, user, assistant, tool
    # Either plain text or a list of dict-shaped parts.
    # NOTE(review): presumably provider-style multi-part content — confirm
    # the accepted part schema against the proxy API.
    content: str | list[dict[str, Any]]
    # Optional; omitted from the serialized message when None.
    name: str | None = None
    # Optional; omitted from the serialized message when None.
    # NOTE(review): presumably ties a "tool" message to the tool call it
    # answers — confirm.
    tool_call_id: str | None = None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class LLMFunctionCall(BaseModel):
    """A function call within an LLM tool call.

    Holds the name of the function the model wants to invoke and the
    arguments for it as a JSON-encoded string.
    """

    # Name of the function to call.
    name: str
    # "arguments" is a JSON-encoded string per the OpenAI chat completions API
    # (e.g. '{"location": "NYC"}'), not a collection. The singular concept is
    # "the arguments blob"; the plural name mirrors the upstream API field name.
    arguments: str
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class LLMToolCall(BaseModel):
    """A tool call from the LLM response.

    Wraps one requested function invocation together with its identifier.
    """

    # Identifier of this tool call.
    id: str
    # Kind of tool call; defaults to "function" when the field is absent.
    type: str = "function"
    # The function name and JSON-encoded arguments requested by the model.
    function: LLMFunctionCall
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class LLMResponse(BaseModel):
    """Response from LLM inference.

    Bundles what the model produced (text content and/or tool calls) with
    the routing, token usage, cost and latency fields reported for the call.
    """

    # Identifier assigned to this LLM call.
    llm_call_id: str
    # Generated text; may be None (for example when only tool calls are returned).
    content: str | None
    # Tool calls requested by the model, or None when there are none.
    tool_calls: list[LLMToolCall] | None
    finish_reason: str
    model: str
    provider: str
    variant_name: str | None
    input_tokens: int
    output_tokens: int
    cost_usd: float
    latency_ms: int

    def __str__(self) -> str:
        """Return the text content, or an empty string when there is none."""
        text = self.content
        return text if text else ""

    @property
    def total_tokens(self) -> int:
        """Total tokens used (input + output)."""
        return sum((self.input_tokens, self.output_tokens))

    # The overloads below let type checkers pick the return type from the
    # call shape:
    #   response.parse_json(MyModel)  -> MyModel
    #   response.parse_json()         -> dict[str, Any]
    #
    # Overloads are used rather than a generic LLMResponse[T] because the
    # response object is built from raw HTTP data inside inference(), before
    # the caller has said which model (if any) to parse into. Pydantic
    # generics need the type parameter at instantiation time, which does not
    # fit this parse-later pattern.
    @overload
    def parse_json(self, model: type[BaseModelT]) -> BaseModelT: ...

    @overload
    def parse_json(self, model: None = None) -> dict[str, Any]: ...

    def parse_json(
        self, model: type[BaseModel] | None = None
    ) -> dict[str, Any] | BaseModel:
        """Decode the response content as JSON, optionally validating it.

        Args:
            model: Optional Pydantic model class. When given, the decoded
                JSON is validated with ``model.model_validate``.

        Returns:
            The decoded JSON value when ``model`` is omitted, otherwise the
            validated model instance.

        Raises:
            ValueError: If the response has no content, or the content is
                not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        import json

        raw = self.content
        if not raw:
            raise ValueError("Response has no content to parse")

        decoded = json.loads(raw)
        return decoded if model is None else model.model_validate(decoded)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class LLMClient:
    """Client for LLM inference via Dispatch proxy.

    Holds optional per-client defaults (model, provider, temperature,
    max_tokens) and sends requests to the proxy's ``/llm/inference``
    endpoint. Trace and invocation IDs are taken from the current handler
    context unless overridden, so calls are correlated with the surrounding
    agent invocation.

    Example:
        from dispatch_agents import llm

        # Simple one-liner
        response = await llm.chat("What is Python?")

        # With system prompt
        response = await llm.chat(
            "Explain quantum computing",
            system="You explain complex topics simply."
        )

        # Full conversation history
        response = await llm.inference([
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"}
        ])

        # With structured output
        response = await llm.chat("List 3 colors", response_format={"type": "json_object"})
        colors = response.parse_json()
    """

    def __init__(
        self,
        *,
        model: str | None = None,
        provider: str | None = None,
        temperature: float = 1.0,
        max_tokens: int | None = None,
    ) -> None:
        """Initialize LLM client with optional defaults.

        Args:
            model: Default model to use (e.g., "gpt-4o", "claude-3-5-sonnet")
            provider: Default provider (e.g., "openai", "anthropic")
            temperature: Default sampling temperature (0-2)
            max_tokens: Default maximum tokens in response
        """
        # Resolved lazily on first request; see _ensure_api_base_url().
        self._api_base_url: str | None = None
        self._default_model = model
        self._default_provider = provider
        self._default_temperature = temperature
        self._default_max_tokens = max_tokens

    def _ensure_api_base_url(self) -> str:
        """Return the API base URL, resolving and caching it on first use."""
        if self._api_base_url is None:
            self._api_base_url = _get_api_base_url()
        return self._api_base_url

    @staticmethod
    def _resolve_response_format(
        response_format: dict[str, Any] | type[BaseModel] | None,
    ) -> dict[str, Any] | None:
        """Convert a ``response_format`` argument into its request dict form.

        A dict is passed through unchanged; a Pydantic model class is turned
        into a ``json_schema`` format built from its JSON schema.

        Raises:
            TypeError: If the value is neither None, a dict, nor a
                ``BaseModel`` subclass (for example a model *instance*).
                Such values used to be dropped silently, which sent the
                request without any structured-output constraint.
        """
        if response_format is None:
            return None
        if isinstance(response_format, dict):
            return response_format
        if isinstance(response_format, type) and issubclass(
            response_format, BaseModel
        ):
            return {
                "type": "json_schema",
                "json_schema": {
                    "name": response_format.__name__,
                    "schema": response_format.model_json_schema(),
                },
            }
        raise TypeError(
            "response_format must be a dict or a pydantic BaseModel subclass, "
            f"got {response_format!r}"
        )

    @staticmethod
    def _to_message_dicts(
        messages: Sequence[dict[str, Any] | LLMMessage],
    ) -> list[dict[str, Any]]:
        """Serialize LLMMessage objects to dicts; plain dicts pass through as-is."""
        return [
            msg.model_dump(exclude_none=True) if isinstance(msg, LLMMessage) else msg
            for msg in messages
        ]

    async def chat(
        self,
        message: str,
        *,
        system: str | None = None,
        model: str | None = None,
        provider: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict[str, Any] | type[BaseModel] | None = None,
    ) -> LLMResponse:
        """Simple chat interface for one-off messages.

        This is the easiest way to call an LLM - just pass a string!

        Args:
            message: The user message to send
            system: Optional system prompt (skipped when empty)
            model: Model to use (uses client default or org default if not specified)
            provider: Provider to use (uses client default or org default if not specified)
            temperature: Sampling temperature (0-2)
            max_tokens: Maximum tokens in response
            response_format: Request structured output. Can be:
                - {"type": "json_object"} for JSON mode
                - A Pydantic model class for schema-guided generation

        Returns:
            LLMResponse with content, usage metrics, and cost

        Raises:
            TypeError: If ``response_format`` is not a dict or a Pydantic
                model class.

        Example:
            # Basic
            response = await llm.chat("What is 2+2?")
            print(response.content)

            # With system prompt
            response = await llm.chat(
                "Summarize this text",
                system="You summarize text in exactly 3 bullet points."
            )

            # Structured output with Pydantic model
            class Colors(BaseModel):
                colors: list[str]

            response = await llm.chat(
                "List 3 primary colors",
                response_format=Colors
            )
            result = response.parse_json(Colors)
            print(result.colors)  # ['red', 'blue', 'yellow']
        """
        # Convert first so an unusable response_format fails before any
        # request is assembled.
        format_dict = self._resolve_response_format(response_format)

        messages: list[dict[str, Any]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": message})

        return await self.inference(
            messages,
            model=model,
            provider=provider,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=format_dict,
        )

    async def inference(
        self,
        messages: Sequence[dict[str, Any] | LLMMessage],
        *,
        model: str | None = None,
        provider: str | None = None,
        tools: list[dict[str, Any]] | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: dict[str, Any] | None = None,
        trace_id: str | None = None,
        invocation_id: str | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> LLMResponse:
        """Execute LLM inference via Dispatch proxy.

        Automatically includes trace context from the current execution for
        correlation with agent invocations in observability tools.

        Args:
            messages: Conversation messages (dicts with role/content, or
                LLMMessage objects)
            model: Model to use (e.g., "gpt-4o", "claude-sonnet-4-5").
                If omitted, falls back to the provider's configured default_model.
            provider: Provider to route the request to (e.g., "openai", "anthropic").
                If omitted, falls back to the org's ``default_provider``.
                If no default is configured, the request will fail with an error.
                **Tip:** always pass ``provider=`` explicitly when you pass
                ``model=`` to avoid accidentally sending a model name to the
                wrong provider.
            tools: Tool definitions for function calling
            temperature: Sampling temperature (0-2). Uses client default if not specified.
            max_tokens: Maximum tokens in response. Uses client default if not specified.
            response_format: Request structured output format (e.g., {"type": "json_object"})
            trace_id: Override trace ID (auto-detected from handler context if not provided)
            invocation_id: Override invocation ID (auto-detected from handler context if not provided).
                This links the LLM call to its parent invocation in the trace tree.
            extra_headers: Extra headers to send along in the request payload.
                They are merged over any headers set via the
                ``extra_headers()`` context manager; keys given here win.

        Returns:
            LLMResponse with content, usage metrics, and cost

        Raises:
            httpx.HTTPStatusError: If the request fails
            RuntimeError: If DISPATCH_NAMESPACE is not set
            KeyError: If the proxy response lacks a required field

        Example:
            response = await llm_client.inference([
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "What is 2+2?"}
            ])
            print(f"Answer: {response.content}")
            print(f"Cost: ${response.cost_usd:.4f}")
        """
        api_base_url = self._ensure_api_base_url()

        message_dicts = self._to_message_dicts(messages)

        # Auto-detect context from current execution if not provided.
        # This enables automatic trace correlation when called from within a handler.
        if trace_id is None:
            trace_id = get_current_trace_id()
        if invocation_id is None:
            invocation_id = get_current_invocation_id()

        # Per-call arguments win over the defaults given to the constructor.
        effective_model = model if model is not None else self._default_model
        effective_provider = (
            provider if provider is not None else self._default_provider
        )
        effective_temperature = (
            temperature if temperature is not None else self._default_temperature
        )
        effective_max_tokens = (
            max_tokens if max_tokens is not None else self._default_max_tokens
        )

        payload: dict[str, Any] = {"messages": message_dicts}

        # Optional fields are only sent when they have a value.
        optional_fields = (
            ("temperature", effective_temperature),
            ("model", effective_model),
            ("provider", effective_provider),
            ("tools", tools),
            ("max_tokens", effective_max_tokens),
            ("response_format", response_format),
            ("trace_id", trace_id),
            ("invocation_id", invocation_id),
        )
        payload.update(
            {key: value for key, value in optional_fields if value is not None}
        )

        # Include agent name for cost tracking and budget enforcement
        agent_name = os.environ.get("DISPATCH_AGENT_NAME")
        if agent_name:
            payload["agent_name"] = agent_name

        # Merge extra headers: ContextVar first, then explicit param overrides.
        # The copy keeps the update below from mutating the context's dict.
        merged_headers = {**get_extra_llm_headers()}
        if extra_headers:
            merged_headers.update(extra_headers)
        if merged_headers:
            payload["extra_headers"] = merged_headers

        url = f"{api_base_url}/llm/inference"
        auth_headers = _get_auth_headers()

        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                json=payload,
                headers=auth_headers,
                timeout=600.0,  # 10min — matches ALB idle timeout for long-context LLM calls
            )
            response.raise_for_status()
            data = response.json()

        # Parse tool calls if present
        tool_calls = None
        if data.get("tool_calls"):
            tool_calls = [LLMToolCall(**tc) for tc in data["tool_calls"]]

        return LLMResponse(
            llm_call_id=data["llm_call_id"],
            content=data.get("content"),
            tool_calls=tool_calls,
            finish_reason=data["finish_reason"],
            model=data["model"],
            provider=data["provider"],
            variant_name=data.get("variant_name"),
            input_tokens=data["input_tokens"],
            output_tokens=data["output_tokens"],
            cost_usd=data["cost_usd"],
            latency_ms=data["latency_ms"],
        )
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
# Module-level singleton for convenient access.
# It is constructed with no arguments, so it has no default model, provider
# or max_tokens and uses LLMClient's default temperature. The module-level
# chat() and inference() functions delegate to this instance.
llm = LLMClient()
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# Convenience functions for direct usage
|
|
501
|
+
async def chat(
    message: str,
    *,
    system: str | None = None,
    model: str | None = None,
    provider: str | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    response_format: dict[str, Any] | type[BaseModel] | None = None,
) -> LLMResponse:
    """Send a one-off chat message using the module-level client.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``llm.chat()`` on the module-level singleton. See LLMClient.chat() for
    full documentation of the parameters and return value.

    Example:
        from dispatch_agents.llm import chat

        response = await chat("What is 2+2?")
        print(response.content)

        # With system prompt
        response = await chat(
            "Explain quantum computing",
            system="You explain complex topics simply."
        )
    """
    forwarded: dict[str, Any] = {
        "system": system,
        "model": model,
        "provider": provider,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": response_format,
    }
    return await llm.chat(message, **forwarded)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
async def inference(
    messages: Sequence[dict[str, Any] | LLMMessage],
    *,
    model: str | None = None,
    provider: str | None = None,
    tools: list[dict[str, Any]] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    response_format: dict[str, Any] | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
    extra_headers: dict[str, str] | None = None,
) -> LLMResponse:
    """Run LLM inference through the Dispatch proxy using the module-level client.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``llm.inference()`` on the module-level singleton. See
    LLMClient.inference() for full documentation.

    Example:
        from dispatch_agents.llm import inference

        response = await inference([
            {"role": "user", "content": "Hello!"}
        ])
        print(response.content)
    """
    forwarded: dict[str, Any] = {
        "model": model,
        "provider": provider,
        "tools": tools,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": response_format,
        "trace_id": trace_id,
        "invocation_id": invocation_id,
        "extra_headers": extra_headers,
    }
    return await llm.inference(messages, **forwarded)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
async def log_llm_call(
    input_messages: Sequence[dict[str, Any] | LLMMessage],
    response_content: str | None = None,
    *,
    model: str,
    provider: str,
    input_tokens: int,
    output_tokens: int,
    tool_calls: list[dict[str, Any]] | None = None,
    finish_reason: str = "stop",
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an LLM call that was made outside Dispatch, for trace correlation.

    IMPORTANT: You do NOT need this function if you use Dispatch's built-in
    LLM client! llm.chat() and llm.inference() log their calls automatically.

    Use this only when you call an LLM provider directly through its own SDK
    (OpenAI, Anthropic, etc.) instead of going through the Dispatch proxy.
    Posting the call details to the ``/llm/log`` endpoint makes those external
    calls show up in Dispatch traces alongside other agent activity.

    When to use this function:
    - You're using the OpenAI SDK directly for streaming or advanced features
    - You have existing code using provider SDKs that you don't want to migrate
    - You need features not yet supported by Dispatch's LLM client

    When NOT to use this function:
    - You're using llm.chat() or llm.inference() - they log automatically!

    Args:
        input_messages: The conversation messages sent to the LLM (full context, not deltas)
        response_content: The text content of the LLM's response
        model: Model used (e.g., "gpt-4o", "claude-3-5-sonnet-20241022")
        provider: Provider name (e.g., "openai", "anthropic")
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens
        tool_calls: Tool/function calls returned by the LLM (optional)
        finish_reason: Reason the generation stopped (default: "stop")
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context if not provided)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Raises:
        httpx.HTTPStatusError: If the logging request returns an error status

    Example:
        # Using OpenAI client directly (only do this if you need features
        # not available in llm.chat(), otherwise just use llm.chat()!)
        from openai import AsyncOpenAI
        from dispatch_agents import llm

        client = AsyncOpenAI()
        messages = [{"role": "user", "content": "Hello!"}]

        # Make the call directly to OpenAI
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )

        # Log it to Dispatch for trace visibility
        await llm.log_llm_call(
            input_messages=messages,
            response_content=response.choices[0].message.content,
            model="gpt-4o-mini",
            provider="openai",
            input_tokens=response.usage.prompt_tokens,
            output_tokens=response.usage.completion_tokens,
            finish_reason=response.choices[0].finish_reason,
        )
    """
    base_url = _get_api_base_url()

    # LLMMessage objects are serialized to dicts; plain dicts are sent as given.
    serialized_messages = [
        entry.model_dump(exclude_none=True) if isinstance(entry, LLMMessage) else entry
        for entry in input_messages
    ]

    # Fall back to the IDs of the current execution context when not supplied.
    trace_id = get_current_trace_id() if trace_id is None else trace_id
    invocation_id = (
        get_current_invocation_id() if invocation_id is None else invocation_id
    )

    payload: dict[str, Any] = {
        "input_messages": serialized_messages,
        "response_content": response_content,
        "model": model,
        "provider": provider,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "finish_reason": finish_reason,
    }

    # Optional fields are only included when they carry a value.
    optional_fields = (
        ("tool_calls", tool_calls),
        ("latency_ms", latency_ms),
        ("trace_id", trace_id),
        ("invocation_id", invocation_id),
    )
    payload.update({key: value for key, value in optional_fields if value is not None})

    # Include agent name for cost tracking
    agent_name = os.environ.get("DISPATCH_AGENT_NAME")
    if agent_name:
        payload["agent_name"] = agent_name

    endpoint = f"{base_url}/llm/log"
    request_headers = _get_auth_headers()

    async with httpx.AsyncClient() as http:
        reply = await http.post(
            endpoint,
            json=payload,
            headers=request_headers,
            timeout=10.0,
        )
        reply.raise_for_status()
        body = reply.json()

    return body["llm_call_id"]
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
# =============================================================================
|
|
711
|
+
# Ergonomic helpers for popular SDKs
|
|
712
|
+
# =============================================================================
|
|
713
|
+
# These functions auto-extract fields from SDK response objects so users
|
|
714
|
+
# don't have to manually pull out tokens, content, etc.
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _extract_openai_response(response: Any) -> dict[str, Any]:
|
|
718
|
+
"""Extract fields from an OpenAI ChatCompletion response.
|
|
719
|
+
|
|
720
|
+
Works with both sync and async OpenAI SDK responses.
|
|
721
|
+
|
|
722
|
+
Args:
|
|
723
|
+
response: OpenAI ChatCompletion object
|
|
724
|
+
|
|
725
|
+
Returns:
|
|
726
|
+
Dict with extracted fields for log_llm_call()
|
|
727
|
+
"""
|
|
728
|
+
choice = response.choices[0] if response.choices else None
|
|
729
|
+
message = choice.message if choice else None
|
|
730
|
+
|
|
731
|
+
# Extract content
|
|
732
|
+
content = message.content if message else None
|
|
733
|
+
|
|
734
|
+
# Extract tool calls (OpenAI format)
|
|
735
|
+
tool_calls = None
|
|
736
|
+
if message and message.tool_calls:
|
|
737
|
+
tool_calls = [
|
|
738
|
+
{
|
|
739
|
+
"id": tc.id,
|
|
740
|
+
"type": tc.type,
|
|
741
|
+
"function": {
|
|
742
|
+
"name": tc.function.name,
|
|
743
|
+
"arguments": tc.function.arguments,
|
|
744
|
+
},
|
|
745
|
+
}
|
|
746
|
+
for tc in message.tool_calls
|
|
747
|
+
]
|
|
748
|
+
|
|
749
|
+
return {
|
|
750
|
+
"response_content": content,
|
|
751
|
+
"model": response.model,
|
|
752
|
+
"provider": "openai",
|
|
753
|
+
"input_tokens": response.usage.prompt_tokens if response.usage else 0,
|
|
754
|
+
"output_tokens": response.usage.completion_tokens if response.usage else 0,
|
|
755
|
+
"tool_calls": tool_calls,
|
|
756
|
+
"finish_reason": choice.finish_reason if choice else "stop",
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def _extract_anthropic_response(response: Any) -> dict[str, Any]:
|
|
761
|
+
"""Extract fields from an Anthropic Message response.
|
|
762
|
+
|
|
763
|
+
Args:
|
|
764
|
+
response: Anthropic Message object
|
|
765
|
+
|
|
766
|
+
Returns:
|
|
767
|
+
Dict with extracted fields for log_llm_call()
|
|
768
|
+
"""
|
|
769
|
+
# Extract text content (Anthropic uses content blocks)
|
|
770
|
+
content = None
|
|
771
|
+
tool_calls = None
|
|
772
|
+
|
|
773
|
+
if response.content:
|
|
774
|
+
text_blocks = []
|
|
775
|
+
tool_use_blocks = []
|
|
776
|
+
|
|
777
|
+
for block in response.content:
|
|
778
|
+
# Duck type check for text block
|
|
779
|
+
if hasattr(block, "text"):
|
|
780
|
+
text_blocks.append(block.text)
|
|
781
|
+
# Duck type check for tool_use block
|
|
782
|
+
elif hasattr(block, "type") and block.type == "tool_use":
|
|
783
|
+
tool_use_blocks.append(
|
|
784
|
+
{
|
|
785
|
+
"id": block.id,
|
|
786
|
+
"type": "function",
|
|
787
|
+
"function": {
|
|
788
|
+
"name": block.name,
|
|
789
|
+
"arguments": (
|
|
790
|
+
block.input
|
|
791
|
+
if isinstance(block.input, str)
|
|
792
|
+
else str(block.input)
|
|
793
|
+
),
|
|
794
|
+
},
|
|
795
|
+
}
|
|
796
|
+
)
|
|
797
|
+
|
|
798
|
+
if text_blocks:
|
|
799
|
+
content = "\n".join(text_blocks)
|
|
800
|
+
if tool_use_blocks:
|
|
801
|
+
tool_calls = tool_use_blocks
|
|
802
|
+
|
|
803
|
+
# Map Anthropic stop_reason to standard finish_reason
|
|
804
|
+
finish_reason_map = {
|
|
805
|
+
"end_turn": "stop",
|
|
806
|
+
"stop_sequence": "stop",
|
|
807
|
+
"tool_use": "tool_calls",
|
|
808
|
+
"max_tokens": "length",
|
|
809
|
+
}
|
|
810
|
+
finish_reason = finish_reason_map.get(response.stop_reason, response.stop_reason)
|
|
811
|
+
|
|
812
|
+
return {
|
|
813
|
+
"response_content": content,
|
|
814
|
+
"model": response.model,
|
|
815
|
+
"provider": "anthropic",
|
|
816
|
+
"input_tokens": response.usage.input_tokens if response.usage else 0,
|
|
817
|
+
"output_tokens": response.usage.output_tokens if response.usage else 0,
|
|
818
|
+
"tool_calls": tool_calls,
|
|
819
|
+
"finish_reason": finish_reason,
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def _is_openai_response(response: Any) -> bool:
|
|
824
|
+
"""Check if response is an OpenAI ChatCompletion."""
|
|
825
|
+
return (
|
|
826
|
+
hasattr(response, "choices")
|
|
827
|
+
and hasattr(response, "usage")
|
|
828
|
+
and hasattr(response, "model")
|
|
829
|
+
and hasattr(response.usage, "prompt_tokens")
|
|
830
|
+
)
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def _is_anthropic_response(response: Any) -> bool:
|
|
834
|
+
"""Check if response is an Anthropic Message."""
|
|
835
|
+
return (
|
|
836
|
+
hasattr(response, "content")
|
|
837
|
+
and hasattr(response, "usage")
|
|
838
|
+
and hasattr(response, "stop_reason")
|
|
839
|
+
and hasattr(response.usage, "input_tokens")
|
|
840
|
+
)
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
async def log_openai_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an OpenAI ChatCompletion response via log_llm_call().

    Convenience wrapper: the response is run through
    _extract_openai_response() and the extracted fields are forwarded to
    log_llm_call() together with the caller-supplied messages and IDs.

    IMPORTANT: You do NOT need this if you use llm.chat() - it logs automatically!

    Args:
        input_messages: The messages array you sent to OpenAI
        response: The ChatCompletion response from OpenAI
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Example:
        from openai import AsyncOpenAI
        from dispatch_agents import llm

        client = AsyncOpenAI()
        messages = [{"role": "user", "content": "Hello!"}]

        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )

        # One line to log - no manual field extraction!
        await llm.log_openai_response(messages, response)
    """
    fields = _extract_openai_response(response)

    # Forward exactly these extracted fields, by name, to log_llm_call().
    forwarded_keys = (
        "response_content",
        "model",
        "provider",
        "input_tokens",
        "output_tokens",
        "tool_calls",
        "finish_reason",
    )
    extracted_kwargs = {key: fields[key] for key in forwarded_keys}

    return await log_llm_call(
        input_messages=input_messages,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
        **extracted_kwargs,
    )
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
async def log_anthropic_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Record an Anthropic Message response via log_llm_call().

    Convenience wrapper: the response is run through
    _extract_anthropic_response() and the extracted fields are forwarded to
    log_llm_call() together with the caller-supplied messages and IDs.

    IMPORTANT: You do NOT need this if you use llm.chat() - it logs automatically!

    Args:
        input_messages: The messages array you sent to Anthropic
        response: The Message response from Anthropic
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Example:
        import anthropic
        from dispatch_agents import llm

        client = anthropic.AsyncAnthropic()
        messages = [{"role": "user", "content": "Hello!"}]

        response = await client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            messages=messages,
        )

        # One line to log - no manual field extraction!
        await llm.log_anthropic_response(messages, response)
    """
    fields = _extract_anthropic_response(response)

    # Forward exactly these extracted fields, by name, to log_llm_call().
    forwarded_keys = (
        "response_content",
        "model",
        "provider",
        "input_tokens",
        "output_tokens",
        "tool_calls",
        "finish_reason",
    )
    extracted_kwargs = {key: fields[key] for key in forwarded_keys}

    return await log_llm_call(
        input_messages=input_messages,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
        **extracted_kwargs,
    )
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
async def log_response(
    input_messages: Sequence[dict[str, Any]],
    response: Any,
    *,
    latency_ms: int | None = None,
    trace_id: str | None = None,
    invocation_id: str | None = None,
) -> str:
    """Log an LLM response, picking the provider-specific logger automatically.

    The response is checked against the OpenAI shape first and the Anthropic
    shape second; the matching log_*_response() helper then does the work.

    IMPORTANT: You do NOT need this if you use llm.chat() - it logs automatically!

    Args:
        input_messages: The messages array you sent to the LLM
        response: The response object from OpenAI or Anthropic
        latency_ms: Time taken in milliseconds (optional)
        trace_id: Override trace ID (auto-detected from handler context)
        invocation_id: Override invocation ID (auto-detected from handler context)

    Returns:
        The llm_call_id assigned to this logged call

    Raises:
        ValueError: If the response type is not recognized

    Example:
        from dispatch_agents import llm

        # Works with OpenAI
        response = await openai_client.chat.completions.create(...)
        await llm.log_response(messages, response)

        # Works with Anthropic
        response = await anthropic_client.messages.create(...)
        await llm.log_response(messages, response)
    """
    # Choose the provider-specific logger up front, then make one call.
    if _is_openai_response(response):
        provider_logger = log_openai_response
    elif _is_anthropic_response(response):
        provider_logger = log_anthropic_response
    else:
        raise ValueError(
            "Unrecognized response type. Use log_openai_response(), "
            "log_anthropic_response(), or log_llm_call() with manual fields."
        )

    return await provider_logger(
        input_messages,
        response,
        latency_ms=latency_ms,
        trace_id=trace_id,
        invocation_id=invocation_id,
    )
|