nvidia-nat-autogen 1.4.0a20260120__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nat/meta/pypi.md +23 -0
- nat/plugins/autogen/__init__.py +14 -0
- nat/plugins/autogen/callback_handler.py +627 -0
- nat/plugins/autogen/llm.py +459 -0
- nat/plugins/autogen/register.py +22 -0
- nat/plugins/autogen/tool_wrapper.py +181 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/METADATA +44 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/RECORD +13 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/WHEEL +5 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/entry_points.txt +2 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat_autogen-1.4.0a20260120.dist-info/top_level.txt +1 -0
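
The module paths in the file list above map directly to importable names. As an illustrative sketch only (not part of the wheel), a quick smoke test after installing `nvidia-nat-autogen` might import each listed module; this assumes the core `nvidia-nat` package and the AutoGen dependencies are installed in the environment.

```python
# Hypothetical smoke test: import every plugin module listed in the file list above.
# Not shipped in the wheel; assumes nvidia-nat-autogen and its dependencies are installed.
import importlib

MODULES = (
    "nat.plugins.autogen",
    "nat.plugins.autogen.callback_handler",
    "nat.plugins.autogen.llm",
    "nat.plugins.autogen.register",
    "nat.plugins.autogen.tool_wrapper",
)

for name in MODULES:
    module = importlib.import_module(name)
    print(f"imported {name} from {module.__file__}")
```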
nat/meta/pypi.md
ADDED
@@ -0,0 +1,23 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->


# NVIDIA NeMo Agent Toolkit Subpackage
This is a subpackage for [`Microsoft AutoGen`](https://github.com/microsoft/autogen) integration in NeMo Agent toolkit.

For more information about the NVIDIA NeMo Agent toolkit, please visit the [NeMo Agent toolkit GitHub Repo](https://github.com/NVIDIA/NeMo-Agent-Toolkit).
nat/plugins/autogen/__init__.py
ADDED
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nat/plugins/autogen/callback_handler.py
ADDED
@@ -0,0 +1,627 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""AutoGen callback handler for usage statistics collection.

This module provides profiling instrumentation for AutoGen agents by monkey-patching
LLM client and tool classes to collect telemetry data.

Supported LLM Clients
---------------------
- ``OpenAIChatCompletionClient``: OpenAI and OpenAI-compatible APIs (NIM, LiteLLM)
- ``AzureOpenAIChatCompletionClient``: Azure OpenAI deployments
- ``AnthropicBedrockChatCompletionClient``: AWS Bedrock (Anthropic models)

Supported Methods
-----------------
- ``create``: Non-streaming LLM completions
- ``create_stream``: Streaming LLM completions
- ``BaseTool.run_json``: Tool executions
"""

import copy
import logging
import threading
import time
from collections.abc import AsyncGenerator
from collections.abc import Callable
from dataclasses import dataclass
from dataclasses import field
from typing import Any

from nat.builder.context import Context
from nat.builder.framework_enum import LLMFrameworkEnum
from nat.data_models.intermediate_step import IntermediateStepPayload
from nat.data_models.intermediate_step import IntermediateStepType
from nat.data_models.intermediate_step import StreamEventData
from nat.data_models.intermediate_step import TraceMetadata
from nat.data_models.intermediate_step import UsageInfo
from nat.profiler.callbacks.base_callback_class import BaseProfilerCallback
from nat.profiler.callbacks.token_usage_base_model import TokenUsageBaseModel

logger = logging.getLogger(__name__)


@dataclass
class ClientPatchInfo:
    """Stores original method references for a patched client class."""

    create: Callable[..., Any] | None = None
    create_stream: Callable[..., Any] | None = None


@dataclass
class PatchedClients:
    """Stores all patched client information for restoration."""

    openai: ClientPatchInfo = field(default_factory=ClientPatchInfo)
    azure: ClientPatchInfo = field(default_factory=ClientPatchInfo)
    bedrock: ClientPatchInfo = field(default_factory=ClientPatchInfo)
    tool: Callable[..., Any] | None = None


class AutoGenProfilerHandler(BaseProfilerCallback):
    """Callback handler for AutoGen that intercepts LLM and tool calls for profiling.

    This handler monkey-patches AutoGen client classes to collect usage statistics
    including token usage, inputs, outputs, and timing information.

    Supported clients:
    - OpenAIChatCompletionClient (OpenAI, NIM, LiteLLM)
    - AzureOpenAIChatCompletionClient (Azure OpenAI)
    - AnthropicBedrockChatCompletionClient (AWS Bedrock)

    Supported methods:
    - create (non-streaming)
    - create_stream (streaming)
    - BaseTool.run_json (tool execution)

    Example:
        >>> handler = AutoGenProfilerHandler()
        >>> handler.instrument()
        >>> # ... run AutoGen workflow ...
        >>> handler.uninstrument()
    """

    def __init__(self) -> None:
        """Initialize the AutoGenProfilerHandler."""
        super().__init__()
        self._lock = threading.Lock()
        self.last_call_ts = time.time()
        self.step_manager = Context.get().intermediate_step_manager
        self._patched = PatchedClients()
        self._instrumented = False

    def instrument(self) -> None:
        """Monkey-patch AutoGen methods with usage-stat collection logic.

        Patches the following classes if available:
        - OpenAIChatCompletionClient.create, create_stream
        - AzureOpenAIChatCompletionClient.create, create_stream
        - AnthropicBedrockChatCompletionClient.create
        - BaseTool.run_json

        Does nothing if already instrumented or if imports fail.
        """
        if self._instrumented:
            logger.debug("AutoGenProfilerHandler already instrumented; skipping.")
            return

        # Import and patch tool class
        try:
            from autogen_core.tools import BaseTool
            self._patched.tool = getattr(BaseTool, "run_json", None)
            if self._patched.tool:
                BaseTool.run_json = self._create_tool_wrapper(self._patched.tool)
                logger.debug("Patched BaseTool.run_json")
        except ImportError:
            logger.debug("autogen_core.tools not available; skipping tool instrumentation")

        # Import and patch OpenAI client
        try:
            from autogen_ext.models.openai import OpenAIChatCompletionClient
            self._patched.openai.create = getattr(OpenAIChatCompletionClient, "create", None)
            self._patched.openai.create_stream = getattr(OpenAIChatCompletionClient, "create_stream", None)

            if self._patched.openai.create:
                OpenAIChatCompletionClient.create = self._create_llm_wrapper(self._patched.openai.create)
                logger.debug("Patched OpenAIChatCompletionClient.create")
            if self._patched.openai.create_stream:
                OpenAIChatCompletionClient.create_stream = self._create_stream_wrapper(
                    self._patched.openai.create_stream)
                logger.debug("Patched OpenAIChatCompletionClient.create_stream")
        except ImportError:
            logger.debug("autogen_ext.models.openai not available; skipping OpenAI instrumentation")

        # Import and patch Azure client
        try:
            from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
            self._patched.azure.create = getattr(AzureOpenAIChatCompletionClient, "create", None)
            self._patched.azure.create_stream = getattr(AzureOpenAIChatCompletionClient, "create_stream", None)

            if self._patched.azure.create:
                AzureOpenAIChatCompletionClient.create = self._create_llm_wrapper(self._patched.azure.create)
                logger.debug("Patched AzureOpenAIChatCompletionClient.create")
            if self._patched.azure.create_stream:
                AzureOpenAIChatCompletionClient.create_stream = self._create_stream_wrapper(
                    self._patched.azure.create_stream)
                logger.debug("Patched AzureOpenAIChatCompletionClient.create_stream")
        except ImportError:
            logger.debug("AzureOpenAIChatCompletionClient not available; skipping Azure instrumentation")

        # Import and patch Bedrock client
        try:
            from autogen_ext.models.anthropic import AnthropicBedrockChatCompletionClient
            self._patched.bedrock.create = getattr(AnthropicBedrockChatCompletionClient, "create", None)

            if self._patched.bedrock.create:
                AnthropicBedrockChatCompletionClient.create = self._create_llm_wrapper(self._patched.bedrock.create)
                logger.debug("Patched AnthropicBedrockChatCompletionClient.create")
            # Note: Bedrock client may not have create_stream - check if available
            if hasattr(AnthropicBedrockChatCompletionClient, "create_stream"):
                self._patched.bedrock.create_stream = getattr(AnthropicBedrockChatCompletionClient,
                                                              "create_stream",
                                                              None)
                if self._patched.bedrock.create_stream:
                    AnthropicBedrockChatCompletionClient.create_stream = self._create_stream_wrapper(
                        self._patched.bedrock.create_stream)
                    logger.debug("Patched AnthropicBedrockChatCompletionClient.create_stream")
        except ImportError:
            logger.debug("autogen_ext.models.anthropic not available; skipping Bedrock instrumentation")

        self._instrumented = True
        logger.debug("AutoGenProfilerHandler instrumentation applied successfully.")

    def uninstrument(self) -> None:
        """Restore original AutoGen methods.

        Should be called to clean up monkey patches, especially in test environments.
        """
        try:
            # Restore tool
            if self._patched.tool:
                from autogen_core.tools import BaseTool
                BaseTool.run_json = self._patched.tool
                logger.debug("Restored BaseTool.run_json")

            # Restore OpenAI client
            if self._patched.openai.create or self._patched.openai.create_stream:
                from autogen_ext.models.openai import OpenAIChatCompletionClient
                if self._patched.openai.create:
                    OpenAIChatCompletionClient.create = self._patched.openai.create
                if self._patched.openai.create_stream:
                    OpenAIChatCompletionClient.create_stream = self._patched.openai.create_stream
                logger.debug("Restored OpenAIChatCompletionClient methods")

            # Restore Azure client
            if self._patched.azure.create or self._patched.azure.create_stream:
                from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
                if self._patched.azure.create:
                    AzureOpenAIChatCompletionClient.create = self._patched.azure.create
                if self._patched.azure.create_stream:
                    AzureOpenAIChatCompletionClient.create_stream = self._patched.azure.create_stream
                logger.debug("Restored AzureOpenAIChatCompletionClient methods")

            # Restore Bedrock client
            if self._patched.bedrock.create or self._patched.bedrock.create_stream:
                from autogen_ext.models.anthropic import AnthropicBedrockChatCompletionClient
                if self._patched.bedrock.create:
                    AnthropicBedrockChatCompletionClient.create = self._patched.bedrock.create
                if self._patched.bedrock.create_stream:
                    AnthropicBedrockChatCompletionClient.create_stream = self._patched.bedrock.create_stream
                logger.debug("Restored AnthropicBedrockChatCompletionClient methods")

            # Reset state
            self._patched = PatchedClients()
            self._instrumented = False
            logger.debug("AutoGenProfilerHandler uninstrumented successfully.")

        except Exception:
            logger.exception("Failed to uninstrument AutoGenProfilerHandler")

    def _extract_model_name(self, client: Any) -> str:
        """Extract model name from AutoGen client instance.

        Args:
            client: AutoGen chat completion client instance

        Returns:
            str: Model name or 'unknown_model' if extraction fails
        """
        try:
            raw_config = getattr(client, "_raw_config", {})
            if raw_config and "model" in raw_config:
                return str(raw_config["model"])
        except Exception:
            logger.debug("Failed to extract model from _raw_config")

        try:
            return str(getattr(client, "model", "unknown_model"))
        except Exception:
            return "unknown_model"

    def _extract_input_text(self, messages: list[Any]) -> str:
        """Extract text content from message list.

        Handles both dict-style messages and AutoGen typed message objects
        (UserMessage, AssistantMessage, SystemMessage).

        Args:
            messages: List of message dictionaries or AutoGen message objects

        Returns:
            str: Concatenated text content from messages
        """
        model_input = ""
        try:
            for message in messages:
                # Handle dict-style messages
                if isinstance(message, dict):
                    content = message.get("content", "")
                # Handle AutoGen typed message objects (UserMessage, AssistantMessage, etc.)
                elif hasattr(message, "content"):
                    content = message.content
                else:
                    # Fallback to string conversion
                    content = str(message)

                if isinstance(content, list):
                    for part in content:
                        if isinstance(part, dict):
                            model_input += str(part.get("text", ""))
                        else:
                            model_input += str(part)
                else:
                    model_input += str(content) if content else ""
        except Exception:
            logger.debug("Error extracting input text from messages")
        return model_input

    def _extract_output_text(self, output: Any) -> str:
        """Extract text content from LLM response.

        Args:
            output: LLM response object

        Returns:
            str: Concatenated text content from response
        """
        model_output = ""
        try:
            for content in output.content:
                model_output += str(content) if content else ""
        except Exception:
            logger.debug("Error extracting output text from response")
        return model_output

    def _extract_usage(self, output: Any) -> dict[str, Any]:
        """Extract token usage from LLM response.

        Args:
            output: LLM response object

        Returns:
            dict: Token usage dictionary
        """
        try:
            usage_obj = getattr(output, "usage", None)
            if not usage_obj:
                usage_obj = (getattr(output, "model_extra", {}) or {}).get("usage")
            if usage_obj:
                if hasattr(usage_obj, "model_dump"):
                    return usage_obj.model_dump()
                elif isinstance(usage_obj, dict):
                    return usage_obj
        except Exception:
            logger.debug("Error extracting usage from response")
        return {}

    def _extract_chat_response(self, output: Any) -> dict[str, Any]:
        """Extract chat response metadata from LLM response.

        Args:
            output: LLM response object

        Returns:
            dict: Chat response metadata
        """
        try:
            choices = getattr(output, "choices", [])
            if choices:
                first_choice = choices[0]
                if hasattr(first_choice, "model_dump"):
                    return first_choice.model_dump()
                return getattr(first_choice, "__dict__", {}) or {}
        except Exception:
            logger.debug("Error extracting chat response metadata")
        return {}

    def _create_llm_wrapper(self, original_func: Callable[..., Any]) -> Callable[..., Any]:
        """Create wrapper for non-streaming LLM calls.

        Args:
            original_func: Original create method to wrap

        Returns:
            Callable: Wrapped function with profiling
        """
        handler = self

        async def wrapped_llm_call(*args: Any, **kwargs: Any) -> Any:
            now = time.time()
            with handler._lock:
                seconds_between_calls = int(now - handler.last_call_ts)

            # Extract model info
            client = args[0] if args else None
            model_name = handler._extract_model_name(client) if client else "unknown_model"
            messages = kwargs.get("messages", [])
            model_input = handler._extract_input_text(messages)

            # Push LLM_START event
            start_payload = IntermediateStepPayload(
                event_type=IntermediateStepType.LLM_START,
                framework=LLMFrameworkEnum.AUTOGEN,
                name=model_name,
                data=StreamEventData(input=model_input),
                metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages)),
                usage_info=UsageInfo(
                    token_usage=TokenUsageBaseModel(),
                    num_llm_calls=1,
                    seconds_between_calls=seconds_between_calls,
                ),
            )
            start_uuid = start_payload.UUID
            handler.step_manager.push_intermediate_step(start_payload)

            # Call original function
            try:
                output = await original_func(*args, **kwargs)
            except Exception as e:
                logger.error("Error during LLM call: %s", e)
                handler.step_manager.push_intermediate_step(
                    IntermediateStepPayload(
                        event_type=IntermediateStepType.LLM_END,
                        span_event_timestamp=time.time(),
                        framework=LLMFrameworkEnum.AUTOGEN,
                        name=model_name,
                        data=StreamEventData(input=model_input, output=str(e)),
                        metadata=TraceMetadata(error=str(e)),
                        usage_info=UsageInfo(token_usage=TokenUsageBaseModel()),
                        UUID=start_uuid,
                    ))
                with handler._lock:
                    handler.last_call_ts = time.time()
                raise

            # Extract response data
            model_output = handler._extract_output_text(output)
            usage_payload = handler._extract_usage(output)
            chat_resp = handler._extract_chat_response(output)

            # Push LLM_END event
            end_time = time.time()
            handler.step_manager.push_intermediate_step(
                IntermediateStepPayload(
                    event_type=IntermediateStepType.LLM_END,
                    span_event_timestamp=end_time,
                    framework=LLMFrameworkEnum.AUTOGEN,
                    name=model_name,
                    data=StreamEventData(input=model_input, output=model_output),
                    metadata=TraceMetadata(chat_responses=chat_resp),
                    usage_info=UsageInfo(
                        token_usage=TokenUsageBaseModel(**usage_payload),
                        num_llm_calls=1,
                        seconds_between_calls=seconds_between_calls,
                    ),
                    UUID=start_uuid,
                ))

            with handler._lock:
                handler.last_call_ts = end_time

            return output

        return wrapped_llm_call

    def _create_stream_wrapper(self, original_func: Callable[..., Any]) -> Callable[..., Any]:
        """Create wrapper for streaming LLM calls.

        Args:
            original_func: Original create_stream method to wrap

        Returns:
            Callable: Wrapped function with profiling
        """
        handler = self

        async def wrapped_stream_call(*args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]:
            now = time.time()
            with handler._lock:
                seconds_between_calls = int(now - handler.last_call_ts)

            # Extract model info
            client = args[0] if args else None
            model_name = handler._extract_model_name(client) if client else "unknown_model"
            messages = kwargs.get("messages", [])
            model_input = handler._extract_input_text(messages)

            # Push LLM_START event
            start_payload = IntermediateStepPayload(
                event_type=IntermediateStepType.LLM_START,
                framework=LLMFrameworkEnum.AUTOGEN,
                name=model_name,
                data=StreamEventData(input=model_input),
                metadata=TraceMetadata(chat_inputs=copy.deepcopy(messages)),
                usage_info=UsageInfo(
                    token_usage=TokenUsageBaseModel(),
                    num_llm_calls=1,
                    seconds_between_calls=seconds_between_calls,
                ),
            )
            start_uuid = start_payload.UUID
            handler.step_manager.push_intermediate_step(start_payload)

            # Collect streaming output
            output_chunks: list[str] = []
            usage_payload: dict[str, Any] = {}

            try:
                async for chunk in original_func(*args, **kwargs):
                    # Extract text from chunk if available
                    try:
                        if hasattr(chunk, "content") and chunk.content:
                            output_chunks.append(str(chunk.content))
                        # Check for usage in final chunk
                        if hasattr(chunk, "usage") and chunk.usage:
                            if hasattr(chunk.usage, "model_dump"):
                                usage_payload = chunk.usage.model_dump()
                            elif isinstance(chunk.usage, dict):
                                usage_payload = chunk.usage
                    except Exception:
                        pass
                    yield chunk

                # Success path - push LLM_END event after stream completes
                end_time = time.time()
                model_output = "".join(output_chunks)
                handler.step_manager.push_intermediate_step(
                    IntermediateStepPayload(
                        event_type=IntermediateStepType.LLM_END,
                        span_event_timestamp=end_time,
                        framework=LLMFrameworkEnum.AUTOGEN,
                        name=model_name,
                        data=StreamEventData(input=model_input, output=model_output),
                        metadata=TraceMetadata(chat_responses={}),
                        usage_info=UsageInfo(
                            token_usage=TokenUsageBaseModel(**usage_payload),
                            num_llm_calls=1,
                            seconds_between_calls=seconds_between_calls,
                        ),
                        UUID=start_uuid,
                    ))
                with handler._lock:
                    handler.last_call_ts = end_time

            except Exception as e:
                # Error path - push error LLM_END event
                logger.error("Error during streaming LLM call: %s", e)
                handler.step_manager.push_intermediate_step(
                    IntermediateStepPayload(
                        event_type=IntermediateStepType.LLM_END,
                        span_event_timestamp=time.time(),
                        framework=LLMFrameworkEnum.AUTOGEN,
                        name=model_name,
                        data=StreamEventData(input=model_input, output=str(e)),
                        metadata=TraceMetadata(error=str(e)),
                        usage_info=UsageInfo(token_usage=TokenUsageBaseModel()),
                        UUID=start_uuid,
                    ))
                with handler._lock:
                    handler.last_call_ts = time.time()
                raise

        return wrapped_stream_call

    def _create_tool_wrapper(self, original_func: Callable[..., Any]) -> Callable[..., Any]:
        """Create wrapper for tool execution calls.

        Args:
            original_func: Original run_json method to wrap

        Returns:
            Callable: Wrapped function with profiling
        """
        handler = self

        async def wrapped_tool_call(*args: Any, **kwargs: Any) -> Any:
            now = time.time()
            with handler._lock:
                seconds_between_calls = int(now - handler.last_call_ts)

            # Extract tool name
            tool_name = "unknown_tool"
            try:
                tool_name = str(getattr(args[0], "name", "unknown_tool"))
            except Exception:
                logger.debug("Error getting tool name")

            # Extract tool input
            tool_input = ""
            try:
                if len(args) > 1:
                    call_data = args[1]
                    if hasattr(call_data, "kwargs"):
                        tool_input = str(call_data.kwargs)
                    elif isinstance(call_data, dict):
                        tool_input = str(call_data.get("kwargs", {}))
            except Exception:
                logger.debug("Error extracting tool input")

            # Push TOOL_START event
            start_payload = IntermediateStepPayload(
                event_type=IntermediateStepType.TOOL_START,
                framework=LLMFrameworkEnum.AUTOGEN,
                name=tool_name,
                data=StreamEventData(input=tool_input),
                metadata=TraceMetadata(tool_inputs={"input": tool_input}),
                usage_info=UsageInfo(
                    token_usage=TokenUsageBaseModel(),
                    num_llm_calls=0,
                    seconds_between_calls=seconds_between_calls,
                ),
            )
            start_uuid = start_payload.UUID
            handler.step_manager.push_intermediate_step(start_payload)

            # Call original function
            try:
                output = await original_func(*args, **kwargs)
            except Exception as e:
                logger.error("Tool execution failed: %s", e)
                handler.step_manager.push_intermediate_step(
                    IntermediateStepPayload(
                        event_type=IntermediateStepType.TOOL_END,
                        span_event_timestamp=time.time(),
                        framework=LLMFrameworkEnum.AUTOGEN,
                        name=tool_name,
                        data=StreamEventData(input=tool_input, output=str(e)),
                        metadata=TraceMetadata(error=str(e)),
                        usage_info=UsageInfo(token_usage=TokenUsageBaseModel()),
                        UUID=start_uuid,
                    ))
                with handler._lock:
                    handler.last_call_ts = time.time()
                raise

            # Push TOOL_END event
            end_time = time.time()
            handler.step_manager.push_intermediate_step(
                IntermediateStepPayload(
                    event_type=IntermediateStepType.TOOL_END,
                    span_event_timestamp=end_time,
                    framework=LLMFrameworkEnum.AUTOGEN,
                    name=tool_name,
                    data=StreamEventData(input=tool_input, output=str(output)),
                    metadata=TraceMetadata(tool_outputs={"result": str(output)}),
                    usage_info=UsageInfo(token_usage=TokenUsageBaseModel()),
                    UUID=start_uuid,
                ))

            with handler._lock:
                handler.last_call_ts = end_time

            return output

        return wrapped_tool_call
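
The class docstring of `AutoGenProfilerHandler` above already sketches the intended lifecycle. The following is a slightly expanded, hedged version of that same example; it is not part of the wheel. It assumes an active NeMo Agent toolkit context (so `Context.get()` succeeds) and that the AutoGen packages patched by `instrument()` are installed; the workflow body is a placeholder.

```python
# Minimal usage sketch based on the Example in the AutoGenProfilerHandler docstring.
# Assumes an active NAT context; the AutoGen workflow itself is a placeholder.
import asyncio

from nat.plugins.autogen.callback_handler import AutoGenProfilerHandler


async def run_profiled_workflow() -> None:
    handler = AutoGenProfilerHandler()
    handler.instrument()  # patch create/create_stream/run_json where the imports succeed
    try:
        ...  # run the AutoGen workflow here; patched calls emit LLM/TOOL intermediate steps
    finally:
        handler.uninstrument()  # always restore the original AutoGen methods


if __name__ == "__main__":
    asyncio.run(run_profiled_workflow())
```

Wrapping the workflow in `try`/`finally` mirrors the docstring's instrument/uninstrument pairing and keeps the monkey patches from leaking into later code or tests.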