agentreplay-0.1.2-py3-none-any.whl
- agentreplay/__init__.py +81 -0
- agentreplay/auto_instrument/__init__.py +237 -0
- agentreplay/auto_instrument/openai.py +431 -0
- agentreplay/batching.py +270 -0
- agentreplay/bootstrap.py +202 -0
- agentreplay/circuit_breaker.py +300 -0
- agentreplay/client.py +1560 -0
- agentreplay/config.py +215 -0
- agentreplay/context.py +168 -0
- agentreplay/env_config.py +327 -0
- agentreplay/env_init.py +128 -0
- agentreplay/exceptions.py +92 -0
- agentreplay/genai.py +510 -0
- agentreplay/genai_conventions.py +502 -0
- agentreplay/install_pth.py +159 -0
- agentreplay/langchain_tracer.py +385 -0
- agentreplay/models.py +120 -0
- agentreplay/otel_bridge.py +281 -0
- agentreplay/patch.py +308 -0
- agentreplay/propagation.py +328 -0
- agentreplay/py.typed +3 -0
- agentreplay/retry.py +151 -0
- agentreplay/sampling.py +298 -0
- agentreplay/session.py +164 -0
- agentreplay/sitecustomize.py +73 -0
- agentreplay/span.py +270 -0
- agentreplay/unified.py +465 -0
- agentreplay-0.1.2.dist-info/METADATA +285 -0
- agentreplay-0.1.2.dist-info/RECORD +33 -0
- agentreplay-0.1.2.dist-info/WHEEL +5 -0
- agentreplay-0.1.2.dist-info/entry_points.txt +2 -0
- agentreplay-0.1.2.dist-info/licenses/LICENSE +190 -0
- agentreplay-0.1.2.dist-info/top_level.txt +1 -0
agentreplay/genai.py
ADDED
@@ -0,0 +1,510 @@
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""OpenTelemetry GenAI Semantic Conventions for Agentreplay SDK.

This module provides utilities for tracking LLM calls with proper OpenTelemetry
GenAI semantic conventions v1.36+.

Reference: https://opentelemetry.io/docs/specs/semconv/gen-ai/
"""

from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field, asdict
import json

@dataclass
class GenAIAttributes:
    """OpenTelemetry GenAI semantic conventions attributes.

    This class represents the standard attributes for LLM observability
    according to OpenTelemetry GenAI semantic conventions v1.36+.
    """

    # =========================================================================
    # PROVIDER IDENTIFICATION (REQUIRED)
    # =========================================================================
    system: Optional[str] = None  # Legacy: "openai", "anthropic", etc.
    provider_name: Optional[str] = None  # New: "openai", "anthropic", "aws.bedrock", etc.
    operation_name: Optional[str] = None  # "chat", "completion", "embedding"

    # =========================================================================
    # MODEL INFORMATION (REQUIRED)
    # =========================================================================
    request_model: Optional[str] = None  # Model requested
    response_model: Optional[str] = None  # Actual model used
    response_id: Optional[str] = None  # Provider response ID

    # =========================================================================
    # TOKEN USAGE (CRITICAL for cost calculation)
    # =========================================================================
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    reasoning_tokens: Optional[int] = None  # OpenAI o1 models
    cache_read_tokens: Optional[int] = None  # Anthropic cache hits
    cache_creation_tokens: Optional[int] = None  # Anthropic cache creation

    # =========================================================================
    # FINISH REASONS
    # =========================================================================
    finish_reasons: Optional[List[str]] = None

    # =========================================================================
    # REQUEST PARAMETERS / HYPERPARAMETERS (RECOMMENDED)
    # =========================================================================
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[float] = None  # Anthropic/Google
    max_tokens: Optional[int] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop_sequences: Optional[List[str]] = None
    seed: Optional[int] = None  # Reproducibility
    choice_count: Optional[int] = None  # n parameter

    # =========================================================================
    # SERVER INFORMATION (REQUIRED for distributed tracing)
    # =========================================================================
    server_address: Optional[str] = None
    server_port: Optional[int] = None

    # =========================================================================
    # ERROR TRACKING (REQUIRED when errors occur)
    # =========================================================================
    error_type: Optional[str] = None

    # =========================================================================
    # AGENT ATTRIBUTES (for agentic systems)
    # =========================================================================
    agent_id: Optional[str] = None
    agent_name: Optional[str] = None
    agent_description: Optional[str] = None
    conversation_id: Optional[str] = None

    # =========================================================================
    # TOOL DEFINITIONS (array of tool schemas)
    # =========================================================================
    tool_definitions: Optional[List[Dict[str, Any]]] = None

    # =========================================================================
    # STRUCTURED CONTENT
    # =========================================================================
    prompts: List[Dict[str, Any]] = field(default_factory=list)
    completions: List[Dict[str, Any]] = field(default_factory=list)
    system_instructions: Optional[str] = None

    # =========================================================================
    # ADDITIONAL ATTRIBUTES
    # =========================================================================
    additional: Dict[str, Any] = field(default_factory=dict)

    def to_attributes_dict(self) -> Dict[str, str]:
        """Convert to flat attributes dictionary for Agentreplay ingestion.

        Returns:
            Dictionary with OpenTelemetry GenAI attribute names as keys.
        """
        attrs = {}

        # =====================================================================
        # PROVIDER IDENTIFICATION
        # =====================================================================
        if self.system:
            attrs["gen_ai.system"] = self.system
        if self.provider_name:
            attrs["gen_ai.provider.name"] = self.provider_name
        if self.operation_name:
            attrs["gen_ai.operation.name"] = self.operation_name

        # =====================================================================
        # MODEL INFORMATION
        # =====================================================================
        if self.request_model:
            attrs["gen_ai.request.model"] = self.request_model
        if self.response_model:
            attrs["gen_ai.response.model"] = self.response_model
        if self.response_id:
            attrs["gen_ai.response.id"] = self.response_id

        # =====================================================================
        # TOKEN USAGE
        # =====================================================================
        if self.input_tokens is not None:
            attrs["gen_ai.usage.input_tokens"] = str(self.input_tokens)
        if self.output_tokens is not None:
            attrs["gen_ai.usage.output_tokens"] = str(self.output_tokens)
        if self.total_tokens is not None:
            attrs["gen_ai.usage.total_tokens"] = str(self.total_tokens)
        if self.reasoning_tokens is not None:
            attrs["gen_ai.usage.reasoning_tokens"] = str(self.reasoning_tokens)
        if self.cache_read_tokens is not None:
            attrs["gen_ai.usage.cache_read_tokens"] = str(self.cache_read_tokens)
        if self.cache_creation_tokens is not None:
            attrs["gen_ai.usage.cache_creation_tokens"] = str(self.cache_creation_tokens)

        # =====================================================================
        # FINISH REASONS
        # =====================================================================
        if self.finish_reasons:
            attrs["gen_ai.response.finish_reasons"] = json.dumps(self.finish_reasons)

        # =====================================================================
        # REQUEST PARAMETERS / HYPERPARAMETERS
        # =====================================================================
        if self.temperature is not None:
            attrs["gen_ai.request.temperature"] = str(self.temperature)
        if self.top_p is not None:
            attrs["gen_ai.request.top_p"] = str(self.top_p)
        if self.top_k is not None:
            attrs["gen_ai.request.top_k"] = str(self.top_k)
        if self.max_tokens is not None:
            attrs["gen_ai.request.max_tokens"] = str(self.max_tokens)
        if self.frequency_penalty is not None:
            attrs["gen_ai.request.frequency_penalty"] = str(self.frequency_penalty)
        if self.presence_penalty is not None:
            attrs["gen_ai.request.presence_penalty"] = str(self.presence_penalty)
        if self.stop_sequences:
            attrs["gen_ai.request.stop_sequences"] = json.dumps(self.stop_sequences)
        if self.seed is not None:
            attrs["gen_ai.request.seed"] = str(self.seed)
        if self.choice_count is not None:
            attrs["gen_ai.request.choice.count"] = str(self.choice_count)

        # =====================================================================
        # SERVER INFORMATION
        # =====================================================================
        if self.server_address:
            attrs["server.address"] = self.server_address
        if self.server_port is not None:
            attrs["server.port"] = str(self.server_port)

        # =====================================================================
        # ERROR TRACKING
        # =====================================================================
        if self.error_type:
            attrs["error.type"] = self.error_type

        # =====================================================================
        # AGENT ATTRIBUTES
        # =====================================================================
        if self.agent_id:
            attrs["gen_ai.agent.id"] = self.agent_id
        if self.agent_name:
            attrs["gen_ai.agent.name"] = self.agent_name
        if self.agent_description:
            attrs["gen_ai.agent.description"] = self.agent_description
        if self.conversation_id:
            attrs["gen_ai.conversation.id"] = self.conversation_id

        # =====================================================================
        # TOOL DEFINITIONS
        # =====================================================================
        if self.tool_definitions:
            attrs["gen_ai.tool.definitions"] = json.dumps(self.tool_definitions)

        # =====================================================================
        # SYSTEM INSTRUCTIONS
        # =====================================================================
        if self.system_instructions:
            attrs["gen_ai.system_instructions"] = self.system_instructions

        # =====================================================================
        # STRUCTURED PROMPTS
        # =====================================================================
        for i, prompt in enumerate(self.prompts):
            if "role" in prompt:
                attrs[f"gen_ai.prompt.{i}.role"] = prompt["role"]
            if "content" in prompt:
                attrs[f"gen_ai.prompt.{i}.content"] = prompt["content"]

        # Structured completions
        for i, completion in enumerate(self.completions):
            if "role" in completion:
                attrs[f"gen_ai.completion.{i}.role"] = completion["role"]
            if "content" in completion:
                attrs[f"gen_ai.completion.{i}.content"] = completion["content"]
            if "finish_reason" in completion:
                attrs[f"gen_ai.completion.{i}.finish_reason"] = completion["finish_reason"]

        # Additional attributes
        for key, value in self.additional.items():
            attrs[key] = str(value)

        return attrs

    @classmethod
    def from_openai_response(
        cls,
        response: Any,
        request_params: Optional[Dict] = None,
        server_address: str = "api.openai.com",
        server_port: int = 443,
    ) -> "GenAIAttributes":
        """Create GenAI attributes from OpenAI API response.

        Args:
            response: OpenAI API response object
            request_params: Optional request parameters (temperature, etc.)
            server_address: API server address (default: api.openai.com)
            server_port: API server port (default: 443)

        Returns:
            GenAIAttributes instance with OTEL-compliant attributes
        """
        request_params = request_params or {}

        attrs = cls(
            system="openai",
            provider_name="openai",  # New OTEL attribute
            operation_name="chat",
            request_model=request_params.get("model"),
            response_model=getattr(response, "model", None),
            response_id=getattr(response, "id", None),
            server_address=server_address,
            server_port=server_port,
        )

        # Extract token usage
        if hasattr(response, "usage") and response.usage:
            usage = response.usage
            attrs.input_tokens = getattr(usage, "prompt_tokens", None)
            attrs.output_tokens = getattr(usage, "completion_tokens", None)
            attrs.total_tokens = getattr(usage, "total_tokens", None)

            # Handle o1 reasoning tokens
            if hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
                details = usage.completion_tokens_details
                attrs.reasoning_tokens = getattr(details, "reasoning_tokens", None)

            # Handle cached tokens
            if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
                details = usage.prompt_tokens_details
                attrs.cache_read_tokens = getattr(details, "cached_tokens", None)

        # Extract all hyperparameters
        attrs.temperature = request_params.get("temperature")
        attrs.top_p = request_params.get("top_p")
        attrs.max_tokens = request_params.get("max_tokens")
        attrs.frequency_penalty = request_params.get("frequency_penalty")
        attrs.presence_penalty = request_params.get("presence_penalty")
        attrs.seed = request_params.get("seed")
        attrs.choice_count = request_params.get("n")

        # Stop sequences
        stop = request_params.get("stop")
        if stop:
            attrs.stop_sequences = stop if isinstance(stop, list) else [stop]

        # Extract tool definitions if provided
        tools = request_params.get("tools")
        if tools:
            attrs.tool_definitions = tools

        # Extract prompts
        if "messages" in request_params:
            attrs.prompts = [
                {"role": msg.get("role"), "content": msg.get("content")}
                for msg in request_params["messages"]
            ]
            # Extract system instructions
            for msg in request_params["messages"]:
                if msg.get("role") == "system":
                    attrs.system_instructions = msg.get("content")
                    break

        # Extract completions
        if hasattr(response, "choices"):
            attrs.completions = []
            attrs.finish_reasons = []
            for choice in response.choices:
                if hasattr(choice, "message"):
                    attrs.completions.append({
                        "role": getattr(choice.message, "role", "assistant"),
                        "content": getattr(choice.message, "content", ""),
                    })
                if hasattr(choice, "finish_reason"):
                    attrs.finish_reasons.append(choice.finish_reason)

        # NOTE: these hardcoded assignments re-apply the defaults, overriding
        # any custom server_address/server_port passed to this method
        attrs.server_address = "api.openai.com"
        attrs.server_port = 443

        return attrs

    @classmethod
    def from_anthropic_response(
        cls,
        response: Any,
        request_params: Optional[Dict] = None,
        server_address: str = "api.anthropic.com",
        server_port: int = 443,
    ) -> "GenAIAttributes":
        """Create GenAI attributes from Anthropic API response.

        Args:
            response: Anthropic API response object
            request_params: Optional request parameters
            server_address: API server address (default: api.anthropic.com)
            server_port: API server port (default: 443)

        Returns:
            GenAIAttributes instance with OTEL-compliant attributes
        """
        request_params = request_params or {}

        attrs = cls(
            system="anthropic",
            provider_name="anthropic",  # New OTEL attribute
            operation_name="chat",
            request_model=request_params.get("model"),
            response_model=getattr(response, "model", None),
            response_id=getattr(response, "id", None),
            server_address=server_address,
            server_port=server_port,
        )

        # Extract token usage
        if hasattr(response, "usage") and response.usage:
            usage = response.usage
            attrs.input_tokens = getattr(usage, "input_tokens", None)
            attrs.output_tokens = getattr(usage, "output_tokens", None)

            # Calculate total
            if attrs.input_tokens and attrs.output_tokens:
                attrs.total_tokens = attrs.input_tokens + attrs.output_tokens

            # Extract cache tokens (Anthropic prompt caching)
            if hasattr(usage, "cache_read_input_tokens"):
                attrs.cache_read_tokens = usage.cache_read_input_tokens
            if hasattr(usage, "cache_creation_input_tokens"):
                attrs.cache_creation_tokens = usage.cache_creation_input_tokens

        # Extract all hyperparameters
        attrs.temperature = request_params.get("temperature")
        attrs.top_p = request_params.get("top_p")
        attrs.top_k = request_params.get("top_k")  # Anthropic-specific
        attrs.max_tokens = request_params.get("max_tokens")

        # Stop sequences
        stop = request_params.get("stop_sequences")
        if stop:
            attrs.stop_sequences = stop if isinstance(stop, list) else [stop]

        # Extract tool definitions if provided
        tools = request_params.get("tools")
        if tools:
            attrs.tool_definitions = tools

        # Extract prompts
        if "messages" in request_params:
            attrs.prompts = [
                {"role": msg.get("role"), "content": msg.get("content")}
                for msg in request_params["messages"]
            ]

        # Add system prompt if present (Anthropic uses separate system param)
        if "system" in request_params:
            attrs.system_instructions = request_params["system"]
            attrs.prompts.insert(0, {
                "role": "system",
                "content": request_params["system"]
            })

        # Extract completions
        if hasattr(response, "content") and response.content:
            attrs.completions = []
            for content_block in response.content:
                if hasattr(content_block, "text"):
                    attrs.completions.append({
                        "role": "assistant",
                        "content": content_block.text,
                    })
                elif hasattr(content_block, "type") and content_block.type == "tool_use":
                    # Tool use content block
                    attrs.completions.append({
                        "role": "assistant",
                        "content": f"[tool_use: {getattr(content_block, 'name', 'unknown')}]",
                    })

        if hasattr(response, "stop_reason"):
            attrs.finish_reasons = [response.stop_reason]

        return attrs

def calculate_cost(attrs: GenAIAttributes) -> float:
    """Calculate cost based on token usage and model pricing.

    Args:
        attrs: GenAI attributes with token counts

    Returns:
        Estimated cost in USD
    """
    if not attrs.system or not attrs.request_model:
        return 0.0

    input_tokens = attrs.input_tokens or 0
    output_tokens = attrs.output_tokens or 0
    reasoning_tokens = attrs.reasoning_tokens or 0
    cache_tokens = attrs.cache_read_tokens or 0

    # Model pricing (per 1M tokens)
    pricing = _get_model_pricing(attrs.system, attrs.request_model)

    # Calculate regular input cost (excluding cached tokens)
    regular_input = max(0, input_tokens - cache_tokens)
    input_cost = (regular_input / 1_000_000) * pricing["input"]

    # Cache cost (90% discount for Anthropic)
    cache_cost = (cache_tokens / 1_000_000) * pricing.get("cache", pricing["input"] * 0.1)

    # Output cost
    output_cost = (output_tokens / 1_000_000) * pricing["output"]

    # Reasoning cost (for o1 models)
    reasoning_cost = (reasoning_tokens / 1_000_000) * pricing.get("reasoning", 0.0)

    return input_cost + cache_cost + output_cost + reasoning_cost


def _get_model_pricing(system: str, model: str) -> Dict[str, float]:
    """Get pricing for a model (per 1M tokens)."""
    # OpenAI models
    if system == "openai":
        if "gpt-4o" in model and "mini" not in model:
            return {"input": 2.50, "output": 10.0}
        elif "gpt-4o-mini" in model:
            return {"input": 0.15, "output": 0.60}
        elif "gpt-4-turbo" in model:
            return {"input": 10.0, "output": 30.0}
        elif "o1-preview" in model:
            return {"input": 15.0, "output": 60.0, "reasoning": 15.0}
        elif "o1-mini" in model:
            return {"input": 3.0, "output": 12.0, "reasoning": 3.0}

    # Anthropic models
    elif system == "anthropic":
        if "claude-3-5-sonnet" in model:
            return {"input": 3.0, "output": 15.0, "cache": 0.30}
        elif "claude-3-opus" in model:
            return {"input": 15.0, "output": 75.0, "cache": 1.50}
        elif "claude-3-sonnet" in model:
            return {"input": 3.0, "output": 15.0, "cache": 0.30}
        elif "claude-3-haiku" in model:
            return {"input": 0.25, "output": 1.25, "cache": 0.03}

    # Default pricing
    return {"input": 10.0, "output": 30.0}
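
Usage sketch (illustrative only, not shipped in the wheel): the snippet below shows how the pieces above compose, using types.SimpleNamespace stand-ins for an OpenAI chat-completion response; a real openai ChatCompletion object exposes the same attribute names. from_anthropic_response follows the same flow with Anthropic's field names.

from types import SimpleNamespace

from agentreplay.genai import GenAIAttributes, calculate_cost

# Hypothetical stand-in for an OpenAI chat completion response.
response = SimpleNamespace(
    id="chatcmpl-123",
    model="gpt-4o-2024-08-06",
    usage=SimpleNamespace(
        prompt_tokens=120,
        completion_tokens=48,
        total_tokens=168,
        completion_tokens_details=None,  # populated for o1-style reasoning models
        prompt_tokens_details=None,      # populated when prompt caching is active
    ),
    choices=[
        SimpleNamespace(
            message=SimpleNamespace(role="assistant", content="Hi there!"),
            finish_reason="stop",
        )
    ],
)

request_params = {
    "model": "gpt-4o",
    "temperature": 0.2,
    "messages": [
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Say hi."},
    ],
}

attrs = GenAIAttributes.from_openai_response(response, request_params)

# Flat OTEL-style attributes, e.g. flat["gen_ai.usage.input_tokens"] == "120"
flat = attrs.to_attributes_dict()

# Estimated spend from the bundled per-1M-token table for gpt-4o:
# (120 / 1e6) * 2.50 + (48 / 1e6) * 10.0 ≈ $0.00078
cost = calculate_cost(attrs)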