agentreplay-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
agentreplay/genai.py ADDED
# Copyright 2025 Sushanth (https://github.com/sushanthpy)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""OpenTelemetry GenAI semantic conventions for the Agentreplay SDK.

This module provides utilities for tracking LLM calls using the OpenTelemetry
GenAI semantic conventions (v1.36+).

Reference: https://opentelemetry.io/docs/specs/semconv/gen-ai/
"""

from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field
import json


@dataclass
class GenAIAttributes:
    """OpenTelemetry GenAI semantic conventions attributes.

    This class represents the standard attributes for LLM observability
    according to OpenTelemetry GenAI semantic conventions v1.36+.
    """

    # =========================================================================
    # PROVIDER IDENTIFICATION (REQUIRED)
    # =========================================================================
    system: Optional[str] = None  # Legacy: "openai", "anthropic", etc.
    provider_name: Optional[str] = None  # New: "openai", "anthropic", "aws.bedrock", etc.
    operation_name: Optional[str] = None  # "chat", "completion", "embedding"

    # =========================================================================
    # MODEL INFORMATION (REQUIRED)
    # =========================================================================
    request_model: Optional[str] = None  # Model requested
    response_model: Optional[str] = None  # Actual model used
    response_id: Optional[str] = None  # Provider response ID

    # =========================================================================
    # TOKEN USAGE (CRITICAL for cost calculation)
    # =========================================================================
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    reasoning_tokens: Optional[int] = None  # OpenAI o1 models
    cache_read_tokens: Optional[int] = None  # Anthropic cache hits
    cache_creation_tokens: Optional[int] = None  # Anthropic cache creation

    # =========================================================================
    # FINISH REASONS
    # =========================================================================
    finish_reasons: Optional[List[str]] = None

    # =========================================================================
    # REQUEST PARAMETERS / HYPERPARAMETERS (RECOMMENDED)
    # =========================================================================
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[float] = None  # Anthropic/Google
    max_tokens: Optional[int] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop_sequences: Optional[List[str]] = None
    seed: Optional[int] = None  # Reproducibility
    choice_count: Optional[int] = None  # n parameter

    # =========================================================================
    # SERVER INFORMATION (REQUIRED for distributed tracing)
    # =========================================================================
    server_address: Optional[str] = None
    server_port: Optional[int] = None

    # =========================================================================
    # ERROR TRACKING (REQUIRED when errors occur)
    # =========================================================================
    error_type: Optional[str] = None

    # =========================================================================
    # AGENT ATTRIBUTES (for agentic systems)
    # =========================================================================
    agent_id: Optional[str] = None
    agent_name: Optional[str] = None
    agent_description: Optional[str] = None
    conversation_id: Optional[str] = None

    # =========================================================================
    # TOOL DEFINITIONS (array of tool schemas)
    # =========================================================================
    tool_definitions: Optional[List[Dict[str, Any]]] = None

    # =========================================================================
    # STRUCTURED CONTENT
    # =========================================================================
    prompts: List[Dict[str, Any]] = field(default_factory=list)
    completions: List[Dict[str, Any]] = field(default_factory=list)
    system_instructions: Optional[str] = None

    # =========================================================================
    # ADDITIONAL ATTRIBUTES
    # =========================================================================
    additional: Dict[str, Any] = field(default_factory=dict)

    def to_attributes_dict(self) -> Dict[str, str]:
        """Convert to flat attributes dictionary for Agentreplay ingestion.

        Returns:
            Dictionary with OpenTelemetry GenAI attribute names as keys.
        """
        attrs: Dict[str, str] = {}

        # =====================================================================
        # PROVIDER IDENTIFICATION
        # =====================================================================
        if self.system:
            attrs["gen_ai.system"] = self.system
        if self.provider_name:
            attrs["gen_ai.provider.name"] = self.provider_name
        if self.operation_name:
            attrs["gen_ai.operation.name"] = self.operation_name

        # =====================================================================
        # MODEL INFORMATION
        # =====================================================================
        if self.request_model:
            attrs["gen_ai.request.model"] = self.request_model
        if self.response_model:
            attrs["gen_ai.response.model"] = self.response_model
        if self.response_id:
            attrs["gen_ai.response.id"] = self.response_id

        # =====================================================================
        # TOKEN USAGE
        # =====================================================================
        if self.input_tokens is not None:
            attrs["gen_ai.usage.input_tokens"] = str(self.input_tokens)
        if self.output_tokens is not None:
            attrs["gen_ai.usage.output_tokens"] = str(self.output_tokens)
        if self.total_tokens is not None:
            attrs["gen_ai.usage.total_tokens"] = str(self.total_tokens)
        if self.reasoning_tokens is not None:
            attrs["gen_ai.usage.reasoning_tokens"] = str(self.reasoning_tokens)
        if self.cache_read_tokens is not None:
            attrs["gen_ai.usage.cache_read_tokens"] = str(self.cache_read_tokens)
        if self.cache_creation_tokens is not None:
            attrs["gen_ai.usage.cache_creation_tokens"] = str(self.cache_creation_tokens)

        # =====================================================================
        # FINISH REASONS
        # =====================================================================
        if self.finish_reasons:
            attrs["gen_ai.response.finish_reasons"] = json.dumps(self.finish_reasons)

        # =====================================================================
        # REQUEST PARAMETERS / HYPERPARAMETERS
        # =====================================================================
        if self.temperature is not None:
            attrs["gen_ai.request.temperature"] = str(self.temperature)
        if self.top_p is not None:
            attrs["gen_ai.request.top_p"] = str(self.top_p)
        if self.top_k is not None:
            attrs["gen_ai.request.top_k"] = str(self.top_k)
        if self.max_tokens is not None:
            attrs["gen_ai.request.max_tokens"] = str(self.max_tokens)
        if self.frequency_penalty is not None:
            attrs["gen_ai.request.frequency_penalty"] = str(self.frequency_penalty)
        if self.presence_penalty is not None:
            attrs["gen_ai.request.presence_penalty"] = str(self.presence_penalty)
        if self.stop_sequences:
            attrs["gen_ai.request.stop_sequences"] = json.dumps(self.stop_sequences)
        if self.seed is not None:
            attrs["gen_ai.request.seed"] = str(self.seed)
        if self.choice_count is not None:
            attrs["gen_ai.request.choice.count"] = str(self.choice_count)

        # =====================================================================
        # SERVER INFORMATION
        # =====================================================================
        if self.server_address:
            attrs["server.address"] = self.server_address
        if self.server_port is not None:
            attrs["server.port"] = str(self.server_port)

        # =====================================================================
        # ERROR TRACKING
        # =====================================================================
        if self.error_type:
            attrs["error.type"] = self.error_type

        # =====================================================================
        # AGENT ATTRIBUTES
        # =====================================================================
        if self.agent_id:
            attrs["gen_ai.agent.id"] = self.agent_id
        if self.agent_name:
            attrs["gen_ai.agent.name"] = self.agent_name
        if self.agent_description:
            attrs["gen_ai.agent.description"] = self.agent_description
        if self.conversation_id:
            attrs["gen_ai.conversation.id"] = self.conversation_id

        # =====================================================================
        # TOOL DEFINITIONS
        # =====================================================================
        if self.tool_definitions:
            attrs["gen_ai.tool.definitions"] = json.dumps(self.tool_definitions)

        # =====================================================================
        # SYSTEM INSTRUCTIONS
        # =====================================================================
        if self.system_instructions:
            attrs["gen_ai.system_instructions"] = self.system_instructions

        # =====================================================================
        # STRUCTURED PROMPTS
        # =====================================================================
        for i, prompt in enumerate(self.prompts):
            if "role" in prompt:
                attrs[f"gen_ai.prompt.{i}.role"] = prompt["role"]
            if "content" in prompt:
                attrs[f"gen_ai.prompt.{i}.content"] = prompt["content"]

        # Structured completions
        for i, completion in enumerate(self.completions):
            if "role" in completion:
                attrs[f"gen_ai.completion.{i}.role"] = completion["role"]
            if "content" in completion:
                attrs[f"gen_ai.completion.{i}.content"] = completion["content"]
            if "finish_reason" in completion:
                attrs[f"gen_ai.completion.{i}.finish_reason"] = completion["finish_reason"]

        # Additional attributes
        for key, value in self.additional.items():
            attrs[key] = str(value)

        return attrs
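
    # Usage sketch (illustrative; not part of the published file). With
    # invented values, the flattening above produces, e.g.:
    #
    #     GenAIAttributes(
    #         provider_name="openai",
    #         operation_name="chat",
    #         request_model="gpt-4o-mini",
    #         input_tokens=12,
    #         output_tokens=34,
    #     ).to_attributes_dict()
    #     # -> {"gen_ai.provider.name": "openai",
    #     #     "gen_ai.operation.name": "chat",
    #     #     "gen_ai.request.model": "gpt-4o-mini",
    #     #     "gen_ai.usage.input_tokens": "12",
    #     #     "gen_ai.usage.output_tokens": "34"}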

    @classmethod
    def from_openai_response(
        cls,
        response: Any,
        request_params: Optional[Dict] = None,
        server_address: str = "api.openai.com",
        server_port: int = 443,
    ) -> "GenAIAttributes":
        """Create GenAI attributes from an OpenAI API response.

        Args:
            response: OpenAI API response object
            request_params: Optional request parameters (temperature, etc.)
            server_address: API server address (default: api.openai.com)
            server_port: API server port (default: 443)

        Returns:
            GenAIAttributes instance with OTEL-compliant attributes
        """
        request_params = request_params or {}

        attrs = cls(
            system="openai",
            provider_name="openai",  # New OTEL attribute
            operation_name="chat",
            request_model=request_params.get("model"),
            response_model=getattr(response, "model", None),
            response_id=getattr(response, "id", None),
            server_address=server_address,
            server_port=server_port,
        )

        # Extract token usage
        if hasattr(response, "usage") and response.usage:
            usage = response.usage
            attrs.input_tokens = getattr(usage, "prompt_tokens", None)
            attrs.output_tokens = getattr(usage, "completion_tokens", None)
            attrs.total_tokens = getattr(usage, "total_tokens", None)

            # Handle o1 reasoning tokens
            if hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
                details = usage.completion_tokens_details
                attrs.reasoning_tokens = getattr(details, "reasoning_tokens", None)

            # Handle cached tokens
            if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
                details = usage.prompt_tokens_details
                attrs.cache_read_tokens = getattr(details, "cached_tokens", None)

        # Extract all hyperparameters
        attrs.temperature = request_params.get("temperature")
        attrs.top_p = request_params.get("top_p")
        attrs.max_tokens = request_params.get("max_tokens")
        attrs.frequency_penalty = request_params.get("frequency_penalty")
        attrs.presence_penalty = request_params.get("presence_penalty")
        attrs.seed = request_params.get("seed")
        attrs.choice_count = request_params.get("n")

        # Stop sequences
        stop = request_params.get("stop")
        if stop:
            attrs.stop_sequences = stop if isinstance(stop, list) else [stop]

        # Extract tool definitions if provided
        tools = request_params.get("tools")
        if tools:
            attrs.tool_definitions = tools

        # Extract prompts
        if "messages" in request_params:
            attrs.prompts = [
                {"role": msg.get("role"), "content": msg.get("content")}
                for msg in request_params["messages"]
            ]
            # Extract system instructions
            for msg in request_params["messages"]:
                if msg.get("role") == "system":
                    attrs.system_instructions = msg.get("content")
                    break

        # Extract completions
        if hasattr(response, "choices"):
            attrs.completions = []
            attrs.finish_reasons = []
            for choice in response.choices:
                if hasattr(choice, "message"):
                    attrs.completions.append({
                        "role": getattr(choice.message, "role", "assistant"),
                        "content": getattr(choice.message, "content", ""),
                    })
                if hasattr(choice, "finish_reason"):
                    attrs.finish_reasons.append(choice.finish_reason)

        return attrs
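
    # Usage sketch (illustrative; not part of the published file). The
    # classmethod relies only on attribute access, so any object shaped like
    # an OpenAI ChatCompletion works. Assuming `response.usage` reports
    # prompt_tokens=10 and completion_tokens=5:
    #
    #     attrs = GenAIAttributes.from_openai_response(
    #         response,
    #         request_params={
    #             "model": "gpt-4o-mini",
    #             "temperature": 0.2,
    #             "messages": [{"role": "user", "content": "Hi"}],
    #         },
    #     )
    #     assert attrs.input_tokens == 10 and attrs.output_tokens == 5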

    @classmethod
    def from_anthropic_response(
        cls,
        response: Any,
        request_params: Optional[Dict] = None,
        server_address: str = "api.anthropic.com",
        server_port: int = 443,
    ) -> "GenAIAttributes":
        """Create GenAI attributes from an Anthropic API response.

        Args:
            response: Anthropic API response object
            request_params: Optional request parameters
            server_address: API server address (default: api.anthropic.com)
            server_port: API server port (default: 443)

        Returns:
            GenAIAttributes instance with OTEL-compliant attributes
        """
        request_params = request_params or {}

        attrs = cls(
            system="anthropic",
            provider_name="anthropic",  # New OTEL attribute
            operation_name="chat",
            request_model=request_params.get("model"),
            response_model=getattr(response, "model", None),
            response_id=getattr(response, "id", None),
            server_address=server_address,
            server_port=server_port,
        )

        # Extract token usage
        if hasattr(response, "usage") and response.usage:
            usage = response.usage
            attrs.input_tokens = getattr(usage, "input_tokens", None)
            attrs.output_tokens = getattr(usage, "output_tokens", None)

            # Calculate total (compare against None so zero counts still total)
            if attrs.input_tokens is not None and attrs.output_tokens is not None:
                attrs.total_tokens = attrs.input_tokens + attrs.output_tokens

            # Extract cache tokens (Anthropic prompt caching)
            if hasattr(usage, "cache_read_input_tokens"):
                attrs.cache_read_tokens = usage.cache_read_input_tokens
            if hasattr(usage, "cache_creation_input_tokens"):
                attrs.cache_creation_tokens = usage.cache_creation_input_tokens

        # Extract all hyperparameters
        attrs.temperature = request_params.get("temperature")
        attrs.top_p = request_params.get("top_p")
        attrs.top_k = request_params.get("top_k")  # Anthropic-specific
        attrs.max_tokens = request_params.get("max_tokens")

        # Stop sequences
        stop = request_params.get("stop_sequences")
        if stop:
            attrs.stop_sequences = stop if isinstance(stop, list) else [stop]

        # Extract tool definitions if provided
        tools = request_params.get("tools")
        if tools:
            attrs.tool_definitions = tools

        # Extract prompts
        if "messages" in request_params:
            attrs.prompts = [
                {"role": msg.get("role"), "content": msg.get("content")}
                for msg in request_params["messages"]
            ]

        # Add system prompt if present (Anthropic uses a separate system param)
        if "system" in request_params:
            attrs.system_instructions = request_params["system"]
            attrs.prompts.insert(0, {
                "role": "system",
                "content": request_params["system"],
            })

        # Extract completions
        if hasattr(response, "content") and response.content:
            attrs.completions = []
            for content_block in response.content:
                if hasattr(content_block, "text"):
                    attrs.completions.append({
                        "role": "assistant",
                        "content": content_block.text,
                    })
                elif hasattr(content_block, "type") and content_block.type == "tool_use":
                    # Tool use content block
                    attrs.completions.append({
                        "role": "assistant",
                        "content": f"[tool_use: {getattr(content_block, 'name', 'unknown')}]",
                    })

        if hasattr(response, "stop_reason"):
            attrs.finish_reasons = [response.stop_reason]

        return attrs
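

# Illustrative sketch (not part of the published module): both classmethods
# rely only on attribute access, so a types.SimpleNamespace stand-in for the
# provider SDK response object is enough to exercise the mapping. All field
# values below are invented.
def _example_from_anthropic_response() -> GenAIAttributes:
    from types import SimpleNamespace

    response = SimpleNamespace(
        id="msg_demo",
        model="claude-3-5-sonnet-20241022",
        usage=SimpleNamespace(input_tokens=20, output_tokens=7),
        content=[SimpleNamespace(text="Hello!")],
        stop_reason="end_turn",
    )
    attrs = GenAIAttributes.from_anthropic_response(
        response,
        request_params={
            "model": "claude-3-5-sonnet-latest",
            "max_tokens": 256,
            "system": "You are terse.",
            "messages": [{"role": "user", "content": "Hi"}],
        },
    )
    # The separate Anthropic `system` parameter is folded into prompts[0],
    # and total_tokens is derived from input_tokens + output_tokens.
    assert attrs.total_tokens == 27
    assert attrs.prompts[0]["role"] == "system"
    return attrs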


def calculate_cost(attrs: GenAIAttributes) -> float:
    """Calculate cost based on token usage and model pricing.

    Args:
        attrs: GenAI attributes with token counts

    Returns:
        Estimated cost in USD
    """
    if not attrs.system or not attrs.request_model:
        return 0.0

    input_tokens = attrs.input_tokens or 0
    output_tokens = attrs.output_tokens or 0
    reasoning_tokens = attrs.reasoning_tokens or 0
    cache_tokens = attrs.cache_read_tokens or 0

    # Model pricing (per 1M tokens)
    pricing = _get_model_pricing(attrs.system, attrs.request_model)

    # Calculate regular input cost (excluding cached tokens)
    regular_input = max(0, input_tokens - cache_tokens)
    input_cost = (regular_input / 1_000_000) * pricing["input"]

    # Cache cost (90% discount for Anthropic)
    cache_cost = (cache_tokens / 1_000_000) * pricing.get("cache", pricing["input"] * 0.1)

    # Output cost
    output_cost = (output_tokens / 1_000_000) * pricing["output"]

    # Reasoning cost (for o1 models)
    reasoning_cost = (reasoning_tokens / 1_000_000) * pricing.get("reasoning", 0.0)

    return input_cost + cache_cost + output_cost + reasoning_cost
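

# Illustrative sketch (not part of the published module): a worked cost
# calculation. With gpt-4o-mini pricing ($0.15 input / $0.60 output per 1M
# tokens), 100,000 input tokens and 10,000 output tokens cost
# 0.1 * 0.15 + 0.01 * 0.60 = 0.015 + 0.006 = $0.021.
def _example_calculate_cost() -> float:
    attrs = GenAIAttributes(
        system="openai",
        request_model="gpt-4o-mini",
        input_tokens=100_000,
        output_tokens=10_000,
    )
    cost = calculate_cost(attrs)
    assert abs(cost - 0.021) < 1e-9
    return cost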
481
+
482
+
483
+ def _get_model_pricing(system: str, model: str) -> Dict[str, float]:
484
+ """Get pricing for a model (per 1M tokens)."""
485
+ # OpenAI models
486
+ if system == "openai":
487
+ if "gpt-4o" in model and "mini" not in model:
488
+ return {"input": 2.50, "output": 10.0}
489
+ elif "gpt-4o-mini" in model:
490
+ return {"input": 0.15, "output": 0.60}
491
+ elif "gpt-4-turbo" in model:
492
+ return {"input": 10.0, "output": 30.0}
493
+ elif "o1-preview" in model:
494
+ return {"input": 15.0, "output": 60.0, "reasoning": 15.0}
495
+ elif "o1-mini" in model:
496
+ return {"input": 3.0, "output": 12.0, "reasoning": 3.0}
497
+
498
+ # Anthropic models
499
+ elif system == "anthropic":
500
+ if "claude-3-5-sonnet" in model:
501
+ return {"input": 3.0, "output": 15.0, "cache": 0.30}
502
+ elif "claude-3-opus" in model:
503
+ return {"input": 15.0, "output": 75.0, "cache": 1.50}
504
+ elif "claude-3-sonnet" in model:
505
+ return {"input": 3.0, "output": 15.0, "cache": 0.30}
506
+ elif "claude-3-haiku" in model:
507
+ return {"input": 0.25, "output": 1.25, "cache": 0.03}
508
+
509
+ # Default pricing
510
+ return {"input": 10.0, "output": 30.0}