prela-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/instrumentation/auto.py
@@ -0,0 +1,129 @@
````python
"""Auto-instrumentation for detecting and instrumenting LLM SDKs."""

from __future__ import annotations

import importlib
import logging
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from prela.core.tracer import Tracer

logger = logging.getLogger(__name__)

# Registry of available instrumentors
# Format: "library_name": ("module.path", "InstrumentorClassName")
INSTRUMENTORS = {
    # LLM providers
    "anthropic": ("prela.instrumentation.anthropic", "AnthropicInstrumentor"),
    "openai": ("prela.instrumentation.openai", "OpenAIInstrumentor"),
    # Agent frameworks
    "langchain": ("prela.instrumentation.langchain", "LangChainInstrumentor"),
    "llamaindex": ("prela.instrumentation.llamaindex", "LlamaIndexInstrumentor"),
    # Multi-agent frameworks
    "crewai": ("prela.instrumentation.multi_agent.crewai", "CrewAIInstrumentor"),
    "autogen": ("prela.instrumentation.multi_agent.autogen", "AutoGenInstrumentor"),
    "langgraph": ("prela.instrumentation.multi_agent.langgraph", "LangGraphInstrumentor"),
    "swarm": ("prela.instrumentation.multi_agent.swarm", "SwarmInstrumentor"),
}

# Package detection mapping
# Maps library name to the import name used to check if it's installed
PACKAGE_DETECTION = {
    "anthropic": "anthropic",
    "openai": "openai",
    "langchain": "langchain_core",  # LangChain uses langchain-core as the base package
    "llamaindex": "llama_index.core",  # LlamaIndex uses llama-index-core package
    # Multi-agent frameworks
    "crewai": "crewai",
    "autogen": "autogen",
    "langgraph": "langgraph",
    "swarm": "swarm",
}


def is_package_installed(package_name: str) -> bool:
    """
    Check if a package is installed.

    Args:
        package_name: Name of the package to check (e.g., "anthropic", "openai")

    Returns:
        bool: True if the package can be imported, False otherwise
    """
    try:
        importlib.import_module(package_name)
        return True
    except ImportError:
        return False


def auto_instrument(tracer: Tracer) -> list[str]:
    """
    Automatically instrument all detected libraries.

    This function:
    1. Checks which supported LLM SDKs are installed
    2. Imports and initializes their instrumentors
    3. Calls instrument(tracer) on each
    4. Returns list of successfully instrumented libraries

    The function is designed to be safe:
    - Missing libraries are skipped (not an error)
    - Instrumentation failures are logged but don't crash
    - Returns empty list if nothing was instrumented

    Args:
        tracer: The tracer instance to use for instrumentation

    Returns:
        List of library names that were successfully instrumented
        (e.g., ["anthropic", "openai"])

    Example:
        ```python
        from prela.core.tracer import Tracer
        from prela.instrumentation.auto import auto_instrument

        tracer = Tracer(service_name="my-app")
        instrumented = auto_instrument(tracer)
        print(f"Auto-instrumented: {instrumented}")
        # Output: Auto-instrumented: ['anthropic', 'openai']

        # Now all calls to these SDKs are automatically traced
        from anthropic import Anthropic
        client = Anthropic()
        response = client.messages.create(...)  # Automatically traced!
        ```
    """
    instrumented = []

    for lib_name, (module_path, class_name) in INSTRUMENTORS.items():
        # Check if the library is installed
        package_name = PACKAGE_DETECTION.get(lib_name, lib_name)
        if not is_package_installed(package_name):
            logger.debug(
                f"Package '{package_name}' not installed, skipping instrumentation"
            )
            continue

        try:
            # Import instrumentor class
            module = importlib.import_module(module_path)
            instrumentor_class = getattr(module, class_name)

            # Create and instrument
            instrumentor = instrumentor_class()
            instrumentor.instrument(tracer)

            instrumented.append(lib_name)
            logger.debug(f"Successfully instrumented '{lib_name}'")

        except Exception as e:
            # Log warning but don't fail - one broken instrumentor
            # shouldn't prevent others from working
            logger.warning(f"Failed to instrument '{lib_name}': {e}")
            continue

    return instrumented
````
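For context, the detection flow above is easy to exercise by hand. The sketch below is illustrative and not part of the package; it assumes only the names defined in this file plus the `Tracer(service_name=...)` constructor shown in `auto_instrument`'s own docstring:

```python
from prela.core.tracer import Tracer
from prela.instrumentation.auto import (
    PACKAGE_DETECTION,
    auto_instrument,
    is_package_installed,
)

# Detection is per import name; dotted submodule names such as
# "llama_index.core" work because importlib.import_module accepts them.
for lib, import_name in PACKAGE_DETECTION.items():
    print(f"{lib}: installed={is_package_installed(import_name)}")

# Safe to call unconditionally at startup: missing libraries are
# skipped, and instrumentor failures are logged rather than raised.
tracer = Tracer(service_name="my-app")
print("Auto-instrumented:", auto_instrument(tracer))
```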
prela/instrumentation/base.py
@@ -0,0 +1,436 @@
````python
"""Base classes and utilities for instrumenting external libraries.

This module provides the foundation for auto-instrumentation of LLM SDKs
and agent frameworks. It includes:

1. Instrumentor abstract base class
2. Monkey-patching utilities for function wrapping
3. Attribute extraction helpers for LLM requests/responses
"""

from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Callable
from types import ModuleType

if TYPE_CHECKING:
    # Avoid circular imports - Tracer will be implemented separately
    from prela.core.tracer import Tracer

logger = logging.getLogger(__name__)

# Attribute name for storing original functions on modules
_ORIGINALS_ATTR = "__prela_originals__"


class Instrumentor(ABC):
    """Abstract base class for library instrumentors.

    Instrumentors provide automatic tracing for external libraries by
    monkey-patching their functions to create spans around operations.

    Example:
        ```python
        class OpenAIInstrumentor(Instrumentor):
            def instrument(self, tracer: Tracer) -> None:
                # Wrap OpenAI API calls
                wrap_function(openai, "create", wrapper)

            def uninstrument(self) -> None:
                # Restore original functions
                unwrap_function(openai, "create")

            @property
            def is_instrumented(self) -> bool:
                return hasattr(openai, _ORIGINALS_ATTR)
        ```
    """

    @abstractmethod
    def instrument(self, tracer: Tracer) -> None:
        """Enable instrumentation for this library.

        This method should wrap the library's functions to create spans
        automatically. It should be idempotent - calling it multiple times
        should not create multiple layers of wrapping.

        Args:
            tracer: The tracer to use for creating spans

        Raises:
            RuntimeError: If instrumentation fails
        """
        pass

    @abstractmethod
    def uninstrument(self) -> None:
        """Disable instrumentation and restore original functions.

        This method should unwrap all previously wrapped functions and
        restore the library to its original state. It should be idempotent -
        calling it when not instrumented should be a no-op.

        Raises:
            RuntimeError: If uninstrumentation fails
        """
        pass

    @property
    @abstractmethod
    def is_instrumented(self) -> bool:
        """Check if this library is currently instrumented.

        Returns:
            True if instrumentation is active, False otherwise
        """
        pass


def wrap_function(
    module: ModuleType,
    func_name: str,
    wrapper: Callable[[Callable[..., Any]], Callable[..., Any]],
) -> None:
    """Wrap a function on a module with instrumentation.

    This function replaces `module.func_name` with a wrapped version created
    by calling `wrapper(original_func)`. The original function is stored in
    `module.__prela_originals__` for later restoration.

    If the function is already wrapped (i.e., it exists in __prela_originals__),
    this function does nothing to prevent double-wrapping.

    Args:
        module: The module containing the function to wrap
        func_name: Name of the function/attribute to wrap
        wrapper: A function that takes the original function and returns
            a wrapped version. Should preserve the function signature.

    Raises:
        AttributeError: If the function doesn't exist on the module
        RuntimeError: If wrapping fails

    Example:
        ```python
        def trace_wrapper(original_func):
            def wrapper(*args, **kwargs):
                with tracer.span("api_call"):
                    return original_func(*args, **kwargs)
            return wrapper

        wrap_function(openai, "create", trace_wrapper)
        ```
    """
    # Check if the attribute exists
    if not hasattr(module, func_name):
        raise AttributeError(
            f"Module {module.__name__} has no attribute '{func_name}'"
        )

    # Get or create the originals dict
    if not hasattr(module, _ORIGINALS_ATTR):
        setattr(module, _ORIGINALS_ATTR, {})

    originals = getattr(module, _ORIGINALS_ATTR)

    # Check if already wrapped
    if func_name in originals:
        logger.debug(
            f"{module.__name__}.{func_name} is already wrapped, skipping"
        )
        return

    # Store the original function
    original_func = getattr(module, func_name)
    originals[func_name] = original_func

    # Create and set the wrapped version
    try:
        wrapped_func = wrapper(original_func)
        setattr(module, func_name, wrapped_func)
        logger.debug(f"Successfully wrapped {module.__name__}.{func_name}")
    except Exception as e:
        # Restore original on failure
        del originals[func_name]
        if not originals:
            delattr(module, _ORIGINALS_ATTR)
        raise RuntimeError(
            f"Failed to wrap {module.__name__}.{func_name}: {e}"
        ) from e


def unwrap_function(module: ModuleType, func_name: str) -> None:
    """Restore a wrapped function to its original implementation.

    This function looks up the original implementation in
    `module.__prela_originals__` and restores it to `module.func_name`.

    If the function is not currently wrapped, this function does nothing.

    Args:
        module: The module containing the wrapped function
        func_name: Name of the function/attribute to unwrap

    Example:
        ```python
        unwrap_function(openai, "create")
        ```
    """
    # Check if the module has any wrapped functions
    if not hasattr(module, _ORIGINALS_ATTR):
        logger.debug(
            f"Module {module.__name__} has no wrapped functions, skipping"
        )
        return

    originals = getattr(module, _ORIGINALS_ATTR)

    # Check if this specific function is wrapped
    if func_name not in originals:
        logger.debug(
            f"{module.__name__}.{func_name} is not wrapped, skipping"
        )
        return

    # Restore the original function
    original_func = originals.pop(func_name)
    setattr(module, func_name, original_func)

    # Clean up the originals dict if empty
    if not originals:
        delattr(module, _ORIGINALS_ATTR)

    logger.debug(f"Successfully unwrapped {module.__name__}.{func_name}")


def extract_llm_request_attributes(
    model: str,
    messages: list[dict[str, Any]] | str | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """Extract standardized attributes from an LLM request.

    This function extracts common attributes from LLM API calls in a
    vendor-agnostic format. It handles both chat-style (messages) and
    completion-style (prompt) APIs.

    Args:
        model: The model identifier (e.g., "gpt-4", "claude-3-opus")
        messages: Chat messages (list of dicts) or text prompt (string)
        **kwargs: Additional request parameters (temperature, max_tokens, etc.)

    Returns:
        Dictionary of span attributes following semantic conventions:
        - llm.model: Model identifier
        - llm.request.type: "chat" or "completion"
        - llm.request.messages: Message count for chat
        - llm.request.prompt_length: Character count for completion
        - llm.request.temperature: Sampling temperature (if provided)
        - llm.request.max_tokens: Maximum tokens (if provided)
        - llm.request.top_p: Nucleus sampling (if provided)
        - llm.request.stream: Whether streaming is enabled

    Example:
        ```python
        attrs = extract_llm_request_attributes(
            model="gpt-4",
            messages=[
                {"role": "user", "content": "Hello"}
            ],
            temperature=0.7,
            max_tokens=100
        )
        # Returns: {
        #     "llm.model": "gpt-4",
        #     "llm.request.type": "chat",
        #     "llm.request.messages": 1,
        #     "llm.request.temperature": 0.7,
        #     "llm.request.max_tokens": 100
        # }
        ```
    """
    attributes: dict[str, Any] = {"llm.model": model}

    # Determine request type and extract message/prompt info
    if messages is not None:
        if isinstance(messages, list):
            # Chat-style API
            attributes["llm.request.type"] = "chat"
            attributes["llm.request.messages"] = len(messages)
        elif isinstance(messages, str):
            # Completion-style API with text prompt
            attributes["llm.request.type"] = "completion"
            attributes["llm.request.prompt_length"] = len(messages)

    # Extract common parameters
    # Temperature
    if "temperature" in kwargs:
        attributes["llm.request.temperature"] = kwargs["temperature"]

    # Max tokens (handle various parameter names)
    for param in ["max_tokens", "max_completion_tokens", "maxTokens"]:
        if param in kwargs:
            attributes["llm.request.max_tokens"] = kwargs[param]
            break

    # Top-p sampling
    if "top_p" in kwargs:
        attributes["llm.request.top_p"] = kwargs["top_p"]

    # Streaming
    if "stream" in kwargs:
        attributes["llm.request.stream"] = kwargs["stream"]

    # Stop sequences
    if "stop" in kwargs:
        stop = kwargs["stop"]
        if isinstance(stop, list):
            attributes["llm.request.stop_sequences"] = len(stop)
        elif stop is not None:
            attributes["llm.request.stop_sequences"] = 1

    # Frequency penalty
    if "frequency_penalty" in kwargs:
        attributes["llm.request.frequency_penalty"] = kwargs["frequency_penalty"]

    # Presence penalty
    if "presence_penalty" in kwargs:
        attributes["llm.request.presence_penalty"] = kwargs["presence_penalty"]

    return attributes


def extract_llm_response_attributes(
    response: Any,
    vendor: str,
) -> dict[str, Any]:
    """Extract standardized attributes from an LLM response.

    This function extracts common attributes from LLM API responses in a
    vendor-agnostic format. It handles different response structures from
    OpenAI, Anthropic, and other providers.

    Args:
        response: The response object from the LLM API
        vendor: The vendor identifier ("openai", "anthropic", etc.)

    Returns:
        Dictionary of span attributes:
        - llm.response.model: Actual model used (may differ from request)
        - llm.response.id: Response/completion ID
        - llm.response.finish_reason: Why generation stopped
        - llm.usage.prompt_tokens: Input token count
        - llm.usage.completion_tokens: Output token count
        - llm.usage.total_tokens: Total token count

    Example:
        ```python
        # OpenAI response
        attrs = extract_llm_response_attributes(
            response=openai_response,
            vendor="openai"
        )

        # Anthropic response
        attrs = extract_llm_response_attributes(
            response=anthropic_response,
            vendor="anthropic"
        )
        ```
    """
    attributes: dict[str, Any] = {}

    if vendor == "openai":
        # OpenAI response structure
        # Handle both dict and object responses
        if isinstance(response, dict):
            # Response ID
            if "id" in response:
                attributes["llm.response.id"] = response["id"]

            # Model
            if "model" in response:
                attributes["llm.response.model"] = response["model"]

            # Usage stats
            if "usage" in response:
                usage = response["usage"]
                if "prompt_tokens" in usage:
                    attributes["llm.usage.prompt_tokens"] = usage["prompt_tokens"]
                if "completion_tokens" in usage:
                    attributes["llm.usage.completion_tokens"] = usage["completion_tokens"]
                if "total_tokens" in usage:
                    attributes["llm.usage.total_tokens"] = usage["total_tokens"]

            # Finish reason (from first choice)
            if "choices" in response and response["choices"]:
                first_choice = response["choices"][0]
                if "finish_reason" in first_choice:
                    attributes["llm.response.finish_reason"] = first_choice["finish_reason"]
        else:
            # Object response (openai SDK objects)
            if hasattr(response, "id"):
                attributes["llm.response.id"] = response.id
            if hasattr(response, "model"):
                attributes["llm.response.model"] = response.model
            if hasattr(response, "usage"):
                if hasattr(response.usage, "prompt_tokens"):
                    attributes["llm.usage.prompt_tokens"] = response.usage.prompt_tokens
                if hasattr(response.usage, "completion_tokens"):
                    attributes["llm.usage.completion_tokens"] = response.usage.completion_tokens
                if hasattr(response.usage, "total_tokens"):
                    attributes["llm.usage.total_tokens"] = response.usage.total_tokens
            if hasattr(response, "choices") and response.choices:
                first_choice = response.choices[0]
                if hasattr(first_choice, "finish_reason"):
                    attributes["llm.response.finish_reason"] = first_choice.finish_reason

    elif vendor == "anthropic":
        # Anthropic response structure
        if isinstance(response, dict):
            # Response ID
            if "id" in response:
                attributes["llm.response.id"] = response["id"]

            # Model
            if "model" in response:
                attributes["llm.response.model"] = response["model"]

            # Usage stats
            if "usage" in response:
                usage = response["usage"]
                if "input_tokens" in usage:
                    attributes["llm.usage.prompt_tokens"] = usage["input_tokens"]
                if "output_tokens" in usage:
                    attributes["llm.usage.completion_tokens"] = usage["output_tokens"]
                # Calculate total
                if "input_tokens" in usage and "output_tokens" in usage:
                    attributes["llm.usage.total_tokens"] = (
                        usage["input_tokens"] + usage["output_tokens"]
                    )

            # Stop reason
            if "stop_reason" in response:
                attributes["llm.response.finish_reason"] = response["stop_reason"]
        else:
            # Object response
            if hasattr(response, "id"):
                attributes["llm.response.id"] = response.id
            if hasattr(response, "model"):
                attributes["llm.response.model"] = response.model
            if hasattr(response, "usage"):
                if hasattr(response.usage, "input_tokens"):
                    attributes["llm.usage.prompt_tokens"] = response.usage.input_tokens
                if hasattr(response.usage, "output_tokens"):
                    attributes["llm.usage.completion_tokens"] = response.usage.output_tokens
                # Calculate total (check both fields, mirroring the dict
                # branch, so a partial usage object cannot raise)
                if hasattr(response.usage, "input_tokens") and hasattr(
                    response.usage, "output_tokens"
                ):
                    attributes["llm.usage.total_tokens"] = (
                        response.usage.input_tokens + response.usage.output_tokens
                    )
            if hasattr(response, "stop_reason"):
                attributes["llm.response.finish_reason"] = response.stop_reason

    return attributes
````
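Putting base.py together end to end, here is a minimal sketch of a concrete `Instrumentor` built on `wrap_function`/`unwrap_function`. The `mylib` module and `_StubTracer` are stand-ins invented for the example, and the `tracer.span(...)` context-manager usage is assumed from the `wrap_function` docstring above:

```python
from contextlib import contextmanager
from types import ModuleType

from prela.instrumentation.base import (
    Instrumentor,
    extract_llm_request_attributes,
    unwrap_function,
    wrap_function,
)

# Hypothetical target: a tiny stand-in module with one function.
mylib = ModuleType("mylib")
mylib.call_api = lambda prompt: f"echo: {prompt}"


class _StubTracer:
    """Stand-in tracer; prela's real Tracer lives in prela.core.tracer."""

    @contextmanager
    def span(self, name):
        print(f"span start: {name}")
        try:
            yield
        finally:
            print(f"span end: {name}")


class MyLibInstrumentor(Instrumentor):
    """Sketch: traces mylib.call_api via the monkey-patching helpers."""

    def instrument(self, tracer) -> None:
        def make_wrapper(original):
            def wrapper(*args, **kwargs):
                with tracer.span("mylib.call_api"):
                    return original(*args, **kwargs)
            return wrapper

        # Idempotent: wrap_function skips names already recorded in
        # __prela_originals__, so double-instrumenting is harmless.
        wrap_function(mylib, "call_api", make_wrapper)

    def uninstrument(self) -> None:
        # Idempotent: a no-op when nothing is wrapped.
        unwrap_function(mylib, "call_api")

    @property
    def is_instrumented(self) -> bool:
        return "call_api" in getattr(mylib, "__prela_originals__", {})


inst = MyLibInstrumentor()
inst.instrument(_StubTracer())
print(mylib.call_api("hi"))   # runs inside a span
print(inst.is_instrumented)   # True
inst.uninstrument()
print(inst.is_instrumented)   # False

# The attribute helpers are plain functions and easy to check directly:
print(extract_llm_request_attributes(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.7,
))
# {'llm.model': 'gpt-4', 'llm.request.type': 'chat',
#  'llm.request.messages': 1, 'llm.request.temperature': 0.7}
```

Because the originals are stored on the patched module itself (under `__prela_originals__`), wrap and unwrap stay symmetric even when several instrumentors touch the same module.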