openlit-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +143 -0
- openlit/__init__.py +184 -0
- openlit/instrumentation/anthropic/__init__.py +50 -0
- openlit/instrumentation/anthropic/anthropic.py +291 -0
- openlit/instrumentation/anthropic/async_anthropic.py +291 -0
- openlit/instrumentation/chroma/__init__.py +86 -0
- openlit/instrumentation/chroma/chroma.py +197 -0
- openlit/instrumentation/cohere/__init__.py +51 -0
- openlit/instrumentation/cohere/cohere.py +397 -0
- openlit/instrumentation/langchain/__init__.py +74 -0
- openlit/instrumentation/langchain/langchain.py +161 -0
- openlit/instrumentation/mistral/__init__.py +80 -0
- openlit/instrumentation/mistral/async_mistral.py +417 -0
- openlit/instrumentation/mistral/mistral.py +416 -0
- openlit/instrumentation/openai/__init__.py +335 -0
- openlit/instrumentation/openai/async_azure_openai.py +841 -0
- openlit/instrumentation/openai/async_openai.py +875 -0
- openlit/instrumentation/openai/azure_openai.py +840 -0
- openlit/instrumentation/openai/openai.py +891 -0
- openlit/instrumentation/pinecone/__init__.py +66 -0
- openlit/instrumentation/pinecone/pinecone.py +173 -0
- openlit/instrumentation/transformers/__init__.py +37 -0
- openlit/instrumentation/transformers/transformers.py +156 -0
- openlit/otel/metrics.py +109 -0
- openlit/otel/tracing.py +83 -0
- openlit/semcov/__init__.py +123 -0
- openlit-0.0.1.dist-info/LICENSE +201 -0
- openlit-0.0.1.dist-info/METADATA +113 -0
- openlit-0.0.1.dist-info/RECORD +30 -0
- openlit-0.0.1.dist-info/WHEEL +4 -0
openlit/__helpers.py
ADDED
@@ -0,0 +1,143 @@
# pylint: disable=bare-except, broad-exception-caught
"""
This module has functions to calculate model costs based on tokens and to fetch pricing information.
"""

import logging
import requests
import tiktoken
from opentelemetry.trace import Status, StatusCode

# Set up logging
logger = logging.getLogger(__name__)

def openai_tokens(text, model):
    """
    Calculate the number of tokens a given text would take up for a specified model.

    Args:
        text (str): The input text to be encoded.
        model (str): The model identifier used for encoding.

    Returns:
        int: The number of tokens the text is encoded into.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except:
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = len(encoding.encode(text))
    return num_tokens

def general_tokens(text):
    """
    Calculate the number of tokens a given text would take up.

    Args:
        text (str): The input text to be encoded.

    Returns:
        int: The number of tokens the text is encoded into.
    """

    encoding = tiktoken.get_encoding("gpt2")

    num_tokens = len(encoding.encode(text))
    return num_tokens

def get_chat_model_cost(model, pricing_info, prompt_tokens, completion_tokens):
    """
    Retrieve the cost of processing for a given model based on prompt and completion tokens.

    Args:
        model (str): The model identifier.
        pricing_info (dict): A dictionary containing pricing information for various models.
        prompt_tokens (int): Number of tokens in the prompt.
        completion_tokens (int): Number of tokens in the completion, if applicable.

    Returns:
        float: The calculated cost for the operation.
    """
    try:
        cost = ((prompt_tokens / 1000) * pricing_info["chat"][model]["promptPrice"]) + \
               ((completion_tokens / 1000) * pricing_info["chat"][model]["completionPrice"])
    except:
        cost = 0
    return cost

def get_embed_model_cost(model, pricing_info, prompt_tokens):
    """
    Retrieve the cost of processing for a given model based on prompt tokens.

    Args:
        model (str): The model identifier.
        pricing_info (dict): A dictionary containing pricing information for various models.
        prompt_tokens (int): Number of tokens in the prompt.

    Returns:
        float: The calculated cost for the operation.
    """
    try:
        cost = (prompt_tokens / 1000) * pricing_info["embeddings"][model]
    except:
        cost = 0
    return cost

def get_image_model_cost(model, pricing_info, size, quality):
    """
    Retrieve the cost of processing for a given model based on image size and quality.

    Args:
        model (str): The model identifier.
        pricing_info (dict): A dictionary containing pricing information for various models.
        size (str): Size of the image.
        quality (int): Quality of the image.

    Returns:
        float: The calculated cost for the operation.
    """
    try:
        cost = pricing_info["images"][model][quality][size]
    except:
        cost = 0
    return cost

def get_audio_model_cost(model, pricing_info, prompt):
    """
    Retrieve the cost of processing for a given model based on the prompt.

    Args:
        model (str): The model identifier.
        pricing_info (dict): A dictionary containing pricing information for various models.
        prompt (str): Prompt to the LLM model.

    Returns:
        float: The calculated cost for the operation.
    """
    try:
        cost = (len(prompt) / 1000) * pricing_info["audio"][model]
    except:
        cost = 0
    return cost

def fetch_pricing_info():
    """Fetches pricing information from a specified URL."""
    pricing_url = "https://raw.githubusercontent.com/dokulabs/doku/main/assets/pricing.json"
    try:
        # Set a timeout of 20 seconds for both the connection and the read
        response = requests.get(pricing_url, timeout=20)
        response.raise_for_status()
        return response.json()
    except requests.HTTPError as http_err:
        logger.error("HTTP error occurred while fetching pricing info: %s", http_err)
    except Exception as err:
        logger.error("Unexpected error occurred while fetching pricing info: %s", err)
    return {}

def handle_exception(span, e):
    """Handles exceptions when an LLM function fails or trace creation fails."""
    # Record the exception details within the span
    span.record_exception(e)
    span.set_status(Status(StatusCode.ERROR))
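Taken together, these helpers compute a per-call cost by indexing the pricing dict fetched from the URL above. A minimal usage sketch follows — the model key and per-1K-token prices are made up for illustration; only the dict shape (pricing_info["chat"][model]["promptPrice"/"completionPrice"]) is what the code actually indexes into:

from openlit.__helpers import openai_tokens, get_chat_model_cost

# Hypothetical pricing data mirroring the shape get_chat_model_cost expects;
# real values come from fetch_pricing_info(). Prices are per 1K tokens.
pricing_info = {
    "chat": {
        "gpt-4": {"promptPrice": 0.03, "completionPrice": 0.06}
    }
}

prompt_tokens = openai_tokens("What is OpenTelemetry?", "gpt-4")
completion_tokens = 120  # e.g. taken from the API response's usage field

# Computes (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
cost = get_chat_model_cost("gpt-4", pricing_info, prompt_tokens, completion_tokens)
print(f"Estimated cost: ${cost:.6f}")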
openlit/__init__.py
ADDED
@@ -0,0 +1,184 @@
"""
The __init__.py module for the openLIT package.
This module sets up the openLIT configuration and instrumentation for various
large language models (LLMs).
"""
from typing import Optional, Dict, Any
import logging

# Import internal modules for setting up tracing and fetching pricing info.
from openlit.otel.tracing import setup_tracing
from openlit.otel.metrics import setup_meter
from openlit.__helpers import fetch_pricing_info

# Instrumentors for various large language models.
from openlit.instrumentation.openai import OpenAIInstrumentor
from openlit.instrumentation.anthropic import AnthropicInstrumentor
from openlit.instrumentation.cohere import CohereInstrumentor
from openlit.instrumentation.mistral import MistralInstrumentor
from openlit.instrumentation.langchain import LangChainInstrumentor
from openlit.instrumentation.chroma import ChromaInstrumentor
from openlit.instrumentation.pinecone import PineconeInstrumentor
from openlit.instrumentation.transformers import TransformersInstrumentor

# Set up logging for error and information messages.
logger = logging.getLogger(__name__)

class OpenlitConfig:
    """
    A singleton configuration class for openLIT.

    This class maintains a single instance of configuration settings, including
    environment details, application name, and tracing information, throughout the
    openLIT package.

    Attributes:
        environment (str): Deployment environment of the application.
        application_name (str): Name of the application using openLIT.
        pricing_info (Dict[str, Any]): Pricing information.
        tracer (Optional[Any]): Tracer instance for OpenTelemetry.
        metrics_dict (Dict[str, Any]): Dictionary of metric instruments.
        otlp_endpoint (Optional[str]): Endpoint for OTLP.
        otlp_headers (Optional[Dict[str, str]]): Headers for OTLP.
        disable_batch (bool): Flag to disable batch span processing in tracing.
        trace_content (bool): Flag to enable or disable tracing of content.
        disable_metrics (bool): Flag to disable metrics collection.
    """
    _instance = None

    def __new__(cls):
        """Ensures that only one instance of the configuration exists."""
        if cls._instance is None:
            cls._instance = super(OpenlitConfig, cls).__new__(cls)
            cls.reset_to_defaults()
        return cls._instance

    @classmethod
    def reset_to_defaults(cls):
        """Resets configuration to default values."""
        cls.environment = "default"
        cls.application_name = "default"
        cls.pricing_info = fetch_pricing_info()
        cls.tracer = None
        cls.metrics_dict = {}
        cls.otlp_endpoint = None
        cls.otlp_headers = None
        cls.disable_batch = False
        cls.trace_content = True
        cls.disable_metrics = False

    @classmethod
    def update_config(cls, environment, application_name, tracer, otlp_endpoint,
                      otlp_headers, disable_batch, trace_content, metrics_dict, disable_metrics):
        """
        Updates the configuration based on provided parameters.

        Args:
            environment (str): Deployment environment.
            application_name (str): Application name.
            tracer: Tracer instance.
            otlp_endpoint (str): OTLP endpoint.
            otlp_headers (Dict[str, str]): OTLP headers.
            disable_batch (bool): Disable batch span processing flag.
            trace_content (bool): Enable or disable content tracing.
            metrics_dict: Dictionary of metric instruments.
            disable_metrics (bool): Flag to disable metrics collection.
        """
        cls.environment = environment
        cls.application_name = application_name
        cls.pricing_info = fetch_pricing_info()
        cls.tracer = tracer
        cls.metrics_dict = metrics_dict
        cls.otlp_endpoint = otlp_endpoint
        cls.otlp_headers = otlp_headers
        cls.disable_batch = disable_batch
        cls.trace_content = trace_content
        cls.disable_metrics = disable_metrics

def init(environment="default", application_name="default", tracer=None, otlp_endpoint=None,
         otlp_headers=None, disable_batch=False, trace_content=True, disabled_instrumentors=None,
         meter=None, disable_metrics=False):
    """
    Initializes the openLIT configuration and sets up tracing.

    This function sets up the openLIT environment with the provided configuration
    and initializes instrumentors for tracing.

    Args:
        environment (str): Deployment environment.
        application_name (str): Application name.
        tracer: Tracer instance (Optional).
        meter: OpenTelemetry Metrics instance (Optional).
        otlp_endpoint (str): OTLP endpoint for the exporter (Optional).
        otlp_headers (Dict[str, str]): OTLP headers for the exporter (Optional).
        disable_batch (bool): Flag to disable batch span processing (Optional).
        trace_content (bool): Flag to trace content (Optional).
        disabled_instrumentors (List[str]): List of instrumentor names to disable (Optional).
        disable_metrics (bool): Flag to disable metrics (Optional).
    """
    disabled_instrumentors = disabled_instrumentors if disabled_instrumentors else []

    # Check for invalid instrumentor names
    valid_instruments = {
        "openai", "anthropic", "langchain",
        "cohere", "mistral", "chroma",
        "pinecone", "transformers"
    }
    invalid_instrumentors = set(disabled_instrumentors) - valid_instruments
    for invalid_name in invalid_instrumentors:
        logger.warning("Invalid instrumentor name detected and ignored: '%s'", invalid_name)

    try:
        # Retrieve or create the single configuration instance.
        config = OpenlitConfig()

        # Set up tracing based on the provided or default configuration.
        tracer = setup_tracing(
            application_name=application_name,
            environment=environment, tracer=tracer,
            otlp_endpoint=otlp_endpoint, otlp_headers=otlp_headers,
            disable_batch=disable_batch
        )

        if not tracer:
            logger.error("openLIT tracing setup failed. Tracing will not be available.")
            return

        # Set up the meter and receive a metrics_dict instead of a meter.
        metrics_dict = setup_meter(application_name=application_name,
                                   environment=environment, meter=meter,
                                   otlp_endpoint=otlp_endpoint, otlp_headers=otlp_headers)

        if not metrics_dict:
            logger.error("openLIT metrics setup failed. Metrics will not be available.")
            return

        # Update the global configuration with the provided settings.
        config.update_config(environment, application_name, tracer, otlp_endpoint,
                             otlp_headers, disable_batch, trace_content,
                             metrics_dict, disable_metrics)

        # Map instrumentor names to their instances
        instrumentor_instances = {
            "openai": OpenAIInstrumentor(),
            "anthropic": AnthropicInstrumentor(),
            "cohere": CohereInstrumentor(),
            "mistral": MistralInstrumentor(),
            "langchain": LangChainInstrumentor(),
            "chroma": ChromaInstrumentor(),
            "pinecone": PineconeInstrumentor(),
            "transformers": TransformersInstrumentor()
        }

        # Initialize and instrument only the enabled instrumentors
        for name, instrumentor in instrumentor_instances.items():
            if name not in disabled_instrumentors:
                instrumentor.instrument(
                    environment=config.environment,
                    application_name=config.application_name,
                    tracer=config.tracer,
                    pricing_info=config.pricing_info,
                    trace_content=config.trace_content,
                    metrics_dict=config.metrics_dict,
                    disable_metrics=config.disable_metrics
                )

    # pylint: disable=broad-exception-caught
    except Exception as e:
        logger.error("Error during openLIT initialization: %s", e)
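In practice, a host application calls init() once at startup. A minimal sketch — the endpoint is a placeholder for whatever OTLP collector you run, and the keyword names match the init() signature above:

import openlit

# One-time setup at application startup; every argument is optional and
# falls back to the defaults in init(). The endpoint below is a placeholder.
openlit.init(
    environment="production",
    application_name="chat-service",
    otlp_endpoint="http://localhost:4318",
    disabled_instrumentors=["pinecone", "chroma"],  # skip integrations you don't use
)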
openlit/instrumentation/anthropic/__init__.py
ADDED
@@ -0,0 +1,50 @@
# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
"""Initializer of Auto Instrumentation of Anthropic Functions"""

from typing import Collection
import importlib.metadata
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from wrapt import wrap_function_wrapper

from openlit.instrumentation.anthropic.anthropic import messages
from openlit.instrumentation.anthropic.async_anthropic import async_messages

_instruments = ("anthropic >= 0.21.0",)

class AnthropicInstrumentor(BaseInstrumentor):
    """
    An instrumentor for Anthropic's client library.
    """

    def instrumentation_dependencies(self) -> Collection[str]:
        return _instruments

    def _instrument(self, **kwargs):
        application_name = kwargs.get("application_name", "default_application")
        environment = kwargs.get("environment", "default_environment")
        tracer = kwargs.get("tracer")
        metrics = kwargs.get("metrics_dict")
        pricing_info = kwargs.get("pricing_info", {})
        trace_content = kwargs.get("trace_content", False)
        disable_metrics = kwargs.get("disable_metrics")
        version = importlib.metadata.version("anthropic")

        # sync
        wrap_function_wrapper(
            "anthropic.resources.messages",
            "Messages.create",
            messages("anthropic.messages", version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics),
        )

        # async
        wrap_function_wrapper(
            "anthropic.resources.messages",
            "AsyncMessages.create",
            async_messages("anthropic.messages", version, environment, application_name,
                           tracer, pricing_info, trace_content, metrics, disable_metrics),
        )

    def _uninstrument(self, **kwargs):
        # Proper uninstrumentation logic to revert patched methods
        pass
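The instrumentor relies on wrapt's convention: wrap_function_wrapper(module, name, wrapper) patches the named attribute in place, and the installed wrapper is always called with (wrapped, instance, args, kwargs). A minimal sketch of the same mechanism — the target module and method here are hypothetical, not part of openlit:

import time
from wrapt import wrap_function_wrapper

def timing_wrapper(wrapped, instance, args, kwargs):
    # wrapt passes the original callable, the bound instance (if any),
    # and the call's args/kwargs to every installed wrapper.
    start = time.monotonic()
    try:
        return wrapped(*args, **kwargs)
    finally:
        print(f"call took {time.monotonic() - start:.3f}s")

# Hypothetical target: patches mymodule.Client.request in place.
wrap_function_wrapper("mymodule", "Client.request", timing_wrapper)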
openlit/instrumentation/anthropic/anthropic.py
ADDED
@@ -0,0 +1,291 @@
# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument
"""
Module for monitoring Anthropic API calls.
"""

import logging
from opentelemetry.trace import SpanKind, Status, StatusCode
from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
from openlit.__helpers import get_chat_model_cost, handle_exception
from openlit.semcov import SemanticConvetion

# Initialize logger for logging potential issues and operations
logger = logging.getLogger(__name__)

def messages(gen_ai_endpoint, version, environment, application_name, tracer,
             pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for messages to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the Anthropic API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of Anthropic usage.
        trace_content: Flag indicating whether to trace the actual content.
        metrics: Dictionary of metric instruments.
        disable_metrics: Flag to disable metrics collection.

    Returns:
        A function that wraps the chat method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'messages' API call to add telemetry.

        This collects metrics such as execution time, cost, and token usage, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'messages' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'messages' method.
            kwargs: Keyword arguments for the 'messages' method.

        Returns:
            The response from the original 'messages' method.
        """

        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)

        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming responses to accommodate the nature of data flow
            def stream_generator():
                with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
                    # Placeholder for aggregating the streaming response
                    llmresponse = ""

                    # Loop through streaming events, capturing relevant details
                    for event in wrapped(*args, **kwargs):

                        # Collect the message ID and input tokens from events
                        if event.type == "message_start":
                            response_id = event.message.id
                            prompt_tokens = event.message.usage.input_tokens

                        # Aggregate response content
                        if event.type == "content_block_delta":
                            llmresponse += event.delta.text

                        # Collect output tokens and stop reason from events
                        if event.type == "message_delta":
                            completion_tokens = event.usage.output_tokens
                            finish_reason = event.delta.stop_reason
                        yield event

                    # Handle exceptions to ensure observability without disrupting the operation
                    try:
                        # Format 'messages' into a single string
                        message_prompt = kwargs.get("messages", "")
                        formatted_messages = []
                        for message in message_prompt:
                            role = message["role"]
                            content = message["content"]

                            if isinstance(content, list):
                                content_str = ", ".join(
                                    # pylint: disable=line-too-long
                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                    if "type" in item else f'text: {item["text"]}'
                                    for item in content
                                )
                                formatted_messages.append(f"{role}: {content_str}")
                            else:
                                formatted_messages.append(f"{role}: {content}")
                        prompt = "\n".join(formatted_messages)

                        # Calculate the cost of the operation
                        cost = get_chat_model_cost(
                            kwargs.get("model", "claude-3-sonnet-20240229"),
                            pricing_info, prompt_tokens, completion_tokens
                        )

                        # Set span attributes
                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                           SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC)
                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                           gen_ai_endpoint)
                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                           response_id)
                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                           environment)
                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                           application_name)
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                           kwargs.get("model", "claude-3-sonnet-20240229"))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                           kwargs.get("max_tokens", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                           True)
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                           kwargs.get("temperature", 1.0))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                                           kwargs.get("top_p", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                                           kwargs.get("top_k", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
                                           finish_reason)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                           prompt_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                                           completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                           prompt_tokens + completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                           cost)
                        if trace_content:
                            span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                               prompt)
                            span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
                                               llmresponse)

                        span.set_status(Status(StatusCode.OK))

                        if disable_metrics is False:
                            attributes = {
                                TELEMETRY_SDK_NAME: "openlit",
                                SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                                SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
                                SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                                SemanticConvetion.GEN_AI_TYPE: SemanticConvetion.GEN_AI_TYPE_CHAT,
                                SemanticConvetion.GEN_AI_REQUEST_MODEL: kwargs.get("model", "claude-3-sonnet-20240229")
                            }

                            metrics["genai_requests"].add(1, attributes)
                            metrics["genai_total_tokens"].add(
                                prompt_tokens + completion_tokens, attributes
                            )
                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
                            metrics["genai_cost"].record(cost, attributes)

                    except Exception as e:
                        handle_exception(span, e)
                        logger.error("Error in trace creation: %s", e)

            return stream_generator()

        # Handling for non-streaming responses
        else:
            with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
                response = wrapped(*args, **kwargs)

                try:
                    # Format 'messages' into a single string
                    message_prompt = kwargs.get("messages", "")
                    formatted_messages = []
                    for message in message_prompt:
                        role = message["role"]
                        content = message["content"]

                        if isinstance(content, list):
                            content_str = ", ".join(
                                # pylint: disable=line-too-long
                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                if "type" in item else f'text: {item["text"]}'
                                for item in content
                            )
                            formatted_messages.append(f"{role}: {content_str}")
                        else:
                            formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)

                    # Calculate the cost of the operation
                    cost = get_chat_model_cost(kwargs.get("model", "claude-3-sonnet-20240229"),
                                               pricing_info, response.usage.input_tokens,
                                               response.usage.output_tokens)

                    # Set span attributes
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.id)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       kwargs.get("model", "claude-3-sonnet-20240229"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                       kwargs.get("max_tokens", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                       kwargs.get("temperature", 1.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                                       kwargs.get("top_p", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                                       kwargs.get("top_k", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
                                       response.stop_reason)
                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                       response.usage.input_tokens)
                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                                       response.usage.output_tokens)
                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                       response.usage.input_tokens +
                                       response.usage.output_tokens)
                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                       cost)
                    if trace_content:
                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                           prompt)
                        # pylint: disable=line-too-long
                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.content[0].text if response.content else "")

                    span.set_status(Status(StatusCode.OK))

                    if disable_metrics is False:
                        attributes = {
                            TELEMETRY_SDK_NAME: "openlit",
                            SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                            SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
                            SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                            SemanticConvetion.GEN_AI_TYPE: SemanticConvetion.GEN_AI_TYPE_CHAT,
                            SemanticConvetion.GEN_AI_REQUEST_MODEL: kwargs.get("model", "claude-3-sonnet-20240229")
                        }

                        metrics["genai_requests"].add(1, attributes)
                        metrics["genai_total_tokens"].add(
                            response.usage.input_tokens +
                            response.usage.output_tokens, attributes)
                        metrics["genai_completion_tokens"].add(
                            response.usage.output_tokens, attributes)
                        metrics["genai_prompt_tokens"].add(
                            response.usage.input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)

                    # Return original response
                    return response

                except Exception as e:
                    handle_exception(span, e)
                    logger.error("Error in trace creation: %s", e)

                    # Return original response
                    return response

    return wrapper
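One structural point worth calling out: in the streaming branch the wrapper returns stream_generator() rather than instrumenting eagerly, because token counts and the stop reason only arrive in the final events. Re-yielding preserves the caller's iteration contract while the span is finalized after the stream drains. The pattern in isolation — a minimal, self-contained sketch, not openlit code:

import time

def instrumented_stream(events):
    """Re-yield events unchanged; record aggregates only after the stream ends."""
    start = time.monotonic()
    chunks = 0
    for event in events:
        chunks += 1
        yield event  # the consumer sees each event with no added latency
    # Runs only once the consumer has drained the stream.
    print(f"{chunks} events in {time.monotonic() - start:.3f}s")

# Usage: iterate exactly as you would iterate the raw stream.
for _ in instrumented_stream(iter(range(3))):
    pass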