openlit 1.34.22__py3-none-any.whl → 1.34.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +48 -3
- openlit/__init__.py +3 -0
- openlit/instrumentation/ag2/__init__.py +22 -18
- openlit/instrumentation/ag2/ag2.py +75 -124
- openlit/instrumentation/ag2/async_ag2.py +114 -0
- openlit/instrumentation/ag2/utils.py +175 -0
- openlit/instrumentation/langchain/__init__.py +11 -35
- openlit/instrumentation/langchain/async_langchain.py +51 -337
- openlit/instrumentation/langchain/langchain.py +50 -310
- openlit/instrumentation/langchain/utils.py +252 -0
- openlit/instrumentation/langchain_community/__init__.py +74 -0
- openlit/instrumentation/langchain_community/async_langchain_community.py +49 -0
- openlit/instrumentation/langchain_community/langchain_community.py +49 -0
- openlit/instrumentation/langchain_community/utils.py +69 -0
- openlit/instrumentation/pinecone/__init__.py +128 -20
- openlit/instrumentation/pinecone/async_pinecone.py +59 -0
- openlit/instrumentation/pinecone/pinecone.py +36 -150
- openlit/instrumentation/pinecone/utils.py +182 -0
- openlit/semcov/__init__.py +13 -1
- {openlit-1.34.22.dist-info → openlit-1.34.24.dist-info}/METADATA +1 -1
- {openlit-1.34.22.dist-info → openlit-1.34.24.dist-info}/RECORD +23 -14
- {openlit-1.34.22.dist-info → openlit-1.34.24.dist-info}/LICENSE +0 -0
- {openlit-1.34.22.dist-info → openlit-1.34.24.dist-info}/WHEEL +0 -0
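
All of the instrumentations touched here are switched on through openlit's single `init()` entry point. A minimal, hedged sketch of enabling them (this assumes openlit's documented public API; the endpoint value is illustrative):

```python
# Hypothetical setup sketch: openlit.init() auto-instruments supported
# libraries (LangChain, Pinecone, AG2, ...) that are importable in the
# environment and exports spans/metrics over OTLP.
import openlit

openlit.init(
    application_name="demo-app",            # exported as the service name on spans
    environment="development",              # exported as the deployment environment
    otlp_endpoint="http://127.0.0.1:4318",  # any OTLP/HTTP collector (illustrative)
)
```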
openlit/instrumentation/langchain/langchain.py

```diff
@@ -1,246 +1,68 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, unused-import, too-many-function-args
 """
-Module for monitoring
+Module for monitoring LangChain API calls.
 """

-import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
     handle_exception,
-
-
-
-
-
+    set_server_address_and_port
+)
+from openlit.instrumentation.langchain.utils import (
+    get_model_from_instance,
+    process_chat_response,
+    process_hub_response,
 )
 from openlit.semcov import SemanticConvention

-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def get_attribute_from_instance_or_kwargs(instance, attribute_name, default=-1):
-    """Return attribute from instance or kwargs"""
-    # Attempt to retrieve model_kwargs from the instance
-    model_kwargs = getattr(instance, 'model_kwargs', None)
-
-    # Check for attribute in model_kwargs if it exists
-    if model_kwargs and attribute_name in model_kwargs:
-        return model_kwargs[attribute_name]
-
-    # Attempt to get the attribute directly from the instance
-    try:
-        return getattr(instance, attribute_name)
-    except AttributeError:
-        # Special handling for 'model' attribute to consider 'model_id'
-        if attribute_name == 'model':
-            return getattr(instance, 'model_id', 'default_model_id')
-
-        # Default if the attribute isn't found in model_kwargs or the instance
-        return default
-
-def general_wrap(gen_ai_endpoint, version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics):
-    """
-    Creates a wrapper around a function call to trace and log its execution metrics.
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        An inner wrapper function that executes the wrapped function, measures execution
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-          is used for instance methods. For static and classmethods,
-          this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            try:
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                   SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvention.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                   SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_RETRIEVAL_SOURCE,
-                                   response[0].metadata["source"])
-                span.set_status(Status(StatusCode.OK))
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
 def hub(gen_ai_endpoint, version, environment, application_name, tracer,
         pricing_info, capture_message_content, metrics, disable_metrics):
     """
-
-
-    Similar to `general_wrap`, this function focuses on wrapping functions involved
-    in interacting with the Langchain hub, adding specific metadata relevant to
-    hub operations to the span attributes.
-
-    Parameters:
-    - gen_ai_endpoint (str): A descriptor or name for the Langchain hub endpoint.
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment, such as 'production' or 'development'.
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer for OpenTelemetry tracing.
-    - pricing_info (dict): Pricing information for the operation (not currently used).
-    - capture_message_content (bool): Indicates if the content of the response should be traced.
-
-    Returns:
-    - function: A new function that wraps the original hub operation call with added
-      logging, tracing, and metric calculation functionalities.
+    Generates a telemetry wrapper for LangChain hub operations.
     """

     def wrapper(wrapped, instance, args, kwargs):
         """
-
-        providing tracing, logging, and execution metrics.
-
-        Parameters:
-        - wrapped (Callable): The original hub operation function to be executed.
-        - instance (object): The instance of the class where the hub operation
-          method is defined. May be None for static or class methods.
-        - args (tuple): Positional arguments to pass to the hub operation function.
-        - kwargs (dict): Keyword arguments to pass to the hub operation function.
-
-        Returns:
-        - The result of executing the hub operation function.
-
-        This wrapper captures additional metadata relevant to Langchain hub operations,
-        creating spans with specific attributes and metrics that reflect the nature of
-        each hub call.
+        Wraps the LangChain hub operation call.
         """

-
+        server_address, server_port = set_server_address_and_port(instance, "langchain.com", 443)
+
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
             response = wrapped(*args, **kwargs)

             try:
-
-
-
-
-
-
-
-
-
-
-                                   application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_HUB_OWNER,
-                                   response.metadata["lc_hub_owner"])
-                span.set_attribute(SemanticConvention.GEN_AI_HUB_REPO,
-                                   response.metadata["lc_hub_repo"])
-                span.set_status(Status(StatusCode.OK))
-
-                return response
+                response = process_hub_response(
+                    response=response,
+                    gen_ai_endpoint=gen_ai_endpoint,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    span=span,
+                    version=version
+                )

             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)

-
-                return response
+            return response

     return wrapper

 def chat(gen_ai_endpoint, version, environment, application_name,
-
+         tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
+    Generates a telemetry wrapper for GenAI operations.
     """

     def wrapper(wrapped, instance, args, kwargs):
         """
-
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-          is used for instance methods. For static and classmethods,
-          this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
+        Wraps the GenAI operation call.
         """

-        server_address, server_port = "
-
-        if hasattr(instance, "model_id"):
-            request_model = instance.model_id
-        elif hasattr(instance, "model"):
-            request_model = instance.model
-        elif hasattr(instance, "model_name"):
-            request_model = instance.model_name
-        else:
-            request_model = "NOT_FOUND"
+        server_address, server_port = set_server_address_and_port(instance, "langchain.com", 443)
+        request_model = get_model_from_instance(instance)

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

@@ -250,113 +72,31 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             end_time = time.time()

             try:
-
-
-
-
-
-
-
-
-
+                # Add instance to kwargs for processing
+                kwargs["instance"] = instance
+
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    end_time=end_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    args=args,
+                    **kwargs
                 )

-                try:
-                    llm_response = response.content
-                except AttributeError:
-                    llm_response = response
-
-                # Set base span attribues (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                   SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                   SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                   str(getattr(instance, 'temperature', 1)))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                                   str(getattr(instance, 'top_k', 1)))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                   str(getattr(instance, 'top_p', 1)))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                   input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                   output_tokens)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                   server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                   server_port)
-
-                # Set base span attribues (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                   False)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   input_tokens + output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                   cost)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                   end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                   version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)

-
-                return response
+            return response

     return wrapper
```
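
Net effect for `langchain.py`: the `general_wrap` retrieval wrapper and the hand-rolled span/metric code are removed, and both remaining wrappers delegate to the new `utils` module. A hedged end-to-end sketch of what the refactored `chat` wrapper instruments (this assumes `langchain-openai` is installed and `OPENAI_API_KEY` is set; the model name and prompt are illustrative):

```python
# After openlit.init(), invoking any LangChain chat model goes through the
# patched chat() wrapper above: one CLIENT span named "chat <model>" per
# invoke(), with token counts taken from response.usage_metadata when present.
import openlit
from langchain_openai import ChatOpenAI

openlit.init(application_name="demo-app", environment="development")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
reply = llm.invoke([("system", "You are terse."), ("human", "Say hi.")])
print(reply.content)
```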
openlit/instrumentation/langchain/utils.py (new file)

```diff
@@ -0,0 +1,252 @@
+"""
+LangChain OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    get_chat_model_cost,
+    general_tokens,
+    record_completion_metrics,
+    common_span_attributes,
+)
+from openlit.semcov import SemanticConvention
+
+def format_content(messages):
+    """
+    Format the messages into a string for span events.
+    """
+
+    if not messages:
+        return ""
+
+    # Handle string input (simple case)
+    if isinstance(messages, str):
+        return messages
+
+    # Handle list of messages
+    formatted_messages = []
+    for message in messages:
+        # Handle the case where message is a tuple
+        if isinstance(message, tuple) and len(message) == 2:
+            role, content = message
+        # Handle the case where message is a dictionary
+        elif isinstance(message, dict):
+            role = message.get("role", "user")
+            content = message.get("content", "")
+        else:
+            continue
+
+        # Check if the content is a list
+        if isinstance(content, list):
+            content_str = ", ".join(
+                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                if "type" in item else f'text: {item["text"]}'
+                for item in content
+            )
+            formatted_messages.append(f"{role}: {content_str}")
+        else:
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
+
+def get_model_from_instance(instance):
+    """
+    Extract model name from LangChain instance.
+    """
+    if hasattr(instance, "model_id"):
+        return instance.model_id
+    elif hasattr(instance, "model"):
+        return instance.model
+    elif hasattr(instance, "model_name"):
+        return instance.model_name
+    else:
+        return "langchain-model"
+
+def get_attribute_from_instance(instance, attribute_name, default=-1):
+    """
+    Get attribute from instance, checking model_kwargs first.
+    """
+    # Attempt to retrieve model_kwargs from the instance
+    model_kwargs = getattr(instance, "model_kwargs", None)
+
+    # Check for attribute in model_kwargs if it exists
+    if model_kwargs and attribute_name in model_kwargs:
+        value = model_kwargs[attribute_name]
+        return value if value is not None else default
+
+    # Attempt to get the attribute directly from the instance
+    try:
+        value = getattr(instance, attribute_name)
+        # Return default if value is None
+        return value if value is not None else default
+    except AttributeError:
+        # Special handling for "model" attribute to consider "model_id"
+        if attribute_name == "model":
+            return getattr(instance, "model_id", "langchain-model")
+
+        # Default if the attribute isnt found in model_kwargs or the instance
+        return default
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    scope._tbt = 0  # LangChain doesnt support streaming yet
+    scope._ttft = scope._end_time - scope._start_time
+
+    # Extract prompt - check args[0] first (positional), then kwargs (keyword arguments)
+    messages = None
+    if scope._args and len(scope._args) > 0:
+        messages = scope._args[0]  # llm.invoke([("system", "..."), ("human", "...")])
+    else:
+        messages = scope._kwargs.get("messages", "") or scope._kwargs.get("input", "")  # llm.invoke(messages=[...])
+
+    formatted_messages = format_content(messages)
+    request_model = scope._request_model
+
+    # Use actual token counts from response if available, otherwise calculate them using general_tokens
+    if (scope._input_tokens in [None, 0] or scope._output_tokens in [None, 0]):
+        scope._input_tokens = general_tokens(str(formatted_messages))
+        scope._output_tokens = general_tokens(str(scope._llmresponse))
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    instance = scope._kwargs.get("instance")
+    if instance:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+            get_attribute_from_instance(instance, "temperature", 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
+            get_attribute_from_instance(instance, "top_k", 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
+            get_attribute_from_instance(instance, "top_p", 1.0))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_messages)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: formatted_messages,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, end_time,
+    span, capture_message_content=False, disable_metrics=False,
+    version="1.0.0", args=None, **kwargs):
+    """
+    Process chat response and generate telemetry.
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._start_time = start_time
+    scope._end_time = end_time
+    scope._span = span
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._kwargs = kwargs
+    scope._args = args or ()
+
+    # Extract response content and metadata - only extract what comes from the response
+    try:
+        scope._llmresponse = response.content
+    except AttributeError:
+        scope._llmresponse = str(response)
+
+    # Extract token information from usage_metadata if available
+    usage_metadata = getattr(response, "usage_metadata", None)
+    if usage_metadata:
+        scope._input_tokens = usage_metadata.get("input_tokens", 0)
+        scope._output_tokens = usage_metadata.get("output_tokens", 0)
+        scope._total_tokens = usage_metadata.get("total_tokens", 0)
+    else:
+        # Will be calculated in common_chat_logic if not available
+        scope._input_tokens = None
+        scope._output_tokens = None
+        scope._total_tokens = None
+
+    # Extract response metadata
+    response_metadata = getattr(response, "response_metadata", {})
+    scope._response_model = response_metadata.get("model_name", request_model)
+    scope._finish_reason = response_metadata.get("finish_reason", "stop")
+
+    # Extract response ID
+    scope._response_id = getattr(response, "id", "")
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+
+
+def process_hub_response(response, gen_ai_endpoint, server_port, server_address,
+    environment, application_name, span, version="1.0.0"):
+    """
+    Process LangChain hub operations and generate telemetry.
+    """
+
+    # Set span attributes for hub operations
+    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN)
+    span.set_attribute(SemanticConvention.GEN_AI_ENDPOINT, gen_ai_endpoint)
+    span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK)
+    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+    span.set_attribute(SemanticConvention.GEN_AI_ENVIRONMENT, environment)
+    span.set_attribute(SemanticConvention.GEN_AI_APPLICATION_NAME, application_name)
+
+    # Try to extract hub metadata
+    try:
+        span.set_attribute(SemanticConvention.GEN_AI_HUB_OWNER,
+            response.metadata.get("lc_hub_owner", "unknown"))
+        span.set_attribute(SemanticConvention.GEN_AI_HUB_REPO,
+            response.metadata.get("lc_hub_repo", "unknown"))
+    except (AttributeError, KeyError):
+        pass
+
+    span.set_status(Status(StatusCode.OK))
+
+    return response
```
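
A quick sanity sketch exercising the new helpers exactly as defined in the diff above (the `DummyModel` class and the `object()` fallback case are invented stand-ins for LangChain objects):

```python
# format_content() flattens tuple- and dict-style messages into "role: content"
# lines; get_model_from_instance() probes model_id/model/model_name and falls
# back to the literal "langchain-model".
from openlit.instrumentation.langchain.utils import (
    format_content,
    get_model_from_instance,
)

class DummyModel:
    model_name = "gpt-4o-mini"  # hypothetical stand-in for a LangChain chat model

print(get_model_from_instance(DummyModel()))  # -> "gpt-4o-mini"
print(get_model_from_instance(object()))      # -> "langchain-model" (fallback)

messages = [
    ("system", "You are terse."),
    {"role": "user", "content": "Say hi."},
]
print(format_content(messages))
# system: You are terse.
# user: Say hi.
```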