openlit 1.34.23__py3-none-any.whl → 1.34.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +48 -3
- openlit/instrumentation/chroma/__init__.py +38 -34
- openlit/instrumentation/chroma/chroma.py +37 -177
- openlit/instrumentation/chroma/utils.py +227 -0
- openlit/instrumentation/pinecone/__init__.py +128 -20
- openlit/instrumentation/pinecone/async_pinecone.py +58 -0
- openlit/instrumentation/pinecone/pinecone.py +36 -151
- openlit/instrumentation/pinecone/utils.py +186 -0
- openlit/semcov/__init__.py +13 -1
- {openlit-1.34.23.dist-info → openlit-1.34.25.dist-info}/METADATA +1 -1
- {openlit-1.34.23.dist-info → openlit-1.34.25.dist-info}/RECORD +13 -10
- {openlit-1.34.23.dist-info → openlit-1.34.25.dist-info}/LICENSE +0 -0
- {openlit-1.34.23.dist-info → openlit-1.34.25.dist-info}/WHEEL +0 -0
openlit/__helpers.py
CHANGED
@@ -205,11 +205,22 @@ def set_server_address_and_port(client_instance: Any,
|
|
205
205
|
config = getattr(client_instance, 'sdk_configuration', None)
|
206
206
|
base_url = getattr(config, 'server_url', None)
|
207
207
|
|
208
|
+
if not base_url:
|
209
|
+
# Attempt to get host from instance.config.host (used by Pinecone and other vector DBs)
|
210
|
+
config = getattr(client_instance, 'config', None)
|
211
|
+
base_url = getattr(config, 'host', None)
|
212
|
+
|
208
213
|
if base_url:
|
209
214
|
if isinstance(base_url, str):
|
210
|
-
|
211
|
-
|
212
|
-
|
215
|
+
# Check if it's a full URL or just a hostname
|
216
|
+
if base_url.startswith(('http://', 'https://')):
|
217
|
+
url = urlparse(base_url)
|
218
|
+
server_address = url.hostname or default_server_address
|
219
|
+
server_port = url.port if url.port is not None else default_server_port
|
220
|
+
else:
|
221
|
+
# If it's just a hostname (like Pinecone's case), use it directly
|
222
|
+
server_address = base_url
|
223
|
+
server_port = default_server_port
|
213
224
|
else: # base_url might not be a str; handle as an object.
|
214
225
|
server_address = getattr(base_url, 'host', None) or default_server_address
|
215
226
|
port_attr = getattr(base_url, 'port', None)
|
@@ -442,3 +453,37 @@ def record_image_metrics(metrics, gen_ai_operation, gen_ai_system, server_addres
|
|
442
453
|
metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
|
443
454
|
metrics["genai_requests"].add(1, attributes)
|
444
455
|
metrics["genai_cost"].record(cost, attributes)
|
456
|
+
|
457
|
+
def common_db_span_attributes(scope, db_system, server_address, server_port,
|
458
|
+
environment, application_name, version):
|
459
|
+
"""
|
460
|
+
Set common span attributes for database operations.
|
461
|
+
"""
|
462
|
+
|
463
|
+
scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
464
|
+
scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_VECTORDB)
|
465
|
+
scope._span.set_attribute(SemanticConvention.DB_SYSTEM_NAME, db_system)
|
466
|
+
scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
|
467
|
+
scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
|
468
|
+
scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
|
469
|
+
scope._span.set_attribute(SERVICE_NAME, application_name)
|
470
|
+
scope._span.set_attribute(SemanticConvention.DB_SDK_VERSION, version)
|
471
|
+
|
472
|
+
def record_db_metrics(metrics, db_system, server_address, server_port,
|
473
|
+
environment, application_name, start_time, end_time):
|
474
|
+
"""
|
475
|
+
Record database-specific metrics for the operation.
|
476
|
+
"""
|
477
|
+
|
478
|
+
attributes = create_metrics_attributes(
|
479
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_VECTORDB,
|
480
|
+
system=db_system,
|
481
|
+
request_model=db_system,
|
482
|
+
server_address=server_address,
|
483
|
+
server_port=server_port,
|
484
|
+
response_model=db_system,
|
485
|
+
service_name=application_name,
|
486
|
+
deployment_environment=environment,
|
487
|
+
)
|
488
|
+
metrics["db_requests"].add(1, attributes)
|
489
|
+
metrics["db_client_operation_duration"].record(end_time - start_time, attributes)
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
"""
|
2
|
+
OpenLIT ChromaDB Instrumentation
|
3
|
+
"""
|
4
|
+
|
3
5
|
from typing import Collection
|
4
6
|
import importlib.metadata
|
5
7
|
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
|
@@ -10,77 +12,79 @@ from openlit.instrumentation.chroma.chroma import general_wrap
|
|
10
12
|
_instruments = ("chromadb >= 0.4.0",)
|
11
13
|
|
12
14
|
class ChromaInstrumentor(BaseInstrumentor):
|
13
|
-
"""
|
15
|
+
"""
|
16
|
+
An instrumentor for ChromaDB's client library.
|
17
|
+
"""
|
14
18
|
|
15
19
|
def instrumentation_dependencies(self) -> Collection[str]:
|
16
20
|
return _instruments
|
17
21
|
|
18
22
|
def _instrument(self, **kwargs):
|
19
|
-
|
20
|
-
environment = kwargs.get("environment")
|
23
|
+
version = importlib.metadata.version("chromadb")
|
24
|
+
environment = kwargs.get("environment", "default")
|
25
|
+
application_name = kwargs.get("application_name", "default")
|
21
26
|
tracer = kwargs.get("tracer")
|
27
|
+
pricing_info = kwargs.get("pricing_info", {})
|
28
|
+
capture_message_content = kwargs.get("capture_message_content", False)
|
22
29
|
metrics = kwargs.get("metrics_dict")
|
23
|
-
pricing_info = kwargs.get("pricing_info")
|
24
|
-
capture_message_content = kwargs.get("capture_message_content")
|
25
30
|
disable_metrics = kwargs.get("disable_metrics")
|
26
|
-
version = importlib.metadata.version("chromadb")
|
27
31
|
|
32
|
+
# Sync operations
|
28
33
|
wrap_function_wrapper(
|
29
|
-
"chromadb.db",
|
30
|
-
"DB.create_collection",
|
34
|
+
"chromadb.db",
|
35
|
+
"DB.create_collection",
|
31
36
|
general_wrap("chroma.create_collection", version, environment, application_name,
|
32
|
-
|
37
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
33
38
|
)
|
34
39
|
|
35
40
|
wrap_function_wrapper(
|
36
|
-
"chromadb",
|
37
|
-
"Collection.add",
|
41
|
+
"chromadb",
|
42
|
+
"Collection.add",
|
38
43
|
general_wrap("chroma.add", version, environment, application_name,
|
39
|
-
|
44
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
40
45
|
)
|
41
46
|
|
42
47
|
wrap_function_wrapper(
|
43
|
-
"chromadb",
|
44
|
-
"Collection.get",
|
48
|
+
"chromadb",
|
49
|
+
"Collection.get",
|
45
50
|
general_wrap("chroma.get", version, environment, application_name,
|
46
|
-
|
51
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
47
52
|
)
|
48
53
|
|
49
54
|
wrap_function_wrapper(
|
50
|
-
"chromadb",
|
51
|
-
"Collection.peek",
|
55
|
+
"chromadb",
|
56
|
+
"Collection.peek",
|
52
57
|
general_wrap("chroma.peek", version, environment, application_name,
|
53
|
-
|
58
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
54
59
|
)
|
55
60
|
|
56
61
|
wrap_function_wrapper(
|
57
|
-
"chromadb",
|
58
|
-
"Collection.query",
|
62
|
+
"chromadb",
|
63
|
+
"Collection.query",
|
59
64
|
general_wrap("chroma.query", version, environment, application_name,
|
60
|
-
|
65
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
61
66
|
)
|
62
67
|
|
63
68
|
wrap_function_wrapper(
|
64
|
-
"chromadb",
|
65
|
-
"Collection.update",
|
69
|
+
"chromadb",
|
70
|
+
"Collection.update",
|
66
71
|
general_wrap("chroma.update", version, environment, application_name,
|
67
|
-
|
72
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
68
73
|
)
|
69
74
|
|
70
75
|
wrap_function_wrapper(
|
71
|
-
"chromadb",
|
72
|
-
"Collection.upsert",
|
76
|
+
"chromadb",
|
77
|
+
"Collection.upsert",
|
73
78
|
general_wrap("chroma.upsert", version, environment, application_name,
|
74
|
-
|
79
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
75
80
|
)
|
81
|
+
|
76
82
|
wrap_function_wrapper(
|
77
|
-
"chromadb",
|
78
|
-
"Collection.delete",
|
83
|
+
"chromadb",
|
84
|
+
"Collection.delete",
|
79
85
|
general_wrap("chroma.delete", version, environment, application_name,
|
80
|
-
|
86
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics),
|
81
87
|
)
|
82
88
|
|
83
|
-
|
84
|
-
@staticmethod
|
85
89
|
def _uninstrument(self, **kwargs):
|
86
90
|
pass
|
@@ -1,199 +1,59 @@
|
|
1
|
-
# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
|
2
1
|
"""
|
3
|
-
Module for monitoring ChromaDB.
|
2
|
+
Module for monitoring ChromaDB API calls.
|
4
3
|
"""
|
5
4
|
|
6
|
-
import
|
7
|
-
from opentelemetry.trace import SpanKind
|
8
|
-
from opentelemetry
|
9
|
-
from openlit.__helpers import
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
Counts Length of object if it exists, Else returns None
|
18
|
-
"""
|
19
|
-
try:
|
20
|
-
cnt = len(obj)
|
21
|
-
# pylint: disable=bare-except
|
22
|
-
except:
|
23
|
-
cnt = 0
|
24
|
-
|
25
|
-
return cnt
|
5
|
+
import time
|
6
|
+
from opentelemetry.trace import SpanKind
|
7
|
+
from opentelemetry import context as context_api
|
8
|
+
from openlit.__helpers import (
|
9
|
+
handle_exception,
|
10
|
+
set_server_address_and_port,
|
11
|
+
)
|
12
|
+
from openlit.instrumentation.chroma.utils import (
|
13
|
+
process_vectordb_response,
|
14
|
+
DB_OPERATION_MAP,
|
15
|
+
)
|
26
16
|
|
27
17
|
def general_wrap(gen_ai_endpoint, version, environment, application_name,
|
28
|
-
|
18
|
+
tracer, pricing_info, capture_message_content, metrics, disable_metrics):
|
29
19
|
"""
|
30
|
-
|
31
|
-
|
32
|
-
This function wraps any given function to measure its execution time,
|
33
|
-
log its operation, and trace its execution using OpenTelemetry.
|
34
|
-
|
35
|
-
Parameters:
|
36
|
-
- gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
|
37
|
-
- version (str): The version of the Langchain application.
|
38
|
-
- environment (str): The deployment environment (e.g., 'production', 'development').
|
39
|
-
- application_name (str): Name of the Langchain application.
|
40
|
-
- tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
|
41
|
-
- pricing_info (dict): Information about the pricing for internal metrics (currently not used).
|
42
|
-
- capture_message_content (bool): Flag indicating whether to trace the content of the response.
|
43
|
-
|
44
|
-
Returns:
|
45
|
-
- function: A higher-order function that takes a function 'wrapped' and returns
|
46
|
-
a new function that wraps 'wrapped' with additional tracing and logging.
|
20
|
+
Generates a telemetry wrapper for ChromaDB function calls.
|
47
21
|
"""
|
48
22
|
|
49
23
|
def wrapper(wrapped, instance, args, kwargs):
|
50
24
|
"""
|
51
|
-
|
52
|
-
time, and records trace data using OpenTelemetry.
|
53
|
-
|
54
|
-
Parameters:
|
55
|
-
- wrapped (Callable): The original function that this wrapper will execute.
|
56
|
-
- instance (object): The instance to which the wrapped function belongs. This
|
57
|
-
is used for instance methods. For static and classmethods,
|
58
|
-
this may be None.
|
59
|
-
- args (tuple): Positional arguments passed to the wrapped function.
|
60
|
-
- kwargs (dict): Keyword arguments passed to the wrapped function.
|
61
|
-
|
62
|
-
Returns:
|
63
|
-
- The result of the wrapped function call.
|
64
|
-
|
65
|
-
The wrapper initiates a span with the provided tracer, sets various attributes
|
66
|
-
on the span based on the function's execution and response, and ensures
|
67
|
-
errors are handled and logged appropriately.
|
25
|
+
Wraps the ChromaDB function call.
|
68
26
|
"""
|
69
|
-
with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
|
70
|
-
response = wrapped(*args, **kwargs)
|
71
27
|
|
72
|
-
|
73
|
-
|
74
|
-
span.set_attribute(SemanticConvention.GEN_AI_ENDPOINT,
|
75
|
-
gen_ai_endpoint)
|
76
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
77
|
-
environment)
|
78
|
-
span.set_attribute(SERVICE_NAME,
|
79
|
-
application_name)
|
80
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
81
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_VECTORDB)
|
82
|
-
span.set_attribute(SemanticConvention.DB_SYSTEM_NAME,
|
83
|
-
SemanticConvention.DB_SYSTEM_CHROMA)
|
84
|
-
span.set_attribute(SemanticConvention.DB_COLLECTION_NAME,
|
85
|
-
instance.name)
|
28
|
+
if context_api.get_value(context_api._SUPPRESS_INSTRUMENTATION_KEY):
|
29
|
+
return wrapped(*args, **kwargs)
|
86
30
|
|
87
|
-
|
88
|
-
|
89
|
-
span.set_attribute(SemanticConvention.DB_OPERATION_NAME,
|
90
|
-
SemanticConvention.DB_OPERATION_ADD)
|
91
|
-
span.set_attribute(SemanticConvention.DB_ID_COUNT,
|
92
|
-
object_count(kwargs.get("ids", [])))
|
93
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
94
|
-
object_count(kwargs.get("embeddings", [])))
|
95
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
96
|
-
object_count(kwargs.get("metadatas", [])))
|
97
|
-
span.set_attribute(SemanticConvention.DB_DOCUMENTS_COUNT,
|
98
|
-
object_count(kwargs.get("documents", [])))
|
31
|
+
# Get server address and port using the standard helper
|
32
|
+
server_address, server_port = set_server_address_and_port(instance, "localhost", 8000)
|
99
33
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
span.set_attribute(SemanticConvention.DB_QUERY_LIMIT,
|
107
|
-
kwargs.get("limit", ""))
|
108
|
-
span.set_attribute(SemanticConvention.DB_OFFSET,
|
109
|
-
kwargs.get("offset", ""))
|
110
|
-
span.set_attribute(SemanticConvention.DB_WHERE_DOCUMENT,
|
111
|
-
str(kwargs.get("where_document", "")))
|
34
|
+
db_operation = DB_OPERATION_MAP.get(gen_ai_endpoint, "unknown")
|
35
|
+
if db_operation == "create_collection":
|
36
|
+
namespace = kwargs.get("name") or (args[0] if args else "unknown")
|
37
|
+
else:
|
38
|
+
namespace = getattr(instance, "name", "unknown")
|
39
|
+
span_name = f"{db_operation} {namespace}"
|
112
40
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
span.set_attribute(SemanticConvention.DB_STATEMENT,
|
118
|
-
str(kwargs.get("query_texts", "")))
|
119
|
-
span.set_attribute(SemanticConvention.DB_N_RESULTS,
|
120
|
-
kwargs.get("n_results", ""))
|
121
|
-
span.set_attribute(SemanticConvention.DB_FILTER,
|
122
|
-
str(kwargs.get("where", "")))
|
123
|
-
span.set_attribute(SemanticConvention.DB_WHERE_DOCUMENT,
|
124
|
-
str(kwargs.get("where_document", "")))
|
125
|
-
|
126
|
-
elif gen_ai_endpoint == "chroma.update":
|
127
|
-
db_operation = SemanticConvention.DB_OPERATION_UPDATE
|
128
|
-
span.set_attribute(SemanticConvention.DB_OPERATION_NAME,
|
129
|
-
SemanticConvention.DB_OPERATION_UPDATE)
|
130
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
131
|
-
object_count(kwargs.get("embeddings", [])))
|
132
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
133
|
-
object_count(kwargs.get("metadatas", [])))
|
134
|
-
span.set_attribute(SemanticConvention.DB_ID_COUNT,
|
135
|
-
object_count(kwargs.get("ids", [])))
|
136
|
-
span.set_attribute(SemanticConvention.DB_DOCUMENTS_COUNT,
|
137
|
-
object_count(kwargs.get("documents", [])))
|
138
|
-
|
139
|
-
elif gen_ai_endpoint == "chroma.upsert":
|
140
|
-
db_operation = SemanticConvention.DB_OPERATION_UPSERT
|
141
|
-
span.set_attribute(SemanticConvention.DB_OPERATION_NAME,
|
142
|
-
SemanticConvention.DB_OPERATION_UPSERT)
|
143
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
144
|
-
object_count(kwargs.get("embeddings", [])))
|
145
|
-
span.set_attribute(SemanticConvention.DB_VECTOR_COUNT,
|
146
|
-
object_count(kwargs.get("metadatas", [])))
|
147
|
-
span.set_attribute(SemanticConvention.DB_ID_COUNT,
|
148
|
-
object_count(kwargs.get("ids", [])))
|
149
|
-
span.set_attribute(SemanticConvention.DB_DOCUMENTS_COUNT,
|
150
|
-
object_count(kwargs.get("documents", [])))
|
151
|
-
|
152
|
-
elif gen_ai_endpoint == "chroma.delete":
|
153
|
-
db_operation = SemanticConvention.DB_OPERATION_DELETE
|
154
|
-
span.set_attribute(SemanticConvention.DB_OPERATION_NAME,
|
155
|
-
SemanticConvention.DB_OPERATION_DELETE)
|
156
|
-
span.set_attribute(SemanticConvention.DB_ID_COUNT,
|
157
|
-
object_count(kwargs.get("ids", [])))
|
158
|
-
span.set_attribute(SemanticConvention.DB_FILTER,
|
159
|
-
str(kwargs.get("where", "")))
|
160
|
-
span.set_attribute(SemanticConvention.DB_DELETE_ALL,
|
161
|
-
kwargs.get("delete_all", False))
|
162
|
-
span.set_attribute(SemanticConvention.DB_WHERE_DOCUMENT,
|
163
|
-
str(kwargs.get("where_document", "")))
|
164
|
-
|
165
|
-
elif gen_ai_endpoint == "chroma.peek":
|
166
|
-
db_operation = SemanticConvention.DB_OPERATION_PEEK
|
167
|
-
span.set_attribute(SemanticConvention.DB_OPERATION_NAME,
|
168
|
-
SemanticConvention.DB_OPERATION_PEEK)
|
169
|
-
|
170
|
-
span.set_status(Status(StatusCode.OK))
|
171
|
-
|
172
|
-
if disable_metrics is False:
|
173
|
-
attributes = {
|
174
|
-
TELEMETRY_SDK_NAME:
|
175
|
-
"openlit",
|
176
|
-
SERVICE_NAME:
|
177
|
-
application_name,
|
178
|
-
SemanticConvention.DB_SYSTEM_NAME:
|
179
|
-
SemanticConvention.DB_SYSTEM_CHROMA,
|
180
|
-
DEPLOYMENT_ENVIRONMENT:
|
181
|
-
environment,
|
182
|
-
SemanticConvention.GEN_AI_OPERATION:
|
183
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_VECTORDB,
|
184
|
-
SemanticConvention.DB_OPERATION_NAME:
|
185
|
-
db_operation
|
186
|
-
}
|
187
|
-
|
188
|
-
metrics["db_requests"].add(1, attributes)
|
41
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
42
|
+
try:
|
43
|
+
start_time = time.time()
|
44
|
+
response = wrapped(*args, **kwargs)
|
189
45
|
|
190
|
-
|
46
|
+
# Process response and generate telemetry
|
47
|
+
response = process_vectordb_response(
|
48
|
+
response, db_operation, server_address, server_port,
|
49
|
+
environment, application_name, metrics, start_time, span,
|
50
|
+
capture_message_content, disable_metrics, version, instance, args, endpoint=gen_ai_endpoint, **kwargs
|
51
|
+
)
|
191
52
|
|
192
53
|
except Exception as e:
|
193
54
|
handle_exception(span, e)
|
194
|
-
logger.error("Error in trace creation: %s", e)
|
195
55
|
|
196
|
-
|
197
|
-
|
56
|
+
return response
|
57
|
+
|
198
58
|
|
199
59
|
return wrapper
|
@@ -0,0 +1,227 @@
|
|
1
|
+
"""
|
2
|
+
ChromaDB OpenTelemetry instrumentation utility functions
|
3
|
+
"""
|
4
|
+
import time
|
5
|
+
|
6
|
+
from opentelemetry.trace import Status, StatusCode
|
7
|
+
|
8
|
+
from openlit.__helpers import (
|
9
|
+
common_db_span_attributes,
|
10
|
+
record_db_metrics,
|
11
|
+
)
|
12
|
+
from openlit.semcov import SemanticConvention
|
13
|
+
|
14
|
+
# Operation mapping for simple span naming
|
15
|
+
DB_OPERATION_MAP = {
|
16
|
+
"chroma.create_collection": SemanticConvention.DB_OPERATION_CREATE_COLLECTION,
|
17
|
+
"chroma.add": SemanticConvention.DB_OPERATION_INSERT,
|
18
|
+
"chroma.get": SemanticConvention.DB_OPERATION_GET,
|
19
|
+
"chroma.peek": SemanticConvention.DB_OPERATION_PEEK,
|
20
|
+
"chroma.query": SemanticConvention.DB_OPERATION_GET,
|
21
|
+
"chroma.update": SemanticConvention.DB_OPERATION_UPDATE,
|
22
|
+
"chroma.upsert": SemanticConvention.DB_OPERATION_UPSERT,
|
23
|
+
"chroma.delete": SemanticConvention.DB_OPERATION_DELETE,
|
24
|
+
}
|
25
|
+
|
26
|
+
def object_count(obj):
|
27
|
+
"""
|
28
|
+
Counts length of object if it exists, else returns 0.
|
29
|
+
"""
|
30
|
+
return len(obj) if obj else 0
|
31
|
+
|
32
|
+
def common_vectordb_logic(scope, environment, application_name,
|
33
|
+
metrics, capture_message_content, disable_metrics, version, instance=None, endpoint=None):
|
34
|
+
"""
|
35
|
+
Process vector database request and generate telemetry.
|
36
|
+
"""
|
37
|
+
|
38
|
+
scope._end_time = time.time()
|
39
|
+
|
40
|
+
# Set common database span attributes using helper
|
41
|
+
common_db_span_attributes(scope, SemanticConvention.DB_SYSTEM_CHROMA, scope._server_address, scope._server_port,
|
42
|
+
environment, application_name, version)
|
43
|
+
|
44
|
+
# Set DB operation specific attributes
|
45
|
+
scope._span.set_attribute(SemanticConvention.DB_OPERATION_NAME, scope._db_operation)
|
46
|
+
scope._span.set_attribute(SemanticConvention.DB_CLIENT_OPERATION_DURATION, scope._end_time - scope._start_time)
|
47
|
+
|
48
|
+
# Set collection name from instance
|
49
|
+
if hasattr(instance, "name"):
|
50
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, instance.name)
|
51
|
+
|
52
|
+
# Set Create Collection operation specific attributes
|
53
|
+
if scope._db_operation == SemanticConvention.DB_OPERATION_CREATE_COLLECTION:
|
54
|
+
# Standard database attributes
|
55
|
+
collection_name = scope._kwargs.get("name") or (scope._args[0] if scope._args else "unknown")
|
56
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
57
|
+
|
58
|
+
# Vector database specific attributes (extensions)
|
59
|
+
metadata = scope._kwargs.get("metadata", {})
|
60
|
+
if metadata:
|
61
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_METADATA, str(metadata))
|
62
|
+
|
63
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_INSERT:
|
64
|
+
collection_name = getattr(instance, "name", "unknown")
|
65
|
+
query = scope._kwargs.get("ids", [])
|
66
|
+
|
67
|
+
# Standard database attributes
|
68
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, str(query))
|
69
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
70
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, object_count(query))
|
71
|
+
|
72
|
+
# Vector database specific attributes (extensions)
|
73
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
74
|
+
f"{scope._db_operation} {collection_name} "
|
75
|
+
f"ids={query} "
|
76
|
+
f"documents={scope._kwargs.get('documents', [])}")
|
77
|
+
|
78
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_GET:
|
79
|
+
collection_name = getattr(instance, "name", "unknown")
|
80
|
+
|
81
|
+
# Handle different GET operations based on endpoint
|
82
|
+
if endpoint == "chroma.get":
|
83
|
+
# Collection.get() - retrieve documents by IDs
|
84
|
+
query = scope._kwargs.get("ids", [])
|
85
|
+
|
86
|
+
# Standard database attributes
|
87
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, str(query))
|
88
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
89
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, object_count(query))
|
90
|
+
|
91
|
+
# Vector database specific attributes (extensions)
|
92
|
+
scope._span.set_attribute(SemanticConvention.DB_FILTER, str(scope._kwargs.get("where", "")))
|
93
|
+
|
94
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
95
|
+
f"{scope._db_operation} {collection_name} "
|
96
|
+
f"ids={query} "
|
97
|
+
f"limit={scope._kwargs.get('limit', 'None')} "
|
98
|
+
f"offset={scope._kwargs.get('offset', 'None')}")
|
99
|
+
|
100
|
+
elif endpoint == "chroma.query":
|
101
|
+
query_texts = scope._kwargs.get("query_texts", [])
|
102
|
+
query_embeddings = scope._kwargs.get("query_embeddings", [])
|
103
|
+
|
104
|
+
# Create comprehensive query text (can be either embeddings or texts)
|
105
|
+
if query_texts:
|
106
|
+
query_content = f"texts={query_texts}"
|
107
|
+
elif query_embeddings:
|
108
|
+
query_content = f"embeddings={len(query_embeddings) if query_embeddings else 0} vectors"
|
109
|
+
else:
|
110
|
+
query_content = "no query provided"
|
111
|
+
|
112
|
+
# Standard database attributes
|
113
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, query_content)
|
114
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
115
|
+
|
116
|
+
# Vector database specific attributes (extensions)
|
117
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_QUERY_TOP_K, scope._kwargs.get("n_results", 10))
|
118
|
+
scope._span.set_attribute(SemanticConvention.DB_FILTER, str(scope._kwargs.get("where", "")))
|
119
|
+
|
120
|
+
# Extract response metrics if available
|
121
|
+
if scope._response:
|
122
|
+
# Get number of results returned
|
123
|
+
if hasattr(scope._response, 'get') and scope._response.get('ids'):
|
124
|
+
returned_rows = object_count(scope._response['ids'][0]) if scope._response['ids'] else 0
|
125
|
+
scope._span.set_attribute(SemanticConvention.DB_RESPONSE_RETURNED_ROWS, returned_rows)
|
126
|
+
|
127
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
128
|
+
f"{scope._db_operation} {collection_name} "
|
129
|
+
f"n_results={scope._kwargs.get('n_results', 10)} "
|
130
|
+
f"{query_content} "
|
131
|
+
f"filter={scope._kwargs.get('where', 'None')}")
|
132
|
+
|
133
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_UPDATE:
|
134
|
+
collection_name = getattr(instance, "name", "unknown")
|
135
|
+
query = scope._kwargs.get("ids", [])
|
136
|
+
|
137
|
+
# Standard database attributes
|
138
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, str(query))
|
139
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
140
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, object_count(query))
|
141
|
+
|
142
|
+
# Vector database specific attributes (extensions)
|
143
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
144
|
+
f"{scope._db_operation} {collection_name} "
|
145
|
+
f"ids={query} "
|
146
|
+
f"embeddings={scope._kwargs.get('embeddings', 'None')} "
|
147
|
+
f"metadatas={scope._kwargs.get('metadatas', 'None')} "
|
148
|
+
f"documents={scope._kwargs.get('documents', 'None')}")
|
149
|
+
|
150
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_UPSERT:
|
151
|
+
collection_name = getattr(instance, "name", "unknown")
|
152
|
+
query = scope._kwargs.get("ids", [])
|
153
|
+
|
154
|
+
# Standard database attributes
|
155
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, str(query))
|
156
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
157
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, object_count(query))
|
158
|
+
|
159
|
+
# Vector database specific attributes (extensions)
|
160
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
161
|
+
f"{scope._db_operation} {collection_name} "
|
162
|
+
f"ids={query} "
|
163
|
+
f"embeddings={scope._kwargs.get('embeddings', 'None')} "
|
164
|
+
f"metadatas={scope._kwargs.get('metadatas', 'None')} "
|
165
|
+
f"documents={scope._kwargs.get('documents', 'None')}")
|
166
|
+
|
167
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_DELETE:
|
168
|
+
collection_name = getattr(instance, "name", "unknown")
|
169
|
+
query = scope._kwargs.get("ids", [])
|
170
|
+
|
171
|
+
# Standard database attributes
|
172
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, str(query))
|
173
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
174
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, object_count(query))
|
175
|
+
|
176
|
+
# Vector database specific attributes (extensions)
|
177
|
+
scope._span.set_attribute(SemanticConvention.DB_FILTER, str(scope._kwargs.get("where", "")))
|
178
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
179
|
+
f"{scope._db_operation} {collection_name} "
|
180
|
+
f"ids={query} "
|
181
|
+
f"filter={scope._kwargs.get('where', 'None')} "
|
182
|
+
f"delete_all={scope._kwargs.get('delete_all', False)}")
|
183
|
+
|
184
|
+
elif scope._db_operation == SemanticConvention.DB_OPERATION_PEEK:
|
185
|
+
collection_name = getattr(instance, "name", "unknown")
|
186
|
+
query = f"PEEK limit={scope._kwargs.get('limit', '')}"
|
187
|
+
|
188
|
+
# Standard database attributes
|
189
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_TEXT, query)
|
190
|
+
scope._span.set_attribute(SemanticConvention.DB_COLLECTION_NAME, collection_name)
|
191
|
+
scope._span.set_attribute(SemanticConvention.DB_VECTOR_COUNT, scope._kwargs.get("limit", ""))
|
192
|
+
|
193
|
+
# Vector database specific attributes (extensions)
|
194
|
+
scope._span.set_attribute(SemanticConvention.DB_QUERY_SUMMARY,
|
195
|
+
f"{scope._db_operation} {collection_name} "
|
196
|
+
f"limit={scope._kwargs.get('limit', 'None')}")
|
197
|
+
|
198
|
+
scope._span.set_status(Status(StatusCode.OK))
|
199
|
+
|
200
|
+
# Record metrics using helper
|
201
|
+
if not disable_metrics:
|
202
|
+
record_db_metrics(metrics, SemanticConvention.DB_SYSTEM_CHROMA, scope._server_address, scope._server_port,
|
203
|
+
environment, application_name, scope._start_time, scope._end_time)
|
204
|
+
|
205
|
+
def process_vectordb_response(response, db_operation, server_address, server_port,
|
206
|
+
environment, application_name, metrics, start_time, span,
|
207
|
+
capture_message_content=False, disable_metrics=False,
|
208
|
+
version="1.0.0", instance=None, args=None, endpoint=None, **kwargs):
|
209
|
+
"""
|
210
|
+
Process vector database response and generate telemetry following OpenTelemetry conventions.
|
211
|
+
"""
|
212
|
+
|
213
|
+
scope = type("GenericScope", (), {})()
|
214
|
+
|
215
|
+
scope._start_time = start_time
|
216
|
+
scope._span = span
|
217
|
+
scope._kwargs = kwargs
|
218
|
+
scope._args = args or []
|
219
|
+
scope._db_operation = db_operation
|
220
|
+
scope._response = response
|
221
|
+
scope._server_address = server_address
|
222
|
+
scope._server_port = server_port
|
223
|
+
|
224
|
+
common_vectordb_logic(scope, environment, application_name,
|
225
|
+
metrics, capture_message_content, disable_metrics, version, instance, endpoint)
|
226
|
+
|
227
|
+
return response
|