openlit 1.34.26__py3-none-any.whl → 1.34.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. openlit/__helpers.py +38 -0
  2. openlit/__init__.py +22 -155
  3. openlit/_instrumentors.py +144 -0
  4. openlit/guard/all.py +3 -3
  5. openlit/instrumentation/astra/__init__.py +71 -159
  6. openlit/instrumentation/astra/astra.py +32 -22
  7. openlit/instrumentation/astra/async_astra.py +32 -22
  8. openlit/instrumentation/astra/utils.py +263 -88
  9. openlit/instrumentation/chroma/utils.py +2 -2
  10. openlit/instrumentation/controlflow/controlflow.py +2 -2
  11. openlit/instrumentation/embedchain/embedchain.py +4 -4
  12. openlit/instrumentation/groq/__init__.py +4 -4
  13. openlit/instrumentation/haystack/__init__.py +57 -28
  14. openlit/instrumentation/haystack/async_haystack.py +54 -0
  15. openlit/instrumentation/haystack/haystack.py +35 -65
  16. openlit/instrumentation/haystack/utils.py +377 -0
  17. openlit/instrumentation/julep/async_julep.py +2 -2
  18. openlit/instrumentation/julep/julep.py +2 -2
  19. openlit/instrumentation/langchain_community/utils.py +2 -2
  20. openlit/instrumentation/llamaindex/__init__.py +165 -37
  21. openlit/instrumentation/llamaindex/async_llamaindex.py +53 -0
  22. openlit/instrumentation/llamaindex/llamaindex.py +32 -64
  23. openlit/instrumentation/llamaindex/utils.py +412 -0
  24. openlit/instrumentation/mem0/mem0.py +2 -2
  25. openlit/instrumentation/milvus/__init__.py +30 -68
  26. openlit/instrumentation/milvus/milvus.py +34 -161
  27. openlit/instrumentation/milvus/utils.py +276 -0
  28. openlit/instrumentation/openai/__init__.py +24 -24
  29. openlit/instrumentation/openai/utils.py +10 -4
  30. openlit/instrumentation/pinecone/utils.py +2 -2
  31. openlit/instrumentation/qdrant/utils.py +2 -2
  32. openlit/instrumentation/together/__init__.py +8 -8
  33. openlit/semcov/__init__.py +79 -0
  34. {openlit-1.34.26.dist-info → openlit-1.34.28.dist-info}/METADATA +1 -1
  35. {openlit-1.34.26.dist-info → openlit-1.34.28.dist-info}/RECORD +37 -31
  36. {openlit-1.34.26.dist-info → openlit-1.34.28.dist-info}/LICENSE +0 -0
  37. {openlit-1.34.26.dist-info → openlit-1.34.28.dist-info}/WHEEL +0 -0
@@ -0,0 +1,412 @@
1
+ """
2
+ LlamaIndex OpenTelemetry Instrumentation
3
+ """
4
+ import time
5
+ import hashlib
6
+ from opentelemetry.trace import Status, StatusCode
7
+
8
+ from openlit.__helpers import (
9
+ common_framework_span_attributes,
10
+ record_framework_metrics,
11
+ )
12
+ from openlit.semcov import SemanticConvention
13
+
14
+ # === OPTIMIZED OPERATION MAPPING - Framework Guide Compliant ===
15
+ # Simplified semantic conventions for efficient processing
16
# Short aliases for the semantic-convention operation types used below.
_OP_FRAMEWORK = SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK
_OP_RETRIEVE = SemanticConvention.GEN_AI_OPERATION_TYPE_RETRIEVE
_OP_CHAT = SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT
_OP_EMBEDDING = SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING

# Maps an operation name to the operation type recorded on its span.
# common_llamaindex_logic looks the endpoint up here and falls back to the
# FRAMEWORK type for names that are not listed.
# NOTE(review): common_llamaindex_logic also compares the endpoint against
# dotted names such as "framework.index.construct", which are not keys of this
# map -- confirm which naming scheme the callers actually pass.
OPERATION_MAP = {
    # --- Workflow operations (business-level spans) ---

    # Document loading and processing pipeline
    "document_load": _OP_RETRIEVE,
    "document_load_async": _OP_RETRIEVE,
    "document_transform": _OP_FRAMEWORK,
    "document_split": _OP_FRAMEWORK,

    # Index construction and management
    "index_construct": _OP_FRAMEWORK,
    "index_insert": _OP_FRAMEWORK,
    "index_delete": _OP_FRAMEWORK,
    "index_build": _OP_FRAMEWORK,

    # Query engine
    "query_engine_create": _OP_FRAMEWORK,
    "query_engine_query": _OP_RETRIEVE,
    "query_engine_query_async": _OP_RETRIEVE,

    # Retriever
    "retriever_create": _OP_FRAMEWORK,
    "retriever_retrieve": _OP_RETRIEVE,
    "retriever_retrieve_async": _OP_RETRIEVE,

    # LLM and embedding calls
    "llm_complete": _OP_CHAT,
    "llm_complete_async": _OP_CHAT,
    "llm_chat": _OP_CHAT,
    "llm_chat_async": _OP_CHAT,
    "llm_stream_async": _OP_CHAT,
    "embedding_generate": _OP_EMBEDDING,
    "embedding_generate_async": _OP_EMBEDDING,

    # Response generation
    "response_generate_async": _OP_CHAT,

    # --- Component operations (technical-level spans) ---

    # Text processing
    "text_splitter_split": _OP_FRAMEWORK,
    "text_splitter_postprocess": _OP_FRAMEWORK,
    "node_parser_parse": _OP_FRAMEWORK,

    # Embedding processing
    "embedding_encode": _OP_EMBEDDING,
    "embedding_embed_nodes": _OP_EMBEDDING,
    "embedding_similarity": _OP_RETRIEVE,
    "embedding_metadata": _OP_FRAMEWORK,

    # Retrieval processing
    "retrieval_retrieve_nodes": _OP_RETRIEVE,
    "retrieval_get_nodes": _OP_RETRIEVE,
    "retrieval_build_nodes": _OP_RETRIEVE,
    "postprocessor_process": _OP_FRAMEWORK,

    # Response generation components
    "response_synthesize": _OP_CHAT,
    "response_compact_refine": _OP_CHAT,
    "response_tree_summarize": _OP_CHAT,

    # Vector store
    "vector_store_add": _OP_FRAMEWORK,
    "vector_store_delete": _OP_FRAMEWORK,
    "vector_store_query": _OP_RETRIEVE,

    # Documents and nodes
    "document_get_content": _OP_FRAMEWORK,
    "node_get_content": _OP_FRAMEWORK,
    "node_get_metadata": _OP_FRAMEWORK,
    "document_extract_metadata": _OP_FRAMEWORK,
    "query_prepare_response": _OP_FRAMEWORK,
}
89
+
90
def set_server_address_and_port(instance, default_host="localhost", default_port=8000):
    """Derive ``(host, port)`` from ``instance._client.base_url``.

    Args:
        instance: Object that may expose a ``_client`` attribute whose
            ``base_url`` (converted with ``str``) is a URL containing ``://``.
        default_host: Host returned when no usable URL is found.
        default_port: Port returned when the URL carries no numeric port.

    Returns:
        A ``(host, port)`` tuple. When the text after the last ``:`` is not an
        integer, the whole authority is returned as the host together with
        ``default_port``.
    """
    if not hasattr(instance, "_client"):
        return default_host, default_port

    client = instance._client
    if not hasattr(client, "base_url"):
        return default_host, default_port

    base_url = str(client.base_url)
    if "://" not in base_url:
        return default_host, default_port

    # The authority component ends at the first "/", "?" or "#".
    authority = base_url.split("://", 1)[1]
    for delimiter in ("/", "?", "#"):
        authority = authority.split(delimiter, 1)[0]

    # Drop a "user:password@" prefix so credentials never end up in telemetry.
    authority = authority.rpartition("@")[2]

    host, separator, port = authority.rpartition(":")
    if not separator:
        return authority, default_port
    try:
        return host, int(port)
    except ValueError:
        # Not a numeric port (e.g. a bare IPv6 literal): keep the authority whole.
        return authority, default_port
106
+
107
def object_count(obj):
    """Return how many items *obj* holds.

    ``None`` counts as 0. Sized objects report ``len(obj)``; other iterables
    are iterated to be counted (which consumes one-shot iterators); any other
    value counts as 1 when truthy and 0 otherwise. Any exception raised while
    counting yields 0.
    """
    if obj is None:
        return 0

    try:
        if hasattr(obj, "__len__"):
            count = len(obj)
        elif hasattr(obj, "__iter__"):
            count = 0
            for _ in obj:
                count += 1
        else:
            count = int(bool(obj))
    except Exception:
        count = 0
    return count
120
+
121
def extract_performance_metrics(scope):
    """Summarise how long the operation tracked by *scope* took.

    Args:
        scope: Object exposing ``_start_time`` and ``_end_time``; their
            difference is treated as a duration in seconds.

    Returns:
        dict with ``duration`` (seconds), ``performance_tier`` ("excellent" below
        0.1, "good" below 0.5, "acceptable" below 2.0, otherwise "slow"),
        ``latency_ms``, ``is_fast`` (below 0.5) and ``needs_optimization``.
    """
    duration = scope._end_time - scope._start_time

    if duration < 0.1:
        performance_tier = "excellent"
    elif duration < 0.5:
        performance_tier = "good"
    elif duration < 2.0:
        performance_tier = "acceptable"
    else:
        performance_tier = "slow"

    return {
        "duration": duration,
        "performance_tier": performance_tier,
        "latency_ms": duration * 1000,
        "is_fast": duration < 0.5,
        # Tied to the "slow" tier so the two agree at exactly 2.0 seconds
        # (the previous ``> 2.0`` test reported a slow call as not needing work).
        "needs_optimization": performance_tier == "slow",
    }
142
+
143
def extract_content_metrics(content):
    """Compute simple size and complexity statistics for *content*.

    Args:
        content: Any value; it is converted with ``str`` before analysis.

    Returns:
        An empty dict when *content* is falsy. Otherwise a dict with
        ``char_count``, ``word_count`` (whitespace-separated tokens),
        ``complexity_score`` (0-100, from average word length and word count),
        ``content_hash`` (first 8 hex digits of the MD5 of the UTF-8 text),
        ``is_lengthy`` (more than 1000 characters) and ``is_complex``
        (score above 50).
    """
    if not content:
        return {}

    content_str = str(content)
    char_count = len(content_str)
    word_count = len(content_str.split()) if content_str else 0

    complexity_score = 0
    if word_count > 0:
        avg_word_length = char_count / word_count
        complexity_score = min(100, int((avg_word_length * 10) + (word_count / 10)))

    # errors="replace" keeps lone surrogates from raising UnicodeEncodeError.
    encoded = content_str.encode("utf-8", errors="replace")
    try:
        # The digest is only a short fingerprint, not a security measure;
        # saying so lets it work on FIPS-restricted Python builds.
        digest = hashlib.md5(encoded, usedforsecurity=False)
    except TypeError:
        # Python < 3.9 does not accept the usedforsecurity keyword.
        digest = hashlib.md5(encoded)

    return {
        "char_count": char_count,
        "word_count": word_count,
        "complexity_score": complexity_score,
        "content_hash": digest.hexdigest()[:8],
        "is_lengthy": char_count > 1000,
        "is_complex": complexity_score > 50,
    }
166
+
167
def extract_business_intelligence(scope, endpoint):
    """Classify an operation by endpoint prefix and by measured performance.

    Args:
        scope: Passed to ``extract_performance_metrics`` for timing data.
        endpoint: Endpoint name; ``None`` is treated as an empty name and
            matches no category.

    Returns:
        dict that always contains ``performance_impact`` and
        ``optimization_opportunity``; when the endpoint matches a known prefix
        it also contains ``operation_category``, ``business_impact`` and
        ``cost_driver``.
    """
    # (endpoint prefix, operation category, business impact, cost driver)
    prefix_profiles = (
        ("framework.query", "user_interaction", "high", "llm_calls"),
        ("framework.index", "data_preparation", "medium", "embedding_generation"),
        ("framework.retriever", "information_retrieval", "high", "vector_search"),
        ("component.", "technical_processing", "low", "compute_resources"),
    )

    # Callers default endpoint to None; avoid AttributeError on .startswith.
    endpoint_name = endpoint or ""

    bi_attrs = {}
    for prefix, category, impact, cost_driver in prefix_profiles:
        if endpoint_name.startswith(prefix):
            bi_attrs["operation_category"] = category
            bi_attrs["business_impact"] = impact
            bi_attrs["cost_driver"] = cost_driver
            break

    performance = extract_performance_metrics(scope)
    bi_attrs["performance_impact"] = performance["performance_tier"]
    bi_attrs["optimization_opportunity"] = performance["needs_optimization"]

    return bi_attrs
195
+
196
def common_llamaindex_logic(scope, environment, application_name,
                            metrics, capture_message_content, disable_metrics, version,
                            instance=None, endpoint=None, **kwargs):
    """Finish a LlamaIndex span: stamp attributes, set OK status, record metrics.

    Args:
        scope: Carrier object providing ``_span``, ``_start_time``,
            ``_server_address``, ``_server_port``, ``_operation_type``,
            ``_response``, ``_args`` and ``_kwargs``. ``_end_time`` is set here.
        environment: Forwarded to the shared span/metrics helpers.
        application_name: Forwarded to the shared span/metrics helpers.
        metrics: Forwarded to ``record_framework_metrics``.
        capture_message_content: When truthy, query text is recorded as the
            prompt attribute.
        disable_metrics: When truthy, ``record_framework_metrics`` is skipped.
        version: Recorded as ``gen_ai.framework.version``.
        instance: Wrapped object; read for splitter chunk settings.
        endpoint: Operation name selecting the branch-specific attributes.
            ``None`` selects no branch.
        **kwargs: Accepted for call compatibility; not read here.
    """
    from itertools import islice  # local import: only used by the index branch

    scope._end_time = time.time()
    span = scope._span
    # endpoint defaults to None; use a string for the comparisons below.
    endpoint_name = endpoint or ""

    common_framework_span_attributes(scope, SemanticConvention.GEN_AI_SYSTEM_LLAMAINDEX,
        scope._server_address, scope._server_port, environment, application_name,
        version, endpoint, instance)

    # Core operation type (FRAMEWORK when the endpoint is not in the map).
    operation_type = OPERATION_MAP.get(endpoint, SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK)
    span.set_attribute(SemanticConvention.GEN_AI_OPERATION, operation_type)

    # Timing-derived attributes.
    performance = extract_performance_metrics(scope)
    span.set_attribute("gen_ai.operation.duration_ms", performance["latency_ms"])
    span.set_attribute("gen_ai.operation.performance_tier", performance["performance_tier"])
    span.set_attribute("gen_ai.operation.is_fast", performance["is_fast"])

    # Endpoint classification attributes, stringified.
    for key, value in extract_business_intelligence(scope, endpoint_name).items():
        span.set_attribute(f"gen_ai.business.{key}", str(value))

    # --- Operation-specific attributes ---

    if endpoint_name == "framework.index.construct":
        documents = scope._kwargs.get("documents", [])
        if documents:
            doc_count = object_count(documents)
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_DOCUMENTS_COUNT, doc_count)

            document_sources = []
            total_content_length = 0
            unique_authors = set()

            # Inspect at most the first 10 documents. islice also handles
            # iterables that cannot be sliced (sets, generators).
            sample = islice(documents, 10) if hasattr(documents, '__iter__') else ()
            for doc in sample:
                if hasattr(doc, 'metadata'):
                    document_sources.append(doc.metadata.get('file_path', 'unknown'))
                    unique_authors.add(doc.metadata.get('author', 'unknown'))

                if hasattr(doc, 'text'):
                    total_content_length += len(doc.text)

            span.set_attribute("gen_ai.index.document_sources", str(document_sources[:5]))
            span.set_attribute("gen_ai.index.total_content_length", total_content_length)
            span.set_attribute("gen_ai.index.unique_authors", len(unique_authors))
            span.set_attribute("gen_ai.index.avg_document_size", total_content_length // max(doc_count, 1))

    elif endpoint_name in ("framework.query_engine.query", "framework.query_engine.query_async"):
        query_text = scope._args[0] if scope._args else scope._kwargs.get("query", "unknown")
        if capture_message_content:
            span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, str(query_text))

        span.set_attribute("gen_ai.query.length", len(str(query_text)))
        span.set_attribute("gen_ai.query.word_count", len(str(query_text).split()))
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_QUERY_TYPE, "query_engine")

        _process_index_operations(scope, endpoint, capture_message_content)

    elif endpoint_name in ("framework.retriever.retrieve", "framework.retriever.retrieve_async"):
        query_text = scope._args[0] if scope._args else scope._kwargs.get("query", "unknown")
        if capture_message_content:
            span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, str(query_text))

        similarity_top_k = scope._kwargs.get("similarity_top_k", 2)
        span.set_attribute("gen_ai.retrieval.top_k", similarity_top_k)
        span.set_attribute("gen_ai.retrieval.strategy", "vector_similarity")
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_QUERY_TYPE, "retriever")

    elif endpoint_name == "framework.document.split":
        show_progress = scope._kwargs.get("show_progress", False)
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_SHOW_PROGRESS, show_progress)

        # 0 when the response is missing, empty or unsized.
        nodes_created = _sized_len(scope._response)
        if nodes_created:
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_NODES_CREATED, nodes_created)

        chunk_size = getattr(instance, 'chunk_size', 1024) if instance else 1024
        chunk_overlap = getattr(instance, 'chunk_overlap', 200) if instance else 200
        separator = getattr(instance, 'separator', '\\n\\n') if instance else '\\n\\n'

        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_CHUNK_SIZE, chunk_size)
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_CHUNK_OVERLAP, chunk_overlap)
        span.set_attribute("gen_ai.splitter.separator", separator)
        # Only meaningful when nodes were produced; the original referenced
        # nodes_created here even when it had never been assigned.
        if nodes_created:
            span.set_attribute("gen_ai.splitter.efficiency", nodes_created / max(1, chunk_size // 100))

    # --- Component-level attributes ---

    elif endpoint_name.startswith("component.text_splitter"):
        if endpoint_name == "component.text_splitter.split":
            text_input = scope._args[0] if scope._args else scope._kwargs.get("text", "")
            text_metrics = extract_content_metrics(text_input)
            span.set_attribute("gen_ai.component.input_length", text_metrics.get("char_count", 0))
            span.set_attribute("gen_ai.component.input_complexity", text_metrics.get("complexity_score", 0))

            chunks_created = _sized_len(scope._response)
            if chunks_created:
                span.set_attribute("gen_ai.component.chunks_created", chunks_created)
                span.set_attribute("gen_ai.component.compression_ratio",
                    chunks_created / max(1, text_metrics.get("word_count", 1) // 100))

    elif endpoint_name.startswith("component.embedding"):
        if endpoint_name == "component.embedding.encode":
            texts = scope._args[0] if scope._args else scope._kwargs.get("texts", [])
            embedding_count = object_count(texts)
            span.set_attribute("gen_ai.component.embedding_count", embedding_count)

            if embedding_count > 0 and hasattr(texts, '__iter__'):
                total_chars = sum(len(str(text)) for text in texts)
                span.set_attribute("gen_ai.component.total_chars", total_chars)
                span.set_attribute("gen_ai.component.avg_text_length", total_chars // embedding_count)

    elif endpoint_name.startswith("component.retrieval"):
        if endpoint_name == "component.retrieval.retrieve_nodes":
            query_embedding = scope._args[0] if scope._args else scope._kwargs.get("query_embedding")
            # Length check instead of a truthiness test, which array-like
            # embeddings may refuse to answer.
            embedding_dimension = _sized_len(query_embedding)
            if embedding_dimension:
                span.set_attribute("gen_ai.component.embedding_dimension", embedding_dimension)

            similarity_threshold = scope._kwargs.get("similarity_threshold", 0.0)
            span.set_attribute("gen_ai.component.similarity_threshold", similarity_threshold)

    # --- Attributes set for every operation ---
    span.set_attribute(SemanticConvention.GEN_AI_CLIENT_OPERATION_DURATION,
        scope._end_time - scope._start_time)
    span.set_attribute("gen_ai.operation.endpoint", endpoint)
    span.set_attribute("gen_ai.framework.version", version)
    span.set_attribute("gen_ai.operation.success", True)

    span.set_status(Status(StatusCode.OK))

    if not disable_metrics:
        record_framework_metrics(metrics, scope._operation_type, SemanticConvention.GEN_AI_SYSTEM_LLAMAINDEX,
            scope._server_address, scope._server_port, environment, application_name,
            scope._start_time, scope._end_time)


def _sized_len(obj):
    """Return ``len(obj)`` when *obj* defines ``__len__``, otherwise 0."""
    return len(obj) if hasattr(obj, '__len__') else 0
352
+
353
def process_llamaindex_response(response, operation_type, server_address, server_port,
                                environment, application_name, metrics, start_time, span,
                                capture_message_content, disable_metrics, version, instance=None,
                                args=None, endpoint=None, **kwargs):
    """Bundle call details into a scope object and run the shared telemetry logic.

    The span, operation type, response, start time, server address/port,
    positional ``args`` and the extra keyword arguments are attached to a
    fresh scope object, which is then handed to ``common_llamaindex_logic``.

    Returns:
        The *response* argument, unchanged.
    """
    scope = type("EnhancedScope", (), {})()
    scope_fields = (
        ("_span", span),
        ("_operation_type", operation_type),
        ("_response", response),
        ("_start_time", start_time),
        ("_server_address", server_address),
        ("_server_port", server_port),
        ("_args", args),
        # Extra keyword arguments travel on the scope; they are not forwarded
        # as keyword arguments to common_llamaindex_logic.
        ("_kwargs", kwargs),
    )
    for field_name, field_value in scope_fields:
        setattr(scope, field_name, field_value)

    common_llamaindex_logic(
        scope, environment, application_name, metrics,
        capture_message_content, disable_metrics, version,
        instance=instance, endpoint=endpoint,
    )

    return response
379
+
380
def _process_index_operations(scope, endpoint, capture_message_content):
    """Record document statistics from the nodes attached to a query result.

    The result is read from ``scope._result`` and, when that is absent or
    falsy, from ``scope._response`` (the attribute that
    ``process_llamaindex_response`` populates). Its ``source_nodes`` (or,
    failing that, ``nodes``) are summarised into ``gen_ai.index.*`` span
    attributes. Any error during extraction is suppressed.

    Args:
        scope: Carrier object providing ``_span`` and the result.
        endpoint: Unused; kept for signature compatibility.
        capture_message_content: Unused; kept for signature compatibility.
    """
    # The original only looked at ``_result``, which the scope built in this
    # module never sets, so the helper always returned early.
    result = getattr(scope, "_result", None) or getattr(scope, "_response", None)
    if not result:
        return

    try:
        if hasattr(result, "source_nodes"):
            nodes = result.source_nodes
        elif hasattr(result, "nodes"):
            nodes = result.nodes
        else:
            return

        doc_count = len(nodes)
        scope._span.set_attribute("gen_ai.index.document_count", doc_count)

        unique_authors = set()
        total_content_length = 0

        for node in nodes:
            if hasattr(node, "metadata") and isinstance(node.metadata, dict):
                if "author" in node.metadata:
                    unique_authors.add(node.metadata["author"])

            if hasattr(node, "text"):
                total_content_length += len(str(node.text))

        scope._span.set_attribute("gen_ai.index.total_content_length", total_content_length)
        scope._span.set_attribute("gen_ai.index.unique_authors", len(unique_authors))
        scope._span.set_attribute("gen_ai.index.avg_document_size", total_content_length // max(doc_count, 1))
    except Exception:
        pass  # Best effort: metadata extraction must never break the traced call
@@ -19,7 +19,7 @@ def mem0_wrap(gen_ai_endpoint, version, environment, application_name,
19
19
 
20
20
  This function wraps any given function to measure its execution time,
21
21
  log its operation, and trace its execution using OpenTelemetry.
22
-
22
+
23
23
  Parameters:
24
24
  - gen_ai_endpoint (str): A descriptor or name for the endpoint being traced.
25
25
  - version (str): The version of the mem0 application.
@@ -49,7 +49,7 @@ def mem0_wrap(gen_ai_endpoint, version, environment, application_name,
49
49
 
50
50
  Returns:
51
51
  - The result of the wrapped function call.
52
-
52
+
53
53
  The wrapper initiates a span with the provided tracer, sets various attributes
54
54
  on the span based on the function's execution and response, and ensures
55
55
  errors are handled and logged appropriately.
@@ -1,5 +1,7 @@
1
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
2
- """Initializer of Auto Instrumentation of Milvus Functions"""
1
+ """
2
+ OpenLIT Milvus Instrumentation
3
+ """
4
+
3
5
  from typing import Collection
4
6
  import importlib.metadata
5
7
  from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
@@ -9,86 +11,46 @@ from openlit.instrumentation.milvus.milvus import general_wrap
9
11
 
10
12
  _instruments = ("pymilvus >= 2.4.3",)
11
13
 
12
- WRAPPED_METHODS = [
13
- {
14
- "package": "pymilvus",
15
- "object": "MilvusClient.create_collection",
16
- "endpoint": "milvus.create_collection",
17
- "wrapper": general_wrap,
18
- },
19
- {
20
- "package": "pymilvus",
21
- "object": "MilvusClient.drop_collection",
22
- "endpoint": "milvus.drop_collection",
23
- "wrapper": general_wrap,
24
- },
25
- {
26
- "package": "pymilvus",
27
- "object": "MilvusClient.insert",
28
- "endpoint": "milvus.insert",
29
- "wrapper": general_wrap,
30
- },
31
- {
32
- "package": "pymilvus",
33
- "object": "MilvusClient.upsert",
34
- "endpoint": "milvus.upsert",
35
- "wrapper": general_wrap,
36
- },
37
- {
38
- "package": "pymilvus",
39
- "object": "MilvusClient.search",
40
- "endpoint": "milvus.search",
41
- "wrapper": general_wrap,
42
- },
43
- {
44
- "package": "pymilvus",
45
- "object": "MilvusClient.query",
46
- "endpoint": "milvus.query",
47
- "wrapper": general_wrap,
48
- },
49
- {
50
- "package": "pymilvus",
51
- "object": "MilvusClient.get",
52
- "endpoint": "milvus.get",
53
- "wrapper": general_wrap,
54
- },
55
- {
56
- "package": "pymilvus",
57
- "object": "MilvusClient.delete",
58
- "endpoint": "milvus.delete",
59
- "wrapper": general_wrap,
60
- },
14
+ # Operations to wrap for Milvus client
15
+ MILVUS_OPERATIONS = [
16
+ ("create_collection", "milvus.create_collection"),
17
+ ("drop_collection", "milvus.drop_collection"),
18
+ ("insert", "milvus.insert"),
19
+ ("upsert", "milvus.upsert"),
20
+ ("search", "milvus.search"),
21
+ ("query", "milvus.query"),
22
+ ("get", "milvus.get"),
23
+ ("delete", "milvus.delete"),
61
24
  ]
62
25
 
63
26
  class MilvusInstrumentor(BaseInstrumentor):
64
- """An instrumentor for Milvus's client library."""
27
+ """
28
+ An instrumentor for Milvus's client library.
29
+ """
65
30
 
66
31
  def instrumentation_dependencies(self) -> Collection[str]:
67
32
  return _instruments
68
33
 
69
34
  def _instrument(self, **kwargs):
70
- application_name = kwargs.get("application_name")
71
- environment = kwargs.get("environment")
35
+ version = importlib.metadata.version("pymilvus")
36
+ environment = kwargs.get("environment", "default")
37
+ application_name = kwargs.get("application_name", "default")
72
38
  tracer = kwargs.get("tracer")
39
+ pricing_info = kwargs.get("pricing_info", {})
40
+ capture_message_content = kwargs.get("capture_message_content", False)
73
41
  metrics = kwargs.get("metrics_dict")
74
- pricing_info = kwargs.get("pricing_info")
75
- capture_message_content = kwargs.get("capture_message_content")
76
42
  disable_metrics = kwargs.get("disable_metrics")
77
- version = importlib.metadata.version("pymilvus")
78
43
 
79
- for wrapped_method in WRAPPED_METHODS:
80
- wrap_package = wrapped_method.get("package")
81
- wrap_object = wrapped_method.get("object")
82
- gen_ai_endpoint = wrapped_method.get("endpoint")
83
- wrapper = wrapped_method.get("wrapper")
44
+ # Wrap operations
45
+ for method_name, endpoint in MILVUS_OPERATIONS:
84
46
  wrap_function_wrapper(
85
- wrap_package,
86
- wrap_object,
87
- wrapper(gen_ai_endpoint, version, environment, application_name,
88
- tracer, pricing_info, capture_message_content, metrics, disable_metrics),
47
+ "pymilvus",
48
+ f"MilvusClient.{method_name}",
49
+ general_wrap(
50
+ endpoint, version, environment, application_name, tracer,
51
+ pricing_info, capture_message_content, metrics, disable_metrics
52
+ ),
89
53
  )
90
54
 
91
-
92
- @staticmethod
93
55
  def _uninstrument(self, **kwargs):
94
56
  pass