netra-sdk 0.1.30__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of netra-sdk has been flagged as potentially problematic.

netra/instrumentation/__init__.py CHANGED
@@ -93,6 +93,10 @@ def init_instrumentations(
     if CustomInstruments.MISTRALAI in netra_custom_instruments:
         init_mistral_instrumentor()
 
+    # Initialize LiteLLM instrumentation.
+    if CustomInstruments.LITELLM in netra_custom_instruments:
+        init_litellm_instrumentation()
+
     # Initialize OpenAI instrumentation.
     if CustomInstruments.OPENAI in netra_custom_instruments:
         init_openai_instrumentation()
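
The new branch follows the same dispatch pattern as the surrounding Mistral and OpenAI blocks: each member of netra_custom_instruments gates exactly one initializer. A minimal, runnable sketch of that pattern (the enabled argument is a hypothetical stand-in for the SDK's configured instrument set, and the initializers are placeholders):

from enum import Enum


class CustomInstruments(Enum):
    LITELLM = "litellm"
    OPENAI = "openai"


def init_litellm_instrumentation() -> bool:
    print("LiteLLM instrumented")  # placeholder for the real initializer
    return True


def init_openai_instrumentation() -> bool:
    print("OpenAI instrumented")  # placeholder for the real initializer
    return True


def init_instrumentations(enabled):
    # One guard per instrument, mirroring the diff above.
    if CustomInstruments.LITELLM in enabled:
        init_litellm_instrumentation()
    if CustomInstruments.OPENAI in enabled:
        init_openai_instrumentation()


init_instrumentations({CustomInstruments.LITELLM})  # prints "LiteLLM instrumented"
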
@@ -435,6 +439,26 @@ def init_mistral_instrumentor() -> bool:
     return False
 
 
+def init_litellm_instrumentation() -> bool:
+    """Initialize LiteLLM instrumentation.
+
+    Returns:
+        bool: True if initialization was successful, False otherwise.
+    """
+    try:
+        if is_package_installed("litellm"):
+            from netra.instrumentation.litellm import LiteLLMInstrumentor
+
+            instrumentor = LiteLLMInstrumentor()
+            if not instrumentor.is_instrumented_by_opentelemetry:
+                instrumentor.instrument()
+            return True
+    except Exception as e:
+        logging.error(f"Error initializing LiteLLM instrumentor: {e}")
+        Telemetry().log_exception(e)
+    return False
+
+
 def init_openai_instrumentation() -> bool:
     """Initialize OpenAI instrumentation.
 
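
Here is_package_installed and Telemetry are Netra-internal helpers. The guard-then-instrument shape of the function can be sketched with only the standard library, using importlib.util.find_spec as a stand-in for is_package_installed (an assumption, not the SDK's actual helper):

import importlib.util
import logging


def safe_init(package, instrument):
    # Guard: only touch the library if it is importable, and never let
    # instrumentation failures propagate into the host application.
    try:
        if importlib.util.find_spec(package) is not None:
            instrument()
            return True
    except Exception as exc:
        logging.error(f"Error initializing {package} instrumentor: {exc}")
    return False


print(safe_init("json", lambda: None))         # True: stdlib json is importable
print(safe_init("no_such_pkg", lambda: None))  # False: the guard short-circuits
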
netra/instrumentation/instruments.py CHANGED
@@ -8,6 +8,7 @@ class CustomInstruments(Enum):
     AIOHTTP = "aiohttp"
     COHEREAI = "cohere_ai"
     HTTPX = "httpx"
+    LITELLM = "litellm"
     MISTRALAI = "mistral_ai"
     OPENAI = "openai"
     PYDANTIC_AI = "pydantic_ai"
@@ -127,6 +128,7 @@ class InstrumentSet(Enum):
    KAFKA_PYTHON = "kafka_python"
    LANCEDB = "lancedb"
    LANGCHAIN = "langchain"
+   LITELLM = "litellm"
    LLAMA_INDEX = "llama_index"
    LOGGING = "logging"
    MARQO = "marqo"
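
Both enums reuse the string value "litellm", so an instrument name given as a plain string in configuration resolves back to a member through standard Enum value lookup. For example:

from enum import Enum


class CustomInstruments(Enum):
    HTTPX = "httpx"
    LITELLM = "litellm"
    MISTRALAI = "mistral_ai"


# Value lookup maps a configuration string back to the enum member.
assert CustomInstruments("litellm") is CustomInstruments.LITELLM
assert CustomInstruments.LITELLM.value == "litellm"
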
netra/instrumentation/litellm/__init__.py ADDED
@@ -0,0 +1,161 @@
+import logging
+import time
+from typing import Any, Collection, Dict, Optional
+
+from opentelemetry import context as context_api
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
+from opentelemetry.trace import SpanKind, Tracer, get_tracer
+from opentelemetry.trace.status import Status, StatusCode
+from wrapt import wrap_function_wrapper
+
+from netra.instrumentation.litellm.version import __version__
+from netra.instrumentation.litellm.wrappers import (
+    acompletion_wrapper,
+    aembedding_wrapper,
+    aimage_generation_wrapper,
+    completion_wrapper,
+    embedding_wrapper,
+    image_generation_wrapper,
+)
+
+logger = logging.getLogger(__name__)
+
+_instruments = ("litellm >= 1.0.0",)
+
+
+class LiteLLMInstrumentor(BaseInstrumentor):  # type: ignore[misc]
+    """
+    Custom LiteLLM instrumentor for Netra SDK with enhanced support for:
+    - completion() and acompletion() methods
+    - embedding() and aembedding() methods
+    - image_generation() and aimage_generation() methods
+    - Proper streaming/non-streaming span handling
+    - Integration with Netra tracing
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):  # type: ignore[no-untyped-def]
+        """Instrument LiteLLM methods"""
+        tracer_provider = kwargs.get("tracer_provider")
+        tracer = get_tracer(__name__, __version__, tracer_provider)
+
+        logger.debug("Starting LiteLLM instrumentation...")
+
+        # Force import litellm to ensure it's available for wrapping
+        try:
+            import litellm
+        except ImportError as e:
+            logger.error(f"Failed to import litellm: {e}")
+            return
+
+        # Store original functions for uninstrumentation
+        self._original_completion = getattr(litellm, "completion", None)
+        self._original_acompletion = getattr(litellm, "acompletion", None)
+        self._original_embedding = getattr(litellm, "embedding", None)
+        self._original_aembedding = getattr(litellm, "aembedding", None)
+        self._original_image_generation = getattr(litellm, "image_generation", None)
+        self._original_aimage_generation = getattr(litellm, "aimage_generation", None)
+
+        # Chat completions - use direct monkey patching with proper function wrapping
+        if self._original_completion:
+            try:
+
+                def instrumented_completion(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = completion_wrapper(tracer)
+                    return wrapper(self._original_completion, None, args, kwargs)
+
+                litellm.completion = instrumented_completion
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.completion: {e}")
+
+        if self._original_acompletion:
+            try:
+
+                async def instrumented_acompletion(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = acompletion_wrapper(tracer)
+                    return await wrapper(self._original_acompletion, None, args, kwargs)
+
+                litellm.acompletion = instrumented_acompletion
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.acompletion: {e}")
+
+        # Embeddings
+        if self._original_embedding:
+            try:
+
+                def instrumented_embedding(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = embedding_wrapper(tracer)
+                    return wrapper(self._original_embedding, None, args, kwargs)
+
+                litellm.embedding = instrumented_embedding
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.embedding: {e}")
+
+        if self._original_aembedding:
+            try:
+
+                async def instrumented_aembedding(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = aembedding_wrapper(tracer)
+                    return await wrapper(self._original_aembedding, None, args, kwargs)
+
+                litellm.aembedding = instrumented_aembedding
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.aembedding: {e}")
+
+        # Image generation
+        if self._original_image_generation:
+            try:
+
+                def instrumented_image_generation(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = image_generation_wrapper(tracer)
+                    return wrapper(self._original_image_generation, None, args, kwargs)
+
+                litellm.image_generation = instrumented_image_generation
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.image_generation: {e}")
+
+        if self._original_aimage_generation:
+            try:
+
+                async def instrumented_aimage_generation(*args, **kwargs):  # type: ignore[no-untyped-def]
+                    wrapper = aimage_generation_wrapper(tracer)
+                    return await wrapper(self._original_aimage_generation, None, args, kwargs)
+
+                litellm.aimage_generation = instrumented_aimage_generation
+            except Exception as e:
+                logger.error(f"Failed to monkey-patch litellm.aimage_generation: {e}")
+
+    def _uninstrument(self, **kwargs):  # type: ignore[no-untyped-def]
+        """Uninstrument LiteLLM methods"""
+        try:
+            import litellm
+
+            # Restore original functions
+            if hasattr(self, "_original_completion") and self._original_completion:
+                litellm.completion = self._original_completion
+
+            if hasattr(self, "_original_acompletion") and self._original_acompletion:
+                litellm.acompletion = self._original_acompletion
+
+            if hasattr(self, "_original_embedding") and self._original_embedding:
+                litellm.embedding = self._original_embedding
+
+            if hasattr(self, "_original_aembedding") and self._original_aembedding:
+                litellm.aembedding = self._original_aembedding
+
+            if hasattr(self, "_original_image_generation") and self._original_image_generation:
+                litellm.image_generation = self._original_image_generation
+
+            if hasattr(self, "_original_aimage_generation") and self._original_aimage_generation:
+                litellm.aimage_generation = self._original_aimage_generation
+
+        except ImportError:
+            pass
+
+
+def should_suppress_instrumentation() -> bool:
+    """Check if instrumentation should be suppressed"""
+    return context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) is True
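
LiteLLMInstrumentor inherits the public instrument()/uninstrument() lifecycle from OpenTelemetry's BaseInstrumentor, and every wrapper consults the context-level suppression key before recording. A usage sketch only (assumes litellm and the netra package are installed; the ... marks elided application code):

from opentelemetry import context
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY

from netra.instrumentation.litellm import LiteLLMInstrumentor

instrumentor = LiteLLMInstrumentor()
if not instrumentor.is_instrumented_by_opentelemetry:
    instrumentor.instrument()  # patches litellm.completion, embedding, ...

# Suppress spans for a block of calls: every wrapper checks this key
# via should_suppress_instrumentation() before recording.
token = context.attach(context.set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
try:
    ...  # litellm calls made here bypass the instrumentation
finally:
    context.detach(token)

instrumentor.uninstrument()  # restores the saved original functions
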
netra/instrumentation/litellm/version.py ADDED
@@ -0,0 +1 @@
+__version__ = "1.0.0"
netra/instrumentation/litellm/wrappers.py ADDED
@@ -0,0 +1,557 @@
+import logging
+import time
+from collections.abc import Awaitable
+from typing import Any, AsyncIterator, Callable, Dict, Iterator, Tuple
+
+from opentelemetry import context as context_api
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.semconv_ai import (
+    SpanAttributes,
+)
+from opentelemetry.trace import Span, SpanKind, Tracer
+from opentelemetry.trace.status import Status, StatusCode
+from wrapt import ObjectProxy
+
+logger = logging.getLogger(__name__)
+
+COMPLETION_SPAN_NAME = "litellm.completion"
+EMBEDDING_SPAN_NAME = "litellm.embedding"
+IMAGE_GENERATION_SPAN_NAME = "litellm.image_generation"
+
+
+def should_suppress_instrumentation() -> bool:
+    """Check if instrumentation should be suppressed"""
+    return context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) is True
+
+
+def is_streaming_response(response: Any) -> bool:
+    """Check if response is a streaming response"""
+    return hasattr(response, "__iter__") and not isinstance(response, (str, bytes, dict))
+
+
+def model_as_dict(obj: Any) -> Dict[str, Any]:
+    """Convert LiteLLM model object to dictionary"""
+    if hasattr(obj, "model_dump"):
+        result = obj.model_dump()
+        return result if isinstance(result, dict) else {}
+    elif hasattr(obj, "to_dict"):
+        result = obj.to_dict()
+        return result if isinstance(result, dict) else {}
+    elif isinstance(obj, dict):
+        return obj
+    else:
+        return {}
+
+
+def set_request_attributes(span: Span, kwargs: Dict[str, Any], operation_type: str) -> None:
+    """Set request attributes on span"""
+    if not span.is_recording():
+        return
+
+    # Set operation type
+    span.set_attribute(f"{SpanAttributes.LLM_REQUEST_TYPE}", operation_type)
+    span.set_attribute(f"{SpanAttributes.LLM_SYSTEM}", "LiteLLM")
+
+    # Common attributes
+    if kwargs.get("model"):
+        span.set_attribute(f"{SpanAttributes.LLM_REQUEST_MODEL}", kwargs["model"])
+
+    if kwargs.get("temperature") is not None:
+        span.set_attribute(f"{SpanAttributes.LLM_REQUEST_TEMPERATURE}", kwargs["temperature"])
+
+    if kwargs.get("max_tokens") is not None:
+        span.set_attribute(f"{SpanAttributes.LLM_REQUEST_MAX_TOKENS}", kwargs["max_tokens"])
+
+    if kwargs.get("stream") is not None:
+        span.set_attribute("gen_ai.stream", kwargs["stream"])
+
+    # Chat completion specific attributes
+    if operation_type == "chat" and kwargs.get("messages"):
+        messages = kwargs["messages"]
+        if isinstance(messages, list) and len(messages) > 0:
+            for index, message in enumerate(messages):
+                if isinstance(message, dict):
+                    span.set_attribute(f"{SpanAttributes.LLM_PROMPTS}.{index}.role", message.get("role", "user"))
+                    span.set_attribute(f"{SpanAttributes.LLM_PROMPTS}.{index}.content", str(message.get("content", "")))
+
+    # Embedding specific attributes
+    if operation_type == "embedding" and kwargs.get("input"):
+        input_data = kwargs["input"]
+        if isinstance(input_data, str):
+            span.set_attribute(f"{SpanAttributes.LLM_PROMPTS}.0.content", input_data)
+        elif isinstance(input_data, list):
+            for index, text in enumerate(input_data):
+                if isinstance(text, str):
+                    span.set_attribute(f"{SpanAttributes.LLM_PROMPTS}.{index}.content", text)
+
+    # Image generation specific attributes
+    if operation_type == "image_generation":
+        if kwargs.get("prompt"):
+            span.set_attribute("gen_ai.prompt", kwargs["prompt"])
+        if kwargs.get("n"):
+            span.set_attribute("gen_ai.request.n", kwargs["n"])
+        if kwargs.get("size"):
+            span.set_attribute("gen_ai.request.size", kwargs["size"])
+        if kwargs.get("quality"):
+            span.set_attribute("gen_ai.request.quality", kwargs["quality"])
+        if kwargs.get("style"):
+            span.set_attribute("gen_ai.request.style", kwargs["style"])
+
+
+def set_response_attributes(span: Span, response_dict: Dict[str, Any], operation_type: str) -> None:
+    """Set response attributes on span"""
+    if not span.is_recording():
+        return
+
+    if response_dict.get("model"):
+        span.set_attribute(f"{SpanAttributes.LLM_RESPONSE_MODEL}", response_dict["model"])
+
+    if response_dict.get("id"):
+        span.set_attribute("gen_ai.response.id", response_dict["id"])
+
+    # Usage information
+    usage = response_dict.get("usage", {})
+    if usage:
+        if usage.get("prompt_tokens"):
+            span.set_attribute(f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}", usage["prompt_tokens"])
+        if usage.get("completion_tokens"):
+            span.set_attribute(f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}", usage["completion_tokens"])
+        if usage.get("cache_read_input_tokens"):
+            span.set_attribute(f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}", usage["cache_read_input_tokens"])
+        if usage.get("cache_creation_input_tokens"):
+            span.set_attribute("gen_ai.usage.cache_creation_input_tokens", usage["cache_creation_input_tokens"])
+        if usage.get("total_tokens"):
+            span.set_attribute(f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}", usage["total_tokens"])
+
+    # Chat completion response content
+    if operation_type == "chat":
+        choices = response_dict.get("choices", [])
+        for index, choice in enumerate(choices):
+            if choice.get("message", {}).get("role"):
+                span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.{index}.role", choice["message"]["role"])
+            if choice.get("message", {}).get("content"):
+                span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.{index}.content", choice["message"]["content"])
+            if choice.get("finish_reason"):
+                span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.{index}.finish_reason", choice["finish_reason"])
+
+    # Embedding response content
+    elif operation_type == "embedding":
+        data = response_dict.get("data", [])
+        for index, embedding_data in enumerate(data):
+            if embedding_data.get("index") is not None:
+                span.set_attribute(f"gen_ai.response.embeddings.{index}.index", embedding_data["index"])
+            if embedding_data.get("embedding"):
+                # Don't log the actual embedding vector, just its dimensions
+                embedding_vector = embedding_data["embedding"]
+                if isinstance(embedding_vector, list):
+                    span.set_attribute(f"gen_ai.response.embeddings.{index}.dimensions", len(embedding_vector))
+
+    # Image generation response content
+    elif operation_type == "image_generation":
+        data = response_dict.get("data", [])
+        for index, image_data in enumerate(data):
+            if image_data.get("url"):
+                span.set_attribute(f"gen_ai.response.images.{index}.url", image_data["url"])
+            if image_data.get("b64_json"):
+                span.set_attribute(f"gen_ai.response.images.{index}.has_b64_json", True)
+            if image_data.get("revised_prompt"):
+                span.set_attribute(f"gen_ai.response.images.{index}.revised_prompt", image_data["revised_prompt"])
+
+
+def completion_wrapper(tracer: Tracer) -> Callable[..., Any]:
+    """Wrapper for LiteLLM completion function"""
+
+    def wrapper(wrapped: Callable[..., Any], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Any:
+        logger.debug(f"LiteLLM completion wrapper called with model: {kwargs.get('model')}")
+
+        if should_suppress_instrumentation():
+            logger.debug("LiteLLM instrumentation suppressed")
+            return wrapped(*args, **kwargs)
+
+        # Check if streaming
+        is_streaming = kwargs.get("stream", False)
+
+        if is_streaming:
+            # Use start_span for streaming - returns span directly
+            span = tracer.start_span(
+                COMPLETION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "chat"}
+            )
+
+            set_request_attributes(span, kwargs, "chat")
+
+            try:
+                start_time = time.time()
+                response = wrapped(*args, **kwargs)
+
+                return StreamingWrapper(span=span, response=response, start_time=start_time, request_kwargs=kwargs)
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                span.end()
+                raise
+        else:
+            # Use start_as_current_span for non-streaming - returns context manager
+            with tracer.start_as_current_span(
+                COMPLETION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "chat"}
+            ) as span:
+                set_request_attributes(span, kwargs, "chat")
+
+                try:
+                    start_time = time.time()
+                    response = wrapped(*args, **kwargs)
+                    end_time = time.time()
+
+                    response_dict = model_as_dict(response)
+                    set_response_attributes(span, response_dict, "chat")
+
+                    span.set_attribute("llm.response.duration", end_time - start_time)
+                    span.set_status(Status(StatusCode.OK))
+
+                    return response
+                except Exception as e:
+                    span.set_status(Status(StatusCode.ERROR, str(e)))
+                    raise
+
+    return wrapper
+
+
+def acompletion_wrapper(tracer: Tracer) -> Callable[..., Awaitable[Any]]:
+    """Async wrapper for LiteLLM acompletion function"""
+
+    async def wrapper(
+        wrapped: Callable[..., Awaitable[Any]], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+    ) -> Any:
+        if should_suppress_instrumentation():
+            return await wrapped(*args, **kwargs)
+
+        # Check if streaming
+        is_streaming = kwargs.get("stream", False)
+
+        if is_streaming:
+            # Use start_span for streaming - returns span directly
+            span = tracer.start_span(
+                COMPLETION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "chat"}
+            )
+
+            set_request_attributes(span, kwargs, "chat")
+
+            try:
+                start_time = time.time()
+                response = await wrapped(*args, **kwargs)
+
+                return AsyncStreamingWrapper(span=span, response=response, start_time=start_time, request_kwargs=kwargs)
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                span.end()
+                raise
+        else:
+            # Use start_as_current_span for non-streaming - returns context manager
+            with tracer.start_as_current_span(
+                COMPLETION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "chat"}
+            ) as span:
+                set_request_attributes(span, kwargs, "chat")
+
+                try:
+                    start_time = time.time()
+                    response = await wrapped(*args, **kwargs)
+                    end_time = time.time()
+
+                    response_dict = model_as_dict(response)
+                    set_response_attributes(span, response_dict, "chat")
+
+                    span.set_attribute("llm.response.duration", end_time - start_time)
+                    span.set_status(Status(StatusCode.OK))
+
+                    return response
+                except Exception as e:
+                    span.set_status(Status(StatusCode.ERROR, str(e)))
+                    raise
+
+    return wrapper
+
+
+def embedding_wrapper(tracer: Tracer) -> Callable[..., Any]:
+    """Wrapper for LiteLLM embedding function"""
+
+    def wrapper(wrapped: Callable[..., Any], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Any:
+        if should_suppress_instrumentation():
+            return wrapped(*args, **kwargs)
+
+        # Embeddings are never streaming, always use start_as_current_span
+        with tracer.start_as_current_span(
+            EMBEDDING_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "embedding"}
+        ) as span:
+            set_request_attributes(span, kwargs, "embedding")
+
+            try:
+                start_time = time.time()
+                response = wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = model_as_dict(response)
+                set_response_attributes(span, response_dict, "embedding")
+
+                span.set_attribute("llm.response.duration", end_time - start_time)
+                span.set_status(Status(StatusCode.OK))
+
+                return response
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                raise
+
+    return wrapper
+
+
+def aembedding_wrapper(tracer: Tracer) -> Callable[..., Awaitable[Any]]:
+    """Async wrapper for LiteLLM aembedding function"""
+
+    async def wrapper(
+        wrapped: Callable[..., Awaitable[Any]], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+    ) -> Any:
+        if should_suppress_instrumentation():
+            return await wrapped(*args, **kwargs)
+
+        # Embeddings are never streaming, always use start_as_current_span
+        with tracer.start_as_current_span(
+            EMBEDDING_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "embedding"}
+        ) as span:
+            set_request_attributes(span, kwargs, "embedding")
+
+            try:
+                start_time = time.time()
+                response = await wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = model_as_dict(response)
+                set_response_attributes(span, response_dict, "embedding")
+
+                span.set_attribute("llm.response.duration", end_time - start_time)
+                span.set_status(Status(StatusCode.OK))
+
+                return response
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                raise
+
+    return wrapper
+
+
+def image_generation_wrapper(tracer: Tracer) -> Callable[..., Any]:
+    """Wrapper for LiteLLM image_generation function"""
+
+    def wrapper(wrapped: Callable[..., Any], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Any:
+        if should_suppress_instrumentation():
+            return wrapped(*args, **kwargs)
+
+        # Image generation is never streaming, always use start_as_current_span
+        with tracer.start_as_current_span(
+            IMAGE_GENERATION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "image_generation"}
+        ) as span:
+            set_request_attributes(span, kwargs, "image_generation")
+
+            try:
+                start_time = time.time()
+                response = wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = model_as_dict(response)
+                set_response_attributes(span, response_dict, "image_generation")
+
+                span.set_attribute("llm.response.duration", end_time - start_time)
+                span.set_status(Status(StatusCode.OK))
+
+                return response
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                raise
+
+    return wrapper
+
+
+def aimage_generation_wrapper(tracer: Tracer) -> Callable[..., Awaitable[Any]]:
+    """Async wrapper for LiteLLM aimage_generation function"""
+
+    async def wrapper(
+        wrapped: Callable[..., Awaitable[Any]], instance: Any, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+    ) -> Any:
+        if should_suppress_instrumentation():
+            return await wrapped(*args, **kwargs)
+
+        # Image generation is never streaming, always use start_as_current_span
+        with tracer.start_as_current_span(
+            IMAGE_GENERATION_SPAN_NAME, kind=SpanKind.CLIENT, attributes={"llm.request.type": "image_generation"}
+        ) as span:
+            set_request_attributes(span, kwargs, "image_generation")
+
+            try:
+                start_time = time.time()
+                response = await wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = model_as_dict(response)
+                set_response_attributes(span, response_dict, "image_generation")
+
+                span.set_attribute("llm.response.duration", end_time - start_time)
+                span.set_status(Status(StatusCode.OK))
+
+                return response
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                raise
+
+    return wrapper
+
+
+class StreamingWrapper(ObjectProxy):  # type: ignore[misc]
+    """Wrapper for streaming responses"""
+
+    def __init__(self, span: Span, response: Iterator[Any], start_time: float, request_kwargs: Dict[str, Any]) -> None:
+        super().__init__(response)
+        self._span = span
+        self._start_time = start_time
+        self._request_kwargs = request_kwargs
+        self._complete_response: Dict[str, Any] = {"choices": [], "model": ""}
+        self._content_parts: list[str] = []
+
+    def __iter__(self) -> Iterator[Any]:
+        return self
+
+    def __next__(self) -> Any:
+        try:
+            chunk = self.__wrapped__.__next__()
+            self._process_chunk(chunk)
+            return chunk
+        except StopIteration:
+            self._finalize_span()
+            raise
+
+    def _process_chunk(self, chunk: Any) -> None:
+        """Process streaming chunk"""
+        chunk_dict = model_as_dict(chunk)
+
+        # Accumulate response data
+        if chunk_dict.get("model"):
+            self._complete_response["model"] = chunk_dict["model"]
+
+        # Accumulate usage information from chunks
+        if chunk_dict.get("usage"):
+            self._complete_response["usage"] = chunk_dict["usage"]
+
+        # Collect content from delta
+        choices = chunk_dict.get("choices", [])
+        for choice in choices:
+            delta = choice.get("delta", {})
+            if delta.get("content"):
+                self._content_parts.append(delta["content"])
+
+            # Collect finish_reason from choices
+            if choice.get("finish_reason"):
+                if "choices" not in self._complete_response:
+                    self._complete_response["choices"] = []
+                # Ensure we have enough choice entries
+                while len(self._complete_response["choices"]) <= len(choices) - 1:
+                    self._complete_response["choices"].append(
+                        {"message": {"role": "assistant", "content": ""}, "finish_reason": None}
+                    )
+
+                choice_index = choice.get("index", 0)
+                if choice_index < len(self._complete_response["choices"]):
+                    self._complete_response["choices"][choice_index]["finish_reason"] = choice["finish_reason"]
+
+        # Add chunk event
+        self._span.add_event("llm.content.completion.chunk")
+
+    def _finalize_span(self) -> None:
+        """Finalize span when streaming is complete"""
+        end_time = time.time()
+        duration = end_time - self._start_time
+
+        # Set accumulated content
+        if self._content_parts:
+            full_content = "".join(self._content_parts)
+            self._span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.0.content", full_content)
+            self._span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.0.role", "assistant")
+
+        set_response_attributes(self._span, self._complete_response, "chat")
+        self._span.set_attribute("llm.response.duration", duration)
+        self._span.set_status(Status(StatusCode.OK))
+        self._span.end()
+
+
+class AsyncStreamingWrapper(ObjectProxy):  # type: ignore[misc]
+    """Async wrapper for streaming responses"""
+
+    def __init__(
+        self, span: Span, response: AsyncIterator[Any], start_time: float, request_kwargs: Dict[str, Any]
+    ) -> None:
+        super().__init__(response)
+        self._span = span
+        self._start_time = start_time
+        self._request_kwargs = request_kwargs
+        self._complete_response: Dict[str, Any] = {"choices": [], "model": ""}
+        self._content_parts: list[str] = []
+
+    def __aiter__(self) -> AsyncIterator[Any]:
+        return self
+
+    async def __anext__(self) -> Any:
+        try:
+            chunk = await self.__wrapped__.__anext__()
+            self._process_chunk(chunk)
+            return chunk
+        except StopAsyncIteration:
+            self._finalize_span()
+            raise
+
+    def _process_chunk(self, chunk: Any) -> None:
+        """Process streaming chunk"""
+        chunk_dict = model_as_dict(chunk)
+
+        # Accumulate response data
+        if chunk_dict.get("model"):
+            self._complete_response["model"] = chunk_dict["model"]
+
+        # Accumulate usage information from chunks
+        if chunk_dict.get("usage"):
+            self._complete_response["usage"] = chunk_dict["usage"]
+
+        # Collect content from delta
+        choices = chunk_dict.get("choices", [])
+        for choice in choices:
+            delta = choice.get("delta", {})
+            if delta.get("content"):
+                self._content_parts.append(delta["content"])
+
+            # Collect finish_reason from choices
+            if choice.get("finish_reason"):
+                if "choices" not in self._complete_response:
+                    self._complete_response["choices"] = []
+                # Ensure we have enough choice entries
+                while len(self._complete_response["choices"]) <= len(choices) - 1:
+                    self._complete_response["choices"].append(
+                        {"message": {"role": "assistant", "content": ""}, "finish_reason": None}
+                    )
+
+                choice_index = choice.get("index", 0)
+                if choice_index < len(self._complete_response["choices"]):
+                    self._complete_response["choices"][choice_index]["finish_reason"] = choice["finish_reason"]
+
+        # Add chunk event
+        self._span.add_event("llm.content.completion.chunk")
+
+    def _finalize_span(self) -> None:
+        """Finalize span when streaming is complete"""
+        end_time = time.time()
+        duration = end_time - self._start_time
+
+        # Set accumulated content
+        if self._content_parts:
+            full_content = "".join(self._content_parts)
+            self._span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.0.content", full_content)
+            self._span.set_attribute(f"{SpanAttributes.LLM_COMPLETIONS}.0.role", "assistant")
+
+        set_response_attributes(self._span, self._complete_response, "chat")
+        self._span.set_attribute("llm.response.duration", duration)
+        self._span.set_status(Status(StatusCode.OK))
+        self._span.end()
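
Both streaming wrappers subclass wrapt.ObjectProxy so the instrumented iterator still satisfies isinstance checks against the provider's response type, while __next__/__anext__ observe each chunk and close the span only when the stream is exhausted. The same mechanics in a self-contained miniature (a sketch, not the package's code; note wrapt's _self_ prefix convention for state kept on the proxy itself):

import wrapt


class CountingStream(wrapt.ObjectProxy):
    """Proxy an iterator, observe each chunk, and run finalization
    logic when the stream is exhausted, like StreamingWrapper above."""

    def __init__(self, wrapped):
        super().__init__(wrapped)
        self._self_count = 0  # _self_* attributes stay on the proxy

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
            self._self_count += 1  # stand-in for _process_chunk()
            return chunk
        except StopIteration:
            # Stand-in for _finalize_span(): runs exactly once, at exhaustion.
            print(f"stream done after {self._self_count} chunks")
            raise


for chunk in CountingStream(iter(["a", "b", "c"])):
    pass  # consumes the proxied stream; the summary prints at the end
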
netra/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.30"
+__version__ = "0.1.31"
netra_sdk-0.1.31.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: netra-sdk
-Version: 0.1.30
+Version: 0.1.31
 Summary: A Python SDK for AI application observability that provides OpenTelemetry-based monitoring, tracing, and PII protection for LLM and vector database applications. Enables easy instrumentation, session tracking, and privacy-focused data collection for AI systems in production environments.
 License: Apache-2.0
 Keywords: netra,tracing,observability,sdk,ai,llm,vector,database
@@ -303,6 +303,7 @@ async def async_span(data):
 - **CrewAI** - Multi-agent AI systems
 - **Pydantic AI** - AI model communication standard
 - **MCP (Model Context Protocol)** - AI model communication standard
+- **LiteLLM** - LLM provider agnostic client
 
 ## 🛡️ Privacy Protection & Security
 
netra_sdk-0.1.31.dist-info/RECORD CHANGED
@@ -9,7 +9,7 @@ netra/exceptions/__init__.py,sha256=uDgcBxmC4WhdS7HRYQk_TtJyxH1s1o6wZmcsnSHLAcM,
 netra/exceptions/injection.py,sha256=ke4eUXRYUFJkMZgdSyPPkPt5PdxToTI6xLEBI0hTWUQ,1332
 netra/exceptions/pii.py,sha256=MT4p_x-zH3VtYudTSxw1Z9qQZADJDspq64WrYqSWlZc,2438
 netra/input_scanner.py,sha256=At6N9gNY8cR0O6S8x3K6swWBV3P1a_9O-XBNM_pcKz4,5348
-netra/instrumentation/__init__.py,sha256=pJOkAO1h7rdM_VwvZ_FZZ-zq8PCmLOzW4jvr_CwWYCI,40792
+netra/instrumentation/__init__.py,sha256=HdG3n5TxPRUNlOxsqjlvwDmBcnm3UtYx1OecLhnLeQM,41578
 netra/instrumentation/aiohttp/__init__.py,sha256=M1kuF0R3gKY5rlbhEC1AR13UWHelmfokluL2yFysKWc,14398
 netra/instrumentation/aiohttp/version.py,sha256=Zy-0Aukx-HS_Mo3NKPWg-hlUoWKDzS0w58gLoVtJec8,24
 netra/instrumentation/cohere/__init__.py,sha256=3XwmCAZwZiMkHdNN3YvcBOLsNCx80ymbU31TyMzv1IY,17685
@@ -22,7 +22,10 @@ netra/instrumentation/google_genai/utils.py,sha256=2OeSN5jUaMKF4x5zWiW65R1LB_a44
 netra/instrumentation/google_genai/version.py,sha256=Hww1duZrC8kYK7ThBSQVyz0HNOb0ys_o8Pln-wVQ1hI,23
 netra/instrumentation/httpx/__init__.py,sha256=w1su_eQP_w5ZJHq0Lf-4miF5zM4OOW0ItmRp0wi85Ew,19388
 netra/instrumentation/httpx/version.py,sha256=ZRQKbgDaGz_yuLk-cUKuk6ZBKCSRKZC8nQd041NRNXk,23
-netra/instrumentation/instruments.py,sha256=JJF8J2O2Xd3w3k33ZYxpFNrwWgl_veRNxV6QUFCsFn0,4301
+netra/instrumentation/instruments.py,sha256=O6MI_BO-5EBkVqI-dr5eqhYnk8mP5QEpI0RWJ7Fe3FQ,4349
+netra/instrumentation/litellm/__init__.py,sha256=H9FsdEq-CL39zbl_dLm8D43-D1vAjoNqFTBpbmZsVXs,6740
+netra/instrumentation/litellm/version.py,sha256=J-j-u0itpEFT6irdmWmixQqYMadNl1X91TxUmoiLHMI,22
+netra/instrumentation/litellm/wrappers.py,sha256=H_UG0et6PUmj6CQagvNzbs_WodNTMruzzGOHhedmTko,22840
 netra/instrumentation/mistralai/__init__.py,sha256=RE0b-rS6iXdoynJMFKHL9s97eYo5HghrJa013fR4ZhI,18910
 netra/instrumentation/mistralai/config.py,sha256=XCyo3mk30qkvqyCqeTrKwROahu0gcOEwmbDLOo53J5k,121
 netra/instrumentation/mistralai/utils.py,sha256=nhdIer5gJFxuGwg8FCT222hggDHeMQDhJctnDSwLqcc,894
@@ -44,8 +47,8 @@ netra/scanner.py,sha256=kyDpeZiscCPb6pjuhS-sfsVj-dviBFRepdUWh0sLoEY,11554
 netra/session_manager.py,sha256=AoQa-k4dFcq7PeOD8G8DNzhLzL1JrHUW6b_y8mRyTQo,10255
 netra/span_wrapper.py,sha256=lGuV1F4Q5I_swIoIof5myzOQCFmGFdtrpgfQt7dTTus,8105
 netra/tracer.py,sha256=YiuijB_5DBOLVgE39Lj3thWVmUqHLcqbdFVB0HGovW0,3543
-netra/version.py,sha256=2GUJJyX8g8EAXKUqyj7DGVzG-jNXOGaqVSWilvGYuX8,23
-netra_sdk-0.1.30.dist-info/LICENCE,sha256=8B_UoZ-BAl0AqiHAHUETCgd3I2B9yYJ1WEQtVb_qFMA,11359
-netra_sdk-0.1.30.dist-info/METADATA,sha256=9quLP0B1XfOdcQWU4vsIY44Gf2CwWiF_er2-Wa6wKvM,28151
-netra_sdk-0.1.30.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-netra_sdk-0.1.30.dist-info/RECORD,,
+netra/version.py,sha256=i-fDEsQ0iAiPKXFaj9eERDqcxl3BqNnavaCEqpNxmVI,23
+netra_sdk-0.1.31.dist-info/LICENCE,sha256=8B_UoZ-BAl0AqiHAHUETCgd3I2B9yYJ1WEQtVb_qFMA,11359
+netra_sdk-0.1.31.dist-info/METADATA,sha256=VvltGCy_nbt-TRB91KiH_hu6YC4CceY_uQW-UYXT7NE,28196
+netra_sdk-0.1.31.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+netra_sdk-0.1.31.dist-info/RECORD,,