netra_sdk-0.1.0-py3-none-any.whl

This diff shows the content of a publicly available package version released to a supported registry. It is provided for informational purposes only and reflects the package exactly as it appears in that public registry.

Potentially problematic release: this version of netra-sdk might be problematic.

Files changed (42)
  1. netra/__init__.py +148 -0
  2. netra/anonymizer/__init__.py +7 -0
  3. netra/anonymizer/anonymizer.py +79 -0
  4. netra/anonymizer/base.py +159 -0
  5. netra/anonymizer/fp_anonymizer.py +182 -0
  6. netra/config.py +111 -0
  7. netra/decorators.py +167 -0
  8. netra/exceptions/__init__.py +6 -0
  9. netra/exceptions/injection.py +33 -0
  10. netra/exceptions/pii.py +46 -0
  11. netra/input_scanner.py +142 -0
  12. netra/instrumentation/__init__.py +257 -0
  13. netra/instrumentation/aiohttp/__init__.py +378 -0
  14. netra/instrumentation/aiohttp/version.py +1 -0
  15. netra/instrumentation/cohere/__init__.py +446 -0
  16. netra/instrumentation/cohere/version.py +1 -0
  17. netra/instrumentation/google_genai/__init__.py +506 -0
  18. netra/instrumentation/google_genai/config.py +5 -0
  19. netra/instrumentation/google_genai/utils.py +31 -0
  20. netra/instrumentation/google_genai/version.py +1 -0
  21. netra/instrumentation/httpx/__init__.py +545 -0
  22. netra/instrumentation/httpx/version.py +1 -0
  23. netra/instrumentation/instruments.py +78 -0
  24. netra/instrumentation/mistralai/__init__.py +545 -0
  25. netra/instrumentation/mistralai/config.py +5 -0
  26. netra/instrumentation/mistralai/utils.py +30 -0
  27. netra/instrumentation/mistralai/version.py +1 -0
  28. netra/instrumentation/weaviate/__init__.py +121 -0
  29. netra/instrumentation/weaviate/version.py +1 -0
  30. netra/pii.py +757 -0
  31. netra/processors/__init__.py +4 -0
  32. netra/processors/session_span_processor.py +55 -0
  33. netra/processors/span_aggregation_processor.py +365 -0
  34. netra/scanner.py +104 -0
  35. netra/session.py +185 -0
  36. netra/session_manager.py +96 -0
  37. netra/tracer.py +99 -0
  38. netra/version.py +1 -0
  39. netra_sdk-0.1.0.dist-info/LICENCE +201 -0
  40. netra_sdk-0.1.0.dist-info/METADATA +573 -0
  41. netra_sdk-0.1.0.dist-info/RECORD +42 -0
  42. netra_sdk-0.1.0.dist-info/WHEEL +4 -0
netra/instrumentation/cohere/__init__.py
@@ -0,0 +1,446 @@
+ import logging
+ import os
+ from typing import Any, Collection, Dict, Generator, Iterator, List, Optional, Union
+
+ from opentelemetry import context as context_api
+ from opentelemetry.instrumentation.cohere.config import Config
+ from opentelemetry.instrumentation.cohere.utils import dont_throw
+ from opentelemetry.instrumentation.cohere.version import __version__
+ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+ from opentelemetry.instrumentation.utils import (
+     _SUPPRESS_INSTRUMENTATION_KEY,
+     unwrap,
+ )
+ from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
+ from opentelemetry.semconv_ai import (
+     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+     LLMRequestTypeValues,
+     SpanAttributes,
+ )
+ from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer, set_span_in_context
+ from opentelemetry.trace.status import Status, StatusCode
+ from wrapt import wrap_function_wrapper
+
+ logger = logging.getLogger(__name__)
+
+ _instruments = ("cohere >=4.2.7, <6",)
+
+ WRAPPED_METHODS = [
+     {
+         "object": "ClientV2",
+         "method": "chat",
+         "span_name": "cohere.chat",
+     },
+     {
+         "object": "ClientV2",
+         "method": "chat_stream",
+         "span_name": "cohere.chat_stream",
+     },
+     {
+         "object": "ClientV2",
+         "method": "rerank",
+         "span_name": "cohere.rerank",
+     },
+     {
+         "object": "AsyncClientV2",
+         "method": "chat",
+         "span_name": "cohere.async.chat",
+     },
+     {
+         "object": "AsyncClientV2",
+         "method": "rerank",
+         "span_name": "cohere.async.rerank",
+     },
+ ]
+
+
+ def should_send_prompts() -> bool:
+     return (os.getenv("TRACELOOP_TRACE_CONTENT") or "true").lower() == "true" or context_api.get_value(
+         "override_enable_content_tracing"
+     )
+
+
+ def _set_span_attribute(span: Span, name: str, value: Any) -> None:
+     if value is not None:
+         if value != "":
+             span.set_attribute(name, value)
+     return
+
+
+ @dont_throw  # type: ignore[misc]
+ def _set_input_attributes(span: Span, llm_request_type: LLMRequestTypeValues, kwargs: Dict[str, Any]) -> None:
+     _set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
+     _set_span_attribute(span, SpanAttributes.LLM_REQUEST_MAX_TOKENS, kwargs.get("max_tokens_to_sample"))
+     _set_span_attribute(span, SpanAttributes.LLM_REQUEST_TEMPERATURE, kwargs.get("temperature"))
+     _set_span_attribute(span, SpanAttributes.LLM_REQUEST_TOP_P, kwargs.get("top_p"))
+     _set_span_attribute(span, SpanAttributes.LLM_FREQUENCY_PENALTY, kwargs.get("frequency_penalty"))
+     _set_span_attribute(span, SpanAttributes.LLM_PRESENCE_PENALTY, kwargs.get("presence_penalty"))
+
+     if should_send_prompts():
+         if llm_request_type == LLMRequestTypeValues.COMPLETION:
+             _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")
+             _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.content", kwargs.get("prompt"))
+         elif llm_request_type == LLMRequestTypeValues.CHAT:
+             messages = kwargs.get("messages")
+             if messages:
+                 for index, message in enumerate(messages):
+                     if hasattr(message, "content"):
+                         _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.{index}.role", "user")
+                         _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.{index}.content", message.content)
+                     elif isinstance(message, dict):
+                         _set_span_attribute(
+                             span, f"{SpanAttributes.LLM_PROMPTS}.{index}.role", message.get("role", "user")
+                         )
+                         _set_span_attribute(
+                             span, f"{SpanAttributes.LLM_PROMPTS}.{index}.content", message.get("content")
+                         )
+             else:
+                 _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")
+                 _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.content", kwargs.get("message"))
+         elif llm_request_type == LLMRequestTypeValues.RERANK:
+             documents = kwargs.get("documents", [])
+             for index, document in enumerate(documents):
+                 _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.{index}.role", "system")
+                 _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.{index}.content", document)
+
+             _set_span_attribute(
+                 span,
+                 f"{SpanAttributes.LLM_PROMPTS}.{len(documents)}.role",
+                 "user",
+             )
+             _set_span_attribute(
+                 span,
+                 f"{SpanAttributes.LLM_PROMPTS}.{len(documents)}.content",
+                 kwargs.get("query"),
+             )
+
+     return
+
+
+ def _set_span_chat_response(span: Span, response: Any) -> None:
+     index = 0
+     prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
+
+     _set_span_attribute(span, GEN_AI_RESPONSE_ID, response.id)
+
+     if hasattr(response, "message") and hasattr(response.message, "content"):
+         text_content = []
+         for content_item in response.message.content:
+             if hasattr(content_item, "text"):
+                 text_content.append(content_item.text)
+         if text_content:
+             _set_span_attribute(span, f"{prefix}.content", "\n".join(text_content))
+         _set_span_attribute(span, f"{prefix}.role", "assistant")
+
+     if not hasattr(response, "usage") or response.usage is None:
+         logger.debug("No usage information found in response")
+         return
+
+     logger.debug(f"Response usage object: {response.usage}")
+
+     input_tokens = None
+     output_tokens = None
+
+     if hasattr(response.usage, "billed_units") and response.usage.billed_units is not None:
+         logger.debug(f"Found billed_units: {response.usage.billed_units}")
+         if (
+             hasattr(response.usage.billed_units, "input_tokens")
+             and response.usage.billed_units.input_tokens is not None
+         ):
+             input_tokens = int(float(response.usage.billed_units.input_tokens))
+             logger.debug(f"Extracted input_tokens from billed_units: {input_tokens}")
+         if (
+             hasattr(response.usage.billed_units, "output_tokens")
+             and response.usage.billed_units.output_tokens is not None
+         ):
+             output_tokens = int(float(response.usage.billed_units.output_tokens))
+             logger.debug(f"Extracted output_tokens from billed_units: {output_tokens}")
+
+     if input_tokens is not None:
+         logger.debug(f"Setting {SpanAttributes.LLM_USAGE_PROMPT_TOKENS} to {input_tokens}")
+         _set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
+
+     if output_tokens is not None:
+         logger.debug(f"Setting {SpanAttributes.LLM_USAGE_COMPLETION_TOKENS} to {output_tokens}")
+         _set_span_attribute(span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens)
+
+     if input_tokens is not None and output_tokens is not None:
+         total_tokens = input_tokens + output_tokens
+         logger.debug(f"Setting {SpanAttributes.LLM_USAGE_TOTAL_TOKENS} to {total_tokens}")
+         _set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)
+         logger.info(
+             f"Successfully set token usage - Input: {input_tokens}, Output: {output_tokens}, Total: {total_tokens}"
+         )
+     else:
+         logger.warning(f"Could not extract complete token usage - Input: {input_tokens}, Output: {output_tokens}")
+
+
+ def _set_span_generations_response(span: Span, response: Any) -> None:
+     _set_span_attribute(span, GEN_AI_RESPONSE_ID, response.id)
+     if hasattr(response, "generations"):
+         generations = response.generations  # Cohere v5
+     else:
+         generations = response  # Cohere v4
+
+     for index, generation in enumerate(generations):
+         prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
+         _set_span_attribute(span, f"{prefix}.content", generation.text)
+         _set_span_attribute(span, f"gen_ai.response.{index}.id", generation.id)
+
+
+ def _set_span_rerank_response(span: Span, response: Any) -> None:
+     _set_span_attribute(span, GEN_AI_RESPONSE_ID, response.id)
+     for idx, doc in enumerate(response.results):
+         prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{idx}"
+         _set_span_attribute(span, f"{prefix}.role", "assistant")
+         content = f"Doc {doc.index}, Score: {doc.relevance_score}"
+         if doc.document:
+             if hasattr(doc.document, "text"):
+                 content += f"\n{doc.document.text}"
+             else:
+                 content += f"\n{doc.document.get('text')}"
+         _set_span_attribute(
+             span,
+             f"{prefix}.content",
+             content,
+         )
+
+
+ @dont_throw  # type: ignore[misc]
+ def _set_response_attributes(span: Span, llm_request_type: LLMRequestTypeValues, response: Any) -> None:
+
+     if should_send_prompts():
+         if llm_request_type == LLMRequestTypeValues.CHAT:
+             _set_span_chat_response(span, response)
+         elif llm_request_type == LLMRequestTypeValues.COMPLETION:
+             _set_span_generations_response(span, response)
+         elif llm_request_type == LLMRequestTypeValues.RERANK:
+             _set_span_rerank_response(span, response)
+
+
+ def _with_tracer_wrapper(func: Any) -> Any:
+     """Helper for providing tracer for wrapper functions."""
+
+     def _with_tracer(tracer: Tracer, to_wrap: Dict[str, str]) -> Any:
+         def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
+             return func(tracer, to_wrap, wrapped, instance, args, kwargs)
+
+         return wrapper
+
+     return _with_tracer
+
+
+ def _llm_request_type_by_method(method_name: Optional[str]) -> LLMRequestTypeValues:
+     if method_name in ["chat", "chat_stream"]:
+         return LLMRequestTypeValues.CHAT
+     elif method_name == "generate":
+         return LLMRequestTypeValues.COMPLETION
+     elif method_name == "rerank":
+         return LLMRequestTypeValues.RERANK
+     else:
+         return LLMRequestTypeValues.UNKNOWN
+
+
+ def _build_from_streaming_response(
+     span: Span, response: Iterator[Any], llm_request_type: LLMRequestTypeValues, context_token: Any
+ ) -> Generator[Any, None, None]:
+     """Build response from streaming events and set span attributes."""
+     response_id = None
+     content_parts = []
+     usage_info = None
+
+     try:
+         for event in response:
+             if hasattr(event, "type"):
+                 if event.type == "message-start" and hasattr(event, "id"):
+                     response_id = event.id
+
+                 elif event.type == "content-delta":
+                     if (
+                         hasattr(event, "delta")
+                         and hasattr(event.delta, "message")
+                         and hasattr(event.delta.message, "content")
+                         and hasattr(event.delta.message.content, "text")
+                     ):
+                         content_parts.append(event.delta.message.content.text)
+
+                 elif event.type == "message-end":
+                     if hasattr(event, "delta") and hasattr(event.delta, "usage"):
+                         usage_info = event.delta.usage
+
+             yield event
+
+         if response_id:
+             _set_span_attribute(span, GEN_AI_RESPONSE_ID, response_id)
+
+         if should_send_prompts() and content_parts:
+             prefix = f"{SpanAttributes.LLM_COMPLETIONS}.0"
+             full_content = "".join(content_parts)
+             _set_span_attribute(span, f"{prefix}.content", full_content)
+             _set_span_attribute(span, f"{prefix}.role", "assistant")
+
+         if usage_info and hasattr(usage_info, "billed_units") and usage_info.billed_units:
+             input_tokens = None
+             output_tokens = None
+
+             if hasattr(usage_info.billed_units, "input_tokens") and usage_info.billed_units.input_tokens is not None:
+                 input_tokens = int(float(usage_info.billed_units.input_tokens))
+
+             if hasattr(usage_info.billed_units, "output_tokens") and usage_info.billed_units.output_tokens is not None:
+                 output_tokens = int(float(usage_info.billed_units.output_tokens))
+
+             if input_tokens is not None:
+                 _set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
+             if output_tokens is not None:
+                 _set_span_attribute(span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens)
+             if input_tokens is not None and output_tokens is not None:
+                 total_tokens = input_tokens + output_tokens
+                 _set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)
+
+         span.set_status(Status(StatusCode.OK))
+
+     except Exception:
+         span.set_status(Status(StatusCode.ERROR))
+         raise
+     finally:
+         span.end()
+         context_api.detach(context_token)
+
+
+ @_with_tracer_wrapper
+ def _wrap(tracer: Tracer, to_wrap: Dict[str, str], wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
+     """Instruments and calls every function defined in TO_WRAP."""
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+         SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+     ):
+         return wrapped(*args, **kwargs)
+
+     name = to_wrap.get("span_name")
+     method_name = to_wrap.get("method")
+     llm_request_type = _llm_request_type_by_method(method_name)
+
+     if method_name == "chat_stream":
+         span = tracer.start_span(
+             name,
+             kind=SpanKind.CLIENT,
+             attributes={
+                 SpanAttributes.LLM_SYSTEM: "Cohere",
+                 SpanAttributes.LLM_REQUEST_TYPE: llm_request_type.value,
+             },
+         )
+
+         ctx = set_span_in_context(span)
+         token = context_api.attach(ctx)
+
+         try:
+             if span.is_recording():
+                 _set_input_attributes(span, llm_request_type, kwargs)
+
+             response = wrapped(*args, **kwargs)
+
+             if response:
+                 return _build_from_streaming_response(span, response, llm_request_type, token)
+             else:
+                 span.set_status(Status(StatusCode.ERROR))
+                 span.end()
+                 return response
+         except Exception:
+             span.set_status(Status(StatusCode.ERROR))
+             span.end()
+             context_api.detach(token)
+             raise
+     else:
+         with tracer.start_as_current_span(
+             name,
+             kind=SpanKind.CLIENT,
+             attributes={
+                 SpanAttributes.LLM_SYSTEM: "Cohere",
+                 SpanAttributes.LLM_REQUEST_TYPE: llm_request_type.value,
+             },
+         ) as span:
+             if span.is_recording():
+                 _set_input_attributes(span, llm_request_type, kwargs)
+
+             response = wrapped(*args, **kwargs)
+
+             if response:
+                 if span.is_recording():
+                     _set_response_attributes(span, llm_request_type, response)
+                     span.set_status(Status(StatusCode.OK))
+
+             return response
+
+
+ @_with_tracer_wrapper
+ async def _async_wrap(
+     tracer: Tracer, to_wrap: Dict[str, str], wrapped: Any, instance: Any, args: Any, kwargs: Any
+ ) -> Any:
+     """Instruments and calls every async function defined in TO_WRAP."""
+     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+         SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+     ):
+         return await wrapped(*args, **kwargs)
+
+     name = to_wrap.get("span_name")
+     method_name = to_wrap.get("method")
+     llm_request_type = _llm_request_type_by_method(method_name)
+
+     with tracer.start_as_current_span(
+         name,
+         kind=SpanKind.CLIENT,
+         attributes={
+             SpanAttributes.LLM_SYSTEM: "Cohere",
+             SpanAttributes.LLM_REQUEST_TYPE: llm_request_type.value,
+         },
+     ) as span:
+         if span.is_recording():
+             _set_input_attributes(span, llm_request_type, kwargs)
+
+         response = await wrapped(*args, **kwargs)
+
+         if response:
+             if span.is_recording():
+                 _set_response_attributes(span, llm_request_type, response)
+                 span.set_status(Status(StatusCode.OK))
+
+         return response
+
+
+ class CohereInstrumentor(BaseInstrumentor):  # type: ignore
+     """An instrumentor for Cohere's client library."""
+
+     def __init__(self, exception_logger: Optional[Any] = None) -> None:
+         super().__init__()
+         Config.exception_logger = exception_logger
+
+     def instrumentation_dependencies(self) -> Collection[str]:
+         return _instruments
+
+     def _instrument(self, **kwargs: Any) -> None:
+         tracer_provider = kwargs.get("tracer_provider")
+         tracer = get_tracer(__name__, __version__, tracer_provider)
+         for wrapped_method in WRAPPED_METHODS:
+             wrap_object = wrapped_method.get("object")
+             wrap_method = wrapped_method.get("method")
+
+             # Use async wrapper for AsyncClientV2
+             if wrap_object == "AsyncClientV2":
+                 wrap_function_wrapper(
+                     "cohere",
+                     f"{wrap_object}.{wrap_method}",
+                     _async_wrap(tracer, wrapped_method),
+                 )
+             else:
+                 wrap_function_wrapper(
+                     "cohere",
+                     f"{wrap_object}.{wrap_method}",
+                     _wrap(tracer, wrapped_method),
+                 )
+
+     def _uninstrument(self, **kwargs: Any) -> None:
+         for wrapped_method in WRAPPED_METHODS:
+             wrap_object = wrapped_method.get("object")
+             unwrap(
+                 f"cohere.{wrap_object}",
+                 wrapped_method.get("method"),
+             )
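
For orientation, a minimal usage sketch (not part of the published diff): it assumes only the CohereInstrumentor interface shown above plus a standard OpenTelemetry SDK setup, and the provider/exporter choices below are illustrative.

# Hypothetical wiring example; nothing here ships in the wheel itself.
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

from netra.instrumentation.cohere import CohereInstrumentor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

# _instrument() reads "tracer_provider" from kwargs and wraps every
# ClientV2 / AsyncClientV2 method listed in WRAPPED_METHODS.
CohereInstrumentor().instrument(tracer_provider=provider)

# From here on, cohere.ClientV2(...).chat(...) and rerank(...) calls emit
# CLIENT spans such as "cohere.chat" carrying the request/response
# attributes set by this module.
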
netra/instrumentation/cohere/version.py
@@ -0,0 +1 @@
+ __version__ = "5.15.0"
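
A behavioral note on the instrumentation above: prompt and completion content is recorded only when should_send_prompts() returns true, which happens when the TRACELOOP_TRACE_CONTENT environment variable is unset or "true", or when the "override_enable_content_tracing" context value is set. A small sketch of opting out of content capture, assuming the variable is set before any instrumented call is made:

import os

# Disable prompt/completion capture; request metadata such as the model
# and temperature is still recorded by the wrappers above.
os.environ["TRACELOOP_TRACE_CONTENT"] = "false"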