netra-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of netra-sdk might be problematic. Click here for more details.

Files changed (42) hide show
  1. netra/__init__.py +148 -0
  2. netra/anonymizer/__init__.py +7 -0
  3. netra/anonymizer/anonymizer.py +79 -0
  4. netra/anonymizer/base.py +159 -0
  5. netra/anonymizer/fp_anonymizer.py +182 -0
  6. netra/config.py +111 -0
  7. netra/decorators.py +167 -0
  8. netra/exceptions/__init__.py +6 -0
  9. netra/exceptions/injection.py +33 -0
  10. netra/exceptions/pii.py +46 -0
  11. netra/input_scanner.py +142 -0
  12. netra/instrumentation/__init__.py +257 -0
  13. netra/instrumentation/aiohttp/__init__.py +378 -0
  14. netra/instrumentation/aiohttp/version.py +1 -0
  15. netra/instrumentation/cohere/__init__.py +446 -0
  16. netra/instrumentation/cohere/version.py +1 -0
  17. netra/instrumentation/google_genai/__init__.py +506 -0
  18. netra/instrumentation/google_genai/config.py +5 -0
  19. netra/instrumentation/google_genai/utils.py +31 -0
  20. netra/instrumentation/google_genai/version.py +1 -0
  21. netra/instrumentation/httpx/__init__.py +545 -0
  22. netra/instrumentation/httpx/version.py +1 -0
  23. netra/instrumentation/instruments.py +78 -0
  24. netra/instrumentation/mistralai/__init__.py +545 -0
  25. netra/instrumentation/mistralai/config.py +5 -0
  26. netra/instrumentation/mistralai/utils.py +30 -0
  27. netra/instrumentation/mistralai/version.py +1 -0
  28. netra/instrumentation/weaviate/__init__.py +121 -0
  29. netra/instrumentation/weaviate/version.py +1 -0
  30. netra/pii.py +757 -0
  31. netra/processors/__init__.py +4 -0
  32. netra/processors/session_span_processor.py +55 -0
  33. netra/processors/span_aggregation_processor.py +365 -0
  34. netra/scanner.py +104 -0
  35. netra/session.py +185 -0
  36. netra/session_manager.py +96 -0
  37. netra/tracer.py +99 -0
  38. netra/version.py +1 -0
  39. netra_sdk-0.1.0.dist-info/LICENCE +201 -0
  40. netra_sdk-0.1.0.dist-info/METADATA +573 -0
  41. netra_sdk-0.1.0.dist-info/RECORD +42 -0
  42. netra_sdk-0.1.0.dist-info/WHEEL +4 -0
netra/__init__.py ADDED
@@ -0,0 +1,148 @@
1
+ import logging
2
+ import threading
3
+ from typing import Any, Dict, Optional, Set
4
+
5
+ from netra.instrumentation.instruments import NetraInstruments
6
+
7
+ from .config import Config
8
+
9
+ # Instrumentor functions
10
+ from .instrumentation import init_instrumentations
11
+ from .session import Session
12
+ from .session_manager import SessionManager
13
+ from .tracer import Tracer
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class Netra:
    """
    Facade of the Netra SDK.

    Call ``Netra.init(...)`` once when the application starts. It builds a
    ``Config``, constructs a ``Tracer`` from it and runs
    ``init_instrumentations``. The remaining class methods forward to
    ``SessionManager`` or create a ``Session``.
    """

    _initialized = False
    # Re-entrant lock: the thread that already holds it may acquire it again.
    _init_lock = threading.RLock()

    @classmethod
    def is_initialized(cls) -> bool:
        """Report whether ``init`` has completed.

        The flag is read while holding the initialization lock.

        Returns:
            bool: True once ``init`` has finished successfully, False otherwise.
        """
        with cls._init_lock:
            state = cls._initialized
        return state

    @classmethod
    def init(
        cls,
        app_name: Optional[str] = None,
        headers: Optional[str] = None,
        disable_batch: Optional[bool] = None,
        trace_content: Optional[bool] = None,
        resource_attributes: Optional[Dict[str, Any]] = None,
        environment: Optional[str] = None,
        instruments: Optional[Set[NetraInstruments]] = None,
        block_instruments: Optional[Set[NetraInstruments]] = None,
    ) -> None:
        """Initialize the SDK once per process.

        A second call logs a warning and returns without doing anything.

        Args:
            app_name: Forwarded unchanged to ``Config``.
            headers: Forwarded unchanged to ``Config``.
            disable_batch: Forwarded unchanged to ``Config``.
            trace_content: Forwarded unchanged to ``Config`` (it is not
                passed to ``init_instrumentations`` here).
            resource_attributes: Forwarded unchanged to ``Config``.
            environment: Forwarded unchanged to ``Config``.
            instruments: Forwarded unchanged to ``init_instrumentations``.
            block_instruments: Forwarded unchanged to ``init_instrumentations``.
        """
        # The lock is held for the whole method so that two threads cannot
        # both get past the "already initialized" check.
        with cls._init_lock:
            if cls._initialized:
                logger.warning("Netra.init() called more than once; ignoring subsequent calls.")
                return

            config_options = {
                "app_name": app_name,
                "headers": headers,
                "disable_batch": disable_batch,
                "trace_content": trace_content,
                "resource_attributes": resource_attributes,
                "environment": environment,
            }
            config = Config(**config_options)

            # Tracer is constructed for its side effects; the instance is not kept.
            Tracer(config)

            init_instrumentations(
                should_enrich_metrics=True,
                base64_image_uploader=None,
                instruments=instruments,
                block_instruments=block_instruments,
            )

            # Only flag success after every step above completed without raising.
            cls._initialized = True
            logger.info("Netra successfully initialized.")

    @classmethod
    def set_session_id(cls, session_id: str) -> None:
        """
        Store the session identifier under the "session_id" session-context key.

        Args:
            session_id: Session identifier
        """
        SessionManager.set_session_context("session_id", session_id)

    @classmethod
    def set_user_id(cls, user_id: str) -> None:
        """
        Store the user identifier under the "user_id" session-context key.

        Args:
            user_id: User identifier
        """
        SessionManager.set_session_context("user_id", user_id)

    @classmethod
    def set_tenant_id(cls, tenant_id: str) -> None:
        """
        Store the tenant identifier under the "tenant_id" session-context key.

        Args:
            tenant_id: Tenant identifier
        """
        SessionManager.set_session_context("tenant_id", tenant_id)

    @classmethod
    def set_custom_attributes(cls, key: str, value: Any) -> None:
        """
        Store one custom attribute under the "custom_attributes" session-context key.

        Args:
            key: Custom attribute key
            value: Custom attribute value
        """
        attribute = {key: value}
        SessionManager.set_session_context("custom_attributes", attribute)

    @classmethod
    def set_custom_event(cls, event_name: str, attributes: Any) -> None:
        """
        Forward a custom event to ``SessionManager.set_custom_event``.

        Args:
            event_name: Name of the custom event
            attributes: Attributes of the custom event
        """
        SessionManager.set_custom_event(event_name, attributes)

    @classmethod
    def start_session(
        cls,
        name: str,
        attributes: Optional[Dict[str, str]] = None,
        module_name: str = "combat_sdk",
    ) -> Session:
        """
        Create and return a new ``Session``.

        Args:
            name: Passed to ``Session`` as its first positional argument.
            attributes: Passed to ``Session`` as its second positional argument.
            module_name: Passed to ``Session`` as its third positional argument.

        Returns:
            The newly constructed ``Session``.
        """
        session = Session(name, attributes, module_name)
        return session
146
+
147
+
148
+ __all__ = ["Netra"]
@@ -0,0 +1,7 @@
1
+ from .anonymizer import Anonymizer
2
+ from .base import AnonymizationResult
3
+
4
+ __all__ = [
5
+ "Anonymizer",
6
+ "AnonymizationResult",
7
+ ]
@@ -0,0 +1,79 @@
1
+ """
2
+ Custom anonymizer for PII data that provides consistent hashing of entities.
3
+
4
+ This module provides a custom anonymizer that can be used to replace PII entities
5
+ with consistent hash values, allowing for tracking the same entities across multiple
6
+ texts while maintaining privacy.
7
+ """
8
+
9
+ from typing import Callable, List, Optional
10
+
11
+ from presidio_analyzer.recognizer_result import RecognizerResult
12
+
13
+ from .base import AnonymizationResult, BaseAnonymizer
14
+ from .fp_anonymizer import FormatPreservingEmailAnonymizer
15
+
16
+
17
class Anonymizer:
    """
    Main anonymizer that delegates to different anonymizer classes based on entity type.

    E-mail entities are replaced with a format-preserving look-alike address;
    every other entity type is replaced with a ``<TYPE_hash>`` placeholder
    produced by the base anonymizer.
    """

    # Entity type names (compared upper-cased) that are treated as e-mail addresses.
    _EMAIL_ENTITY_TYPES = ("EMAIL", "EMAIL_ADDRESS")

    def __init__(self, hash_function: Optional[Callable[[str], str]] = None, cache_size: int = 1000):
        """
        Initialize the Anonymizer.

        Args:
            hash_function: Optional custom hash function that takes a string and returns a hash.
                Forwarded to the base anonymizer.
            cache_size: Maximum number of entities to cache. Forwarded to the base anonymizer.
                Default is 1000.
        """
        self.base_anonymizer = BaseAnonymizer(hash_function=hash_function, cache_size=cache_size)
        self.email_anonymizer = FormatPreservingEmailAnonymizer()

    @staticmethod
    def _select_non_overlapping(analyzer_results: List[RecognizerResult]) -> List[RecognizerResult]:
        """Return the results to apply, dropping any span that overlaps a kept one.

        Wider spans win; among equally wide spans the one listed first by the
        caller wins (``sorted`` is stable). Characters of a dropped span that
        lie outside the kept span are left unmasked.
        """
        kept: List[RecognizerResult] = []
        for candidate in sorted(analyzer_results, key=lambda r: r.end - r.start, reverse=True):
            if all(candidate.end <= other.start or candidate.start >= other.end for other in kept):
                kept.append(candidate)
        return kept

    def anonymize(self, text: str, analyzer_results: List[RecognizerResult]) -> AnonymizationResult:
        """
        Anonymize text by replacing detected entities using appropriate anonymization strategies.

        Overlapping results (for example a URL detected inside an e-mail
        address) are resolved first, because every offset refers to the
        original text and applying two overlapping replacements would corrupt
        the output.

        Args:
            text: The original text containing PII.
            analyzer_results: Objects exposing ``entity_type``, ``start`` and ``end``
                (RecognizerResult objects from the Presidio analyzer).

        Returns:
            AnonymizationResult containing the masked text and a mapping of each
            replacement value (hash or anonymized e-mail) to the original value.
        """
        masked_text = text
        entities_map: Dict[str, str] = {}

        # Replace right-to-left so earlier offsets stay valid after each splice.
        applicable = self._select_non_overlapping(analyzer_results)
        for result in sorted(applicable, key=lambda r: r.start, reverse=True):
            entity_type = result.entity_type
            entity_value = text[result.start : result.end]

            # The e-mail anonymizer splits on "@"; a span labelled as e-mail
            # without one falls back to the hash placeholder instead of raising.
            if entity_type.upper() in self._EMAIL_ENTITY_TYPES and "@" in entity_value:
                placeholder = self.email_anonymizer._anonymize_email(entity_value)
                entities_map[placeholder] = entity_value
            else:
                entity_hash = self.base_anonymizer._get_entity_hash(entity_type, entity_value)
                placeholder = f"<{entity_hash}>"
                entities_map[entity_hash] = entity_value

            masked_text = masked_text[: result.start] + placeholder + masked_text[result.end :]

        return AnonymizationResult(masked_text=masked_text, entities=entities_map)
@@ -0,0 +1,159 @@
1
+ """
2
+ Base anonymizer class for PII data anonymization.
3
+
4
+ This module provides the base anonymizer class that contains the core anonymization
5
+ logic that can be extended by specific anonymizer implementations.
6
+ """
7
+
8
+ import hashlib
9
+ from collections import OrderedDict
10
+ from dataclasses import dataclass
11
+ from typing import Callable, Dict, List, Optional
12
+
13
+ from presidio_analyzer.recognizer_result import RecognizerResult
14
+
15
+
16
@dataclass
class AnonymizationResult:
    """
    Outcome of one anonymization pass.

    Bundles the rewritten text together with the lookup table needed to map
    each replacement back to the value it replaced.
    """

    # Text in which every detected entity has been replaced.
    masked_text: str
    # Replacement key -> original entity value.
    entities: Dict[str, str]
28
+
29
+
30
class BaseAnonymizer:
    """
    Base anonymizer that replaces entities with consistent hash values.

    Every entity is replaced by ``<TYPE_hash>`` where ``hash`` comes from the
    configured hash function, so the same value of the same type always maps
    to the same placeholder.
    """

    def __init__(self, hash_function: Optional[Callable[[str], str]] = None, cache_size: int = 1000):
        """
        Initialize the BaseAnonymizer.

        Args:
            hash_function: Optional custom hash function that takes a string and returns a hash.
                If not provided, the first 8 hex digits of SHA-256 are used.
            cache_size: Maximum number of entities to cache. Uses LRU eviction policy.
                Default is 1000. Zero or a negative value disables caching.
        """
        self.hash_function = hash_function or self._default_hash_function
        self.cache_size = cache_size

        # An OrderedDict doubles as the LRU: most recently used keys sit at the end.
        if cache_size > 0:
            self._entity_hash_cache: Optional[OrderedDict[str, str]] = OrderedDict()
        else:
            self._entity_hash_cache = None

    def _default_hash_function(self, value: str) -> str:
        """
        Default hash function using SHA-256.

        Args:
            value: The string to hash.

        Returns:
            The first 8 hexadecimal digits of the SHA-256 digest.
        """
        return hashlib.sha256(value.encode()).hexdigest()[:8]

    def _get_entity_hash(self, entity_type: str, entity_value: str) -> str:
        """
        Return the ``TYPE_hash`` identifier for an entity, using the LRU cache when enabled.

        Args:
            entity_type: The type of entity (e.g., 'EMAIL', 'PHONE', etc.)
            entity_value: The original value of the entity.

        Returns:
            A hash string for the entity.
        """
        cache = self._entity_hash_cache
        if cache is None or self.cache_size <= 0:
            return f"{entity_type}_{self.hash_function(entity_value)}"

        cache_key = f"{entity_type}:{entity_value}"
        cached = cache.get(cache_key)
        if cached is not None:
            # A hit counts as a use: move the key to the "recent" end.
            cache.move_to_end(cache_key)
            return cached

        entity_hash = f"{entity_type}_{self.hash_function(entity_value)}"
        cache[cache_key] = entity_hash
        if len(cache) > self.cache_size:
            # The first key is the least recently used one.
            cache.popitem(last=False)
        return entity_hash

    def anonymize_entity(self, entity_type: str, entity_value: str) -> str:
        """
        Anonymize a single entity value.

        Args:
            entity_type: The type of entity (e.g., 'EMAIL', 'PHONE', etc.)
            entity_value: The original value of the entity.

        Returns:
            The placeholder ``<TYPE_hash>`` for the entity.
        """
        return f"<{self._get_entity_hash(entity_type, entity_value)}>"

    @staticmethod
    def _select_non_overlapping(analyzer_results: List[RecognizerResult]) -> List[RecognizerResult]:
        """Return the results to apply, dropping any span that overlaps a kept one.

        Wider spans win; among equally wide spans the one listed first by the
        caller wins (``sorted`` is stable). Characters of a dropped span that
        lie outside the kept span are left unmasked.
        """
        kept: List[RecognizerResult] = []
        for candidate in sorted(analyzer_results, key=lambda r: r.end - r.start, reverse=True):
            if all(candidate.end <= other.start or candidate.start >= other.end for other in kept):
                kept.append(candidate)
        return kept

    def anonymize(self, text: str, analyzer_results: List[RecognizerResult]) -> AnonymizationResult:
        """
        Anonymize text by replacing detected entities with hash values.

        Overlapping results are resolved first, because every offset refers
        to the original text and applying two overlapping replacements would
        corrupt the output.

        Args:
            text: The original text containing PII.
            analyzer_results: Objects exposing ``entity_type``, ``start`` and ``end``
                (RecognizerResult objects from the Presidio analyzer).

        Returns:
            AnonymizationResult containing the masked text and a mapping of entity hashes to original values.
        """
        masked_text = text
        entities_map: Dict[str, str] = {}

        # Replace right-to-left so earlier offsets stay valid after each splice.
        applicable = self._select_non_overlapping(analyzer_results)
        for result in sorted(applicable, key=lambda r: r.start, reverse=True):
            entity_value = text[result.start : result.end]
            entity_hash = self._get_entity_hash(result.entity_type, entity_value)
            masked_text = masked_text[: result.start] + f"<{entity_hash}>" + masked_text[result.end :]
            entities_map[entity_hash] = entity_value

        return AnonymizationResult(masked_text=masked_text, entities=entities_map)
@@ -0,0 +1,182 @@
1
+ import hashlib
2
+ import random
3
+ import re
4
+ from typing import Dict, Optional
5
+
6
+
7
class FormatPreservingEmailAnonymizer:
    """Replace e-mail addresses with deterministic look-alike addresses.

    The same input always yields the same output, independent of the order in
    which addresses were processed, because the replacement characters are
    drawn from a random generator seeded from the input itself.
    """

    # "[A-Za-z]" for the TLD: the former "[A-Z|a-z]" also accepted a literal "|".
    _EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")

    def __init__(self, preserve_length: bool = True, preserve_structure: bool = True):
        """
        Initialize the email anonymizer.

        Args:
            preserve_length: Whether to preserve the length of original parts
                (only consulted when preserve_structure is False).
            preserve_structure: Whether to preserve dots, hyphens in the structure
        """
        self.preserve_length = preserve_length
        self.preserve_structure = preserve_structure
        self.email_cache: Dict[str, str] = {}
        # Keyed by (seed, text). Keying by text alone let a local part and a
        # domain with the same text share one entry, which made the output
        # depend on which of the two was anonymized first.
        self.part_cache: Dict[tuple, str] = {}

        # Character sets for replacement
        self.alphanumeric = "abcdefghijklmnopqrstuvwxyz0123456789"
        self.letters = "abcdefghijklmnopqrstuvwxyz"

    def _get_deterministic_random(self, seed: str) -> random.Random:
        """Create a random generator seeded from the first 8 hex digits of md5(seed)."""
        hash_int = int(hashlib.md5(seed.encode()).hexdigest()[:8], 16)
        return random.Random(hash_int)

    def _preserve_structure_replace(self, text: str, seed: str) -> str:
        """
        Replace text while preserving structure (length, special chars, case pattern).

        Letters become random letters of the same case, digits become random
        digits, and every other character is kept unchanged.

        Args:
            text: The part to anonymize.
            seed: Seed string for the deterministic random generator.

        Returns:
            The anonymized part, same length as ``text``.
        """
        cache_key = (seed, text)
        cached = self.part_cache.get(cache_key)
        if cached is not None:
            return cached

        rng = self._get_deterministic_random(seed)
        pieces = []
        for char in text:
            if char.isalpha():
                replacement = rng.choice(self.letters)
                pieces.append(replacement.upper() if char.isupper() else replacement)
            elif char.isdigit():
                pieces.append(str(rng.randint(0, 9)))
            else:
                # Keep special characters (dots, hyphens, etc.)
                pieces.append(char)

        anonymized = "".join(pieces)
        self.part_cache[cache_key] = anonymized
        return anonymized

    def _simple_hash_replace(self, text: str, target_length: Optional[int] = None) -> str:
        """Return the md5 hex digest of ``text`` cut or "x"-padded to ``target_length``.

        Args:
            text: The part to anonymize.
            target_length: Desired output length; defaults to ``len(text)``.

        Returns:
            A string of exactly ``target_length`` characters (empty when the
            length is zero or negative).
        """
        if target_length is None:
            target_length = len(text)
        if target_length <= 0:
            return ""

        # Hex digits are already lower-case letters/digits, so the digest is
        # used as-is; anything beyond its 32 characters is filled with "x".
        digest = hashlib.md5(text.encode()).hexdigest()
        return digest[:target_length].ljust(target_length, "x")

    def _anonymize_part(self, part: str, role: str) -> str:
        """Anonymize one side of an address; ``role`` is "local" or "domain"."""
        if self.preserve_structure:
            return self._preserve_structure_replace(part, f"{role}_{part}")
        length = len(part) if self.preserve_length else 8
        return self._simple_hash_replace(part, length)

    def _anonymize_email(self, email: str) -> str:
        """
        Anonymize a single email while preserving format and structure.

        The address is split at the first "@". Input without "@" is
        anonymized as a bare local part (no "@" is added) instead of raising.

        Args:
            email: The address to anonymize.

        Returns:
            The anonymized address; repeated calls return the same value.
        """
        cached = self.email_cache.get(email)
        if cached is not None:
            return cached

        local_part, separator, domain = email.partition("@")
        anonymized_email = self._anonymize_part(local_part, "local")
        if separator:
            anonymized_email = f"{anonymized_email}@{self._anonymize_part(domain, 'domain')}"

        self.email_cache[email] = anonymized_email
        return anonymized_email

    def anonymize_text(self, text: str) -> str:
        """
        Anonymize all emails in the given text while preserving format.

        Args:
            text: Free text that may contain e-mail addresses.

        Returns:
            ``text`` with every matched address replaced.
        """
        return self._EMAIL_PATTERN.sub(lambda match: self._anonymize_email(match.group(0)), text)

    def get_mapping(self) -> Dict[str, str]:
        """Return a copy of the mapping of original emails to anonymized versions."""
        return dict(self.email_cache)
120
+
121
+
122
# Demo: run this module directly to compare the anonymization modes.
if __name__ == "__main__":
    print("Format-Preserving Email Anonymization - Structure Preserving:\n")

    # Anonymizer that keeps dots, hyphens, case pattern and length.
    structure_anonymizer = FormatPreservingEmailAnonymizer(preserve_structure=True)

    sample_emails = [
        "john@gmail.com",
        "john@gmail.com",
        "john@outlook.com",
        "joe@outlook.com",
        "user.name@company.co.uk",
        "test-email@sub.example.org",
        "Admin123@BigCorp.net",
        "a@b.co",
    ]

    short_rule = "=" * 50
    long_rule = "=" * 70

    print("Structure-Preserving Mode:")
    print(short_rule)
    for address in sample_emails:
        masked = structure_anonymizer._anonymize_email(address)
        print(f"{address:25} -> {masked}")

    print("\n" + short_rule)
    print("Length-Preserving Mode:")
    print(short_rule)

    # Anonymizer that only keeps the length of each part.
    length_anonymizer = FormatPreservingEmailAnonymizer(preserve_length=True, preserve_structure=False)

    for address in sample_emails:
        masked = length_anonymizer._anonymize_email(address)
        print(f"{address:25} -> {masked}")

    # Addresses embedded in free text
    print("\n" + long_rule)
    print("Full Text Anonymization Examples:")
    print(long_rule)

    sample_texts = [
        "Hi, my name is John and my email is john@gmail.com",
        "Contact: support@company.com or admin@BigCorp.net",
        "Emails: user.name@test.co.uk, simple@domain.org",
    ]

    for sample in sample_texts:
        masked = structure_anonymizer.anonymize_text(sample)
        print(f"Original: {sample}")
        print(f"Anonymized: {masked}")
        print()

    # The same address must map to the same output on every call.
    print("Consistency Test:")
    print("-" * 30)
    probe = "john@gmail.com"
    first_result = structure_anonymizer._anonymize_email(probe)
    second_result = structure_anonymizer._anonymize_email(probe)
    print(f"First call: {probe} -> {first_result}")
    print(f"Second call: {probe} -> {second_result}")
    print(f"Consistent: {'✓' if first_result == second_result else '✗'}")
netra/config.py ADDED
@@ -0,0 +1,111 @@
1
+ import json
2
+ import os
3
+ from typing import Any, Dict, Optional
4
+
5
+ from opentelemetry.util.re import parse_env_headers
6
+
7
+ from netra.version import __version__
8
+
9
+
10
class Config:
    """
    Holds configuration options for the tracer:
      - app_name: Logical name for this service (parameter, else environment)
      - otlp_endpoint: URL for OTLP collector (environment only)
      - api_key: API key for the collector (environment only; sent as the
        ``x-api-key`` header to "getcombat" endpoints and as a Bearer token
        to any other endpoint)
      - headers: Additional headers (W3C Correlation-Context format)
      - disable_batch: Whether to disable batch span processor (bool)
      - trace_content: Whether to capture prompt/completion content (bool)
      - environment: Deployment environment name
      - resource_attributes: Custom resource attributes dict (e.g., {'env': 'prod', 'version': '1.0.0'})
    """

    # SDK Constants
    SDK_NAME = "netra"
    LIBRARY_NAME = "netra"
    LIBRARY_VERSION = __version__

    def __init__(
        self,
        app_name: Optional[str] = None,
        headers: Optional[str] = None,
        disable_batch: Optional[bool] = None,
        trace_content: Optional[bool] = None,
        resource_attributes: Optional[Dict[str, Any]] = None,
        environment: Optional[str] = None,
    ):
        """Resolve every option from its parameter or the environment.

        Every attribute is always set, even when the API key is missing, so
        consumers never see a half-initialized object.

        Args:
            app_name: Service name; falls back to OTEL_SERVICE_NAME, then
                NETRA_APP_NAME, then "llm_tracing_service".
            headers: Header string handed to ``parse_env_headers``; falls
                back to NETRA_HEADERS.
            disable_batch: Falls back to NETRA_DISABLE_BATCH ("1"/"true" enable it).
            trace_content: Falls back to NETRA_TRACE_CONTENT ("0"/"false" disable it).
            resource_attributes: Falls back to the JSON object in NETRA_RESOURCE_ATTRS.
            environment: Falls back to NETRA_ENV, then "local".
        """
        self.app_name = (
            app_name or os.getenv("OTEL_SERVICE_NAME") or os.getenv("NETRA_APP_NAME") or "llm_tracing_service"
        )

        # Endpoint and API key have no parameter; they come from the environment only.
        self.otlp_endpoint = os.getenv("NETRA_OTLP_ENDPOINT") or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
        self.api_key = os.getenv("NETRA_API_KEY")
        self.headers = {}

        headers = headers or os.getenv("NETRA_HEADERS")
        if isinstance(headers, str):
            self.headers = parse_env_headers(headers)

        if self.otlp_endpoint == "https://api.dev.getcombat.ai" and not self.api_key:
            # Report the problem but keep initializing: returning here used
            # to leave disable_batch, trace_content, environment and
            # resource_attributes unset.
            print("Error: Missing Netra API key, go to https://app.dev.getcombat.ai/api-key to create one")
            print("Set the NETRA_API_KEY environment variable to the key")

        if self.api_key and self.otlp_endpoint:
            if "getcombat" in self.otlp_endpoint.lower():
                auth_name, auth_value = "x-api-key", self.api_key
            else:
                auth_name, auth_value = "Authorization", f"Bearer {self.api_key}"
            # Explicit headers win over the API key. The comparison ignores
            # case because header names are case-insensitive and parsed
            # header names may be lower-cased.
            already_set = any(str(name).lower() == auth_name.lower() for name in self.headers)
            if not already_set:
                self.headers = {**self.headers, auth_name: auth_value}

        if disable_batch is not None:
            self.disable_batch = disable_batch
        else:
            env_db = os.getenv("NETRA_DISABLE_BATCH")
            self.disable_batch = env_db is not None and env_db.lower() in ("1", "true")

        # Content tracing defaults to on unless the environment says otherwise.
        if trace_content is not None:
            self.trace_content = trace_content
        else:
            env_tc = os.getenv("NETRA_TRACE_CONTENT")
            self.trace_content = not (env_tc is not None and env_tc.lower() in ("0", "false"))

        if environment is not None:
            self.environment = environment
        else:
            self.environment = os.getenv("NETRA_ENV", "local")

        if resource_attributes is not None:
            self.resource_attributes = resource_attributes
        else:
            self.resource_attributes = self._resource_attributes_from_env()

    @staticmethod
    def _resource_attributes_from_env() -> Dict[str, Any]:
        """Parse NETRA_RESOURCE_ATTRS (e.g. {"env":"prod","version":"1.0.0"}).

        Returns:
            The decoded JSON object, or an empty dict when the variable is
            unset, is not valid JSON, or does not decode to an object.
        """
        env_ra = os.getenv("NETRA_RESOURCE_ATTRS")
        if not env_ra:
            return {}

        import logging

        logger = logging.getLogger(__name__)
        try:
            parsed = json.loads(env_ra)
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Failed to parse NETRA_RESOURCE_ATTRS: {e}")
            return {}

        if not isinstance(parsed, dict):
            logger.warning(
                "NETRA_RESOURCE_ATTRS must be a JSON object; ignoring value of type %s",
                type(parsed).__name__,
            )
            return {}
        return parsed