netra-sdk 0.1.19__tar.gz → 0.1.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of netra-sdk might be problematic. Click here for more details.
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/PKG-INFO +116 -97
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/README.md +113 -96
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/pii.py +152 -4
- netra_sdk-0.1.20/netra/version.py +1 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/pyproject.toml +3 -1
- netra_sdk-0.1.19/netra/version.py +0 -1
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/LICENCE +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/anonymizer.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/base.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/fp_anonymizer.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/config.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/decorators.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/injection.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/pii.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/input_scanner.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/aiohttp/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/aiohttp/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/cohere/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/cohere/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/fastapi/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/fastapi/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/config.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/utils.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/httpx/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/httpx/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/instruments.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/config.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/utils.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/wrappers.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/weaviate/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/weaviate/version.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/processors/__init__.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/processors/session_span_processor.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/scanner.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/session_manager.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/span_wrapper.py +0 -0
- {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/tracer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: netra-sdk
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.20
|
|
4
4
|
Summary: A Python SDK for AI application observability that provides OpenTelemetry-based monitoring, tracing, and PII protection for LLM and vector database applications. Enables easy instrumentation, session tracking, and privacy-focused data collection for AI systems in production environments.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: netra,tracing,observability,sdk,ai,llm,vector,database
|
|
@@ -69,7 +69,9 @@ Requires-Dist: opentelemetry-instrumentation-urllib3 (>=0.55b1,<1.0.0)
|
|
|
69
69
|
Requires-Dist: opentelemetry-sdk (>=1.34.0,<2.0.0)
|
|
70
70
|
Requires-Dist: presidio-analyzer (==2.2.358) ; extra == "presidio"
|
|
71
71
|
Requires-Dist: presidio-anonymizer (==2.2.358) ; extra == "presidio"
|
|
72
|
+
Requires-Dist: stanza (>=1.10.1,<2.0.0) ; extra == "presidio"
|
|
72
73
|
Requires-Dist: traceloop-sdk (>=0.40.7,<0.43.0)
|
|
74
|
+
Requires-Dist: transformers (==4.51.3) ; extra == "presidio"
|
|
73
75
|
Project-URL: Bug Tracker, https://github.com/KeyValueSoftwareSystems/netra-sdk-py/issues
|
|
74
76
|
Project-URL: Documentation, https://github.com/KeyValueSoftwareSystems/netra-sdk-py/blob/main/README.md
|
|
75
77
|
Project-URL: Homepage, https://github.com/KeyValueSoftwareSystems/netra-sdk-py
|
|
@@ -331,6 +333,119 @@ print(f"Masked text: {result.masked_text}")
|
|
|
331
333
|
print(f"PII entities: {result.pii_entities}")
|
|
332
334
|
```
|
|
333
335
|
|
|
336
|
+
#### Custom Models for PII Detection
|
|
337
|
+
|
|
338
|
+
The `PresidioPIIDetector` supports custom NLP models through the `nlp_configuration` parameter, allowing you to use specialized models for improved PII detection accuracy. You can configure custom spaCy, Stanza, or transformers models:
|
|
339
|
+
|
|
340
|
+
##### NLP Configuration Example
|
|
341
|
+
|
|
342
|
+
Follow this configuration structure to provide your custom models.
|
|
343
|
+
```python
|
|
344
|
+
nlp_configuration = {
|
|
345
|
+
"nlp_engine_name": "spacy|stanza|transformers",
|
|
346
|
+
"models": [
|
|
347
|
+
{
|
|
348
|
+
"lang_code": "en", # Language code
|
|
349
|
+
"model_name": "model_identifier" # Varies by engine type
|
|
350
|
+
}
|
|
351
|
+
],
|
|
352
|
+
"ner_model_configuration": { # Optional, mainly for transformers
|
|
353
|
+
# Additional configuration options
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
##### Using Custom spaCy Models
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
from netra.pii import PresidioPIIDetector
|
|
362
|
+
|
|
363
|
+
# Configure custom spaCy model
|
|
364
|
+
spacy_config = {
|
|
365
|
+
"nlp_engine_name": "spacy",
|
|
366
|
+
"models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
detector = PresidioPIIDetector(
|
|
370
|
+
nlp_configuration=spacy_config,
|
|
371
|
+
action_type="MASK",
|
|
372
|
+
score_threshold=0.8
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
text = "Dr. Sarah Wilson works at 123 Main St, New York"
|
|
376
|
+
result = detector.detect(text)
|
|
377
|
+
print(f"Detected entities: {result.pii_entities}")
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
##### Using Stanza Models
|
|
381
|
+
|
|
382
|
+
```python
|
|
383
|
+
from netra.pii import PresidioPIIDetector
|
|
384
|
+
|
|
385
|
+
# Configure Stanza model
|
|
386
|
+
stanza_config = {
|
|
387
|
+
"nlp_engine_name": "stanza",
|
|
388
|
+
"models": [{"lang_code": "en", "model_name": "en"}]
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
detector = PresidioPIIDetector(
|
|
392
|
+
nlp_configuration=stanza_config,
|
|
393
|
+
action_type="FLAG"
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
text = "Contact Alice Smith at alice@company.com"
|
|
397
|
+
result = detector.detect(text)
|
|
398
|
+
print(f"PII detected: {result.has_pii}")
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
##### Using Transformers Models
|
|
402
|
+
|
|
403
|
+
For advanced NER capabilities, you can use transformer-based models:
|
|
404
|
+
|
|
405
|
+
```python
|
|
406
|
+
from netra.pii import PresidioPIIDetector
|
|
407
|
+
|
|
408
|
+
# Configure transformers model with entity mapping
|
|
409
|
+
transformers_config = {
|
|
410
|
+
"nlp_engine_name": "transformers",
|
|
411
|
+
"models": [{
|
|
412
|
+
"lang_code": "en",
|
|
413
|
+
"model_name": {
|
|
414
|
+
"spacy": "en_core_web_sm",
|
|
415
|
+
"transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
|
|
416
|
+
}
|
|
417
|
+
}],
|
|
418
|
+
"ner_model_configuration": {
|
|
419
|
+
"labels_to_ignore": ["O"],
|
|
420
|
+
"model_to_presidio_entity_mapping": {
|
|
421
|
+
"PER": "PERSON",
|
|
422
|
+
"LOC": "LOCATION",
|
|
423
|
+
"ORG": "ORGANIZATION",
|
|
424
|
+
"MISC": "MISC"
|
|
425
|
+
},
|
|
426
|
+
"low_confidence_score_multiplier": 0.4,
|
|
427
|
+
"low_score_entity_names": ["ORG"]
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
detector = PresidioPIIDetector(
|
|
432
|
+
nlp_configuration=transformers_config,
|
|
433
|
+
action_type="MASK"
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
text = "Microsoft Corporation is located in Redmond, Washington"
|
|
437
|
+
result = detector.detect(text)
|
|
438
|
+
print(f"Masked text: {result.masked_text}")
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
**Note**: Custom model configuration allows for:
|
|
444
|
+
- **Better accuracy** with domain-specific models
|
|
445
|
+
- **Multi-language support** by specifying different language codes
|
|
446
|
+
- **Fine-tuned models** trained on your specific data
|
|
447
|
+
- **Performance optimization** by choosing models suited to your use case
|
|
448
|
+
|
|
334
449
|
#### Regex-based Detection
|
|
335
450
|
```python
|
|
336
451
|
from netra.pii import RegexPIIDetector
|
|
@@ -555,102 +670,6 @@ Configuration values are resolved in the following order (highest to lowest prec
|
|
|
555
670
|
4. **Default Values**: Fallback values defined in the SDK
|
|
556
671
|
|
|
557
672
|
This allows you to:
|
|
558
|
-
- Override any setting directly in code for maximum control
|
|
559
|
-
- Use Netra-specific environment variables for Netra-specific settings
|
|
560
|
-
- Fall back to standard OpenTelemetry variables for compatibility
|
|
561
|
-
- Rely on sensible defaults when no other configuration is provided
|
|
562
|
-
|
|
563
|
-
**Example**:
|
|
564
|
-
```bash
|
|
565
|
-
export NETRA_APP_NAME="my-ai-service"
|
|
566
|
-
export NETRA_OTLP_ENDPOINT="https://collector.example.com:4318"
|
|
567
|
-
export NETRA_API_KEY="your-api-key-here"
|
|
568
|
-
export NETRA_ENV="production"
|
|
569
|
-
export NETRA_RESOURCE_ATTRS='{"team":"ai", "version":"1.0.0"}'
|
|
570
|
-
```
|
|
571
|
-
|
|
572
|
-
### Programmatic Configuration
|
|
573
|
-
|
|
574
|
-
You can also configure the SDK programmatically when initializing:
|
|
575
|
-
|
|
576
|
-
```python
|
|
577
|
-
from netra import Netra
|
|
578
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
579
|
-
|
|
580
|
-
Netra.init(
|
|
581
|
-
app_name="my-ai-service",
|
|
582
|
-
environment="production",
|
|
583
|
-
resource_attributes={"team": "ai", "version": "1.0.0"},
|
|
584
|
-
trace_content=True,
|
|
585
|
-
disable_batch=False,
|
|
586
|
-
instruments={InstrumentSet.OPENAI}
|
|
587
|
-
)
|
|
588
|
-
```
|
|
589
|
-
|
|
590
|
-
### Custom Instrumentation Selection
|
|
591
|
-
|
|
592
|
-
Control which instrumentations are enabled:
|
|
593
|
-
|
|
594
|
-
```python
|
|
595
|
-
from netra import Netra
|
|
596
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
597
|
-
|
|
598
|
-
# Enable specific instruments
|
|
599
|
-
Netra.init(
|
|
600
|
-
app_name="Selective App",
|
|
601
|
-
instruments={
|
|
602
|
-
InstrumentSet.OPENAI,
|
|
603
|
-
InstrumentSet.WEAVIATEDB,
|
|
604
|
-
InstrumentSet.FASTAPI
|
|
605
|
-
}
|
|
606
|
-
)
|
|
607
|
-
|
|
608
|
-
# Block specific instruments
|
|
609
|
-
Netra.init(
|
|
610
|
-
app_name="Blocked App",
|
|
611
|
-
block_instruments={
|
|
612
|
-
InstrumentSet.HTTPX, # Don't trace HTTPX calls
|
|
613
|
-
InstrumentSet.REDIS # Don't trace Redis operations
|
|
614
|
-
}
|
|
615
|
-
)
|
|
616
|
-
```
|
|
617
|
-
|
|
618
|
-
### 🌐 Custom Endpoint Integration
|
|
619
|
-
|
|
620
|
-
Since Netra SDK follows the **OpenTelemetry standard**, you can integrate it with any OpenTelemetry-compatible observability backend:
|
|
621
|
-
|
|
622
|
-
#### Popular OpenTelemetry Backends
|
|
623
|
-
- **Jaeger** - Distributed tracing platform
|
|
624
|
-
- **Zipkin** - Distributed tracing system
|
|
625
|
-
- **Prometheus** - Monitoring and alerting toolkit
|
|
626
|
-
- **Grafana** - Observability and data visualization
|
|
627
|
-
- **New Relic** - Full-stack observability platform
|
|
628
|
-
- **Datadog** - Monitoring and analytics platform
|
|
629
|
-
- **Honeycomb** - Observability for complex systems
|
|
630
|
-
- **Lightstep** - Distributed tracing and observability
|
|
631
|
-
- **AWS X-Ray** - Distributed tracing service
|
|
632
|
-
- **Google Cloud Trace** - Distributed tracing system
|
|
633
|
-
|
|
634
|
-
#### Custom Endpoint Configuration
|
|
635
|
-
|
|
636
|
-
**Recommended: Environment Variable Configuration (No Code Changes Required)**
|
|
637
|
-
```bash
|
|
638
|
-
# Set custom OTLP endpoint via environment variables
|
|
639
|
-
export NETRA_OTLP_ENDPOINT="https://your-custom-backend.com/v1/traces"
|
|
640
|
-
export NETRA_HEADERS="authorization=Bearer your-token"
|
|
641
|
-
|
|
642
|
-
```
|
|
643
|
-
|
|
644
|
-
```python
|
|
645
|
-
from netra import Netra
|
|
646
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
647
|
-
|
|
648
|
-
# Simple initialization - SDK automatically picks up environment variables
|
|
649
|
-
Netra.init(app_name="Your App", instruments={InstrumentSet})
|
|
650
|
-
# No endpoint configuration needed in code!
|
|
651
|
-
```
|
|
652
|
-
|
|
653
|
-
#### Benefits of OpenTelemetry Compatibility
|
|
654
673
|
- **🔄 Vendor Agnostic**: Switch between observability platforms without code changes
|
|
655
674
|
- **📊 Standard Format**: Consistent telemetry data across all tools
|
|
656
675
|
- **🔧 Flexible Integration**: Works with existing observability infrastructure
|
|
@@ -253,6 +253,119 @@ print(f"Masked text: {result.masked_text}")
|
|
|
253
253
|
print(f"PII entities: {result.pii_entities}")
|
|
254
254
|
```
|
|
255
255
|
|
|
256
|
+
#### Custom Models for PII Detection
|
|
257
|
+
|
|
258
|
+
The `PresidioPIIDetector` supports custom NLP models through the `nlp_configuration` parameter, allowing you to use specialized models for improved PII detection accuracy. You can configure custom spaCy, Stanza, or transformers models:
|
|
259
|
+
|
|
260
|
+
##### NLP Configuration Example
|
|
261
|
+
|
|
262
|
+
Follow this configuration structure to provide your custom models.
|
|
263
|
+
```python
|
|
264
|
+
nlp_configuration = {
|
|
265
|
+
"nlp_engine_name": "spacy|stanza|transformers",
|
|
266
|
+
"models": [
|
|
267
|
+
{
|
|
268
|
+
"lang_code": "en", # Language code
|
|
269
|
+
"model_name": "model_identifier" # Varies by engine type
|
|
270
|
+
}
|
|
271
|
+
],
|
|
272
|
+
"ner_model_configuration": { # Optional, mainly for transformers
|
|
273
|
+
# Additional configuration options
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
##### Using Custom spaCy Models
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
from netra.pii import PresidioPIIDetector
|
|
282
|
+
|
|
283
|
+
# Configure custom spaCy model
|
|
284
|
+
spacy_config = {
|
|
285
|
+
"nlp_engine_name": "spacy",
|
|
286
|
+
"models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
detector = PresidioPIIDetector(
|
|
290
|
+
nlp_configuration=spacy_config,
|
|
291
|
+
action_type="MASK",
|
|
292
|
+
score_threshold=0.8
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
text = "Dr. Sarah Wilson works at 123 Main St, New York"
|
|
296
|
+
result = detector.detect(text)
|
|
297
|
+
print(f"Detected entities: {result.pii_entities}")
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
##### Using Stanza Models
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
from netra.pii import PresidioPIIDetector
|
|
304
|
+
|
|
305
|
+
# Configure Stanza model
|
|
306
|
+
stanza_config = {
|
|
307
|
+
"nlp_engine_name": "stanza",
|
|
308
|
+
"models": [{"lang_code": "en", "model_name": "en"}]
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
detector = PresidioPIIDetector(
|
|
312
|
+
nlp_configuration=stanza_config,
|
|
313
|
+
action_type="FLAG"
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
text = "Contact Alice Smith at alice@company.com"
|
|
317
|
+
result = detector.detect(text)
|
|
318
|
+
print(f"PII detected: {result.has_pii}")
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
##### Using Transformers Models
|
|
322
|
+
|
|
323
|
+
For advanced NER capabilities, you can use transformer-based models:
|
|
324
|
+
|
|
325
|
+
```python
|
|
326
|
+
from netra.pii import PresidioPIIDetector
|
|
327
|
+
|
|
328
|
+
# Configure transformers model with entity mapping
|
|
329
|
+
transformers_config = {
|
|
330
|
+
"nlp_engine_name": "transformers",
|
|
331
|
+
"models": [{
|
|
332
|
+
"lang_code": "en",
|
|
333
|
+
"model_name": {
|
|
334
|
+
"spacy": "en_core_web_sm",
|
|
335
|
+
"transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
|
|
336
|
+
}
|
|
337
|
+
}],
|
|
338
|
+
"ner_model_configuration": {
|
|
339
|
+
"labels_to_ignore": ["O"],
|
|
340
|
+
"model_to_presidio_entity_mapping": {
|
|
341
|
+
"PER": "PERSON",
|
|
342
|
+
"LOC": "LOCATION",
|
|
343
|
+
"ORG": "ORGANIZATION",
|
|
344
|
+
"MISC": "MISC"
|
|
345
|
+
},
|
|
346
|
+
"low_confidence_score_multiplier": 0.4,
|
|
347
|
+
"low_score_entity_names": ["ORG"]
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
detector = PresidioPIIDetector(
|
|
352
|
+
nlp_configuration=transformers_config,
|
|
353
|
+
action_type="MASK"
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
text = "Microsoft Corporation is located in Redmond, Washington"
|
|
357
|
+
result = detector.detect(text)
|
|
358
|
+
print(f"Masked text: {result.masked_text}")
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
**Note**: Custom model configuration allows for:
|
|
364
|
+
- **Better accuracy** with domain-specific models
|
|
365
|
+
- **Multi-language support** by specifying different language codes
|
|
366
|
+
- **Fine-tuned models** trained on your specific data
|
|
367
|
+
- **Performance optimization** by choosing models suited to your use case
|
|
368
|
+
|
|
256
369
|
#### Regex-based Detection
|
|
257
370
|
```python
|
|
258
371
|
from netra.pii import RegexPIIDetector
|
|
@@ -477,102 +590,6 @@ Configuration values are resolved in the following order (highest to lowest prec
|
|
|
477
590
|
4. **Default Values**: Fallback values defined in the SDK
|
|
478
591
|
|
|
479
592
|
This allows you to:
|
|
480
|
-
- Override any setting directly in code for maximum control
|
|
481
|
-
- Use Netra-specific environment variables for Netra-specific settings
|
|
482
|
-
- Fall back to standard OpenTelemetry variables for compatibility
|
|
483
|
-
- Rely on sensible defaults when no other configuration is provided
|
|
484
|
-
|
|
485
|
-
**Example**:
|
|
486
|
-
```bash
|
|
487
|
-
export NETRA_APP_NAME="my-ai-service"
|
|
488
|
-
export NETRA_OTLP_ENDPOINT="https://collector.example.com:4318"
|
|
489
|
-
export NETRA_API_KEY="your-api-key-here"
|
|
490
|
-
export NETRA_ENV="production"
|
|
491
|
-
export NETRA_RESOURCE_ATTRS='{"team":"ai", "version":"1.0.0"}'
|
|
492
|
-
```
|
|
493
|
-
|
|
494
|
-
### Programmatic Configuration
|
|
495
|
-
|
|
496
|
-
You can also configure the SDK programmatically when initializing:
|
|
497
|
-
|
|
498
|
-
```python
|
|
499
|
-
from netra import Netra
|
|
500
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
501
|
-
|
|
502
|
-
Netra.init(
|
|
503
|
-
app_name="my-ai-service",
|
|
504
|
-
environment="production",
|
|
505
|
-
resource_attributes={"team": "ai", "version": "1.0.0"},
|
|
506
|
-
trace_content=True,
|
|
507
|
-
disable_batch=False,
|
|
508
|
-
instruments={InstrumentSet.OPENAI}
|
|
509
|
-
)
|
|
510
|
-
```
|
|
511
|
-
|
|
512
|
-
### Custom Instrumentation Selection
|
|
513
|
-
|
|
514
|
-
Control which instrumentations are enabled:
|
|
515
|
-
|
|
516
|
-
```python
|
|
517
|
-
from netra import Netra
|
|
518
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
519
|
-
|
|
520
|
-
# Enable specific instruments
|
|
521
|
-
Netra.init(
|
|
522
|
-
app_name="Selective App",
|
|
523
|
-
instruments={
|
|
524
|
-
InstrumentSet.OPENAI,
|
|
525
|
-
InstrumentSet.WEAVIATEDB,
|
|
526
|
-
InstrumentSet.FASTAPI
|
|
527
|
-
}
|
|
528
|
-
)
|
|
529
|
-
|
|
530
|
-
# Block specific instruments
|
|
531
|
-
Netra.init(
|
|
532
|
-
app_name="Blocked App",
|
|
533
|
-
block_instruments={
|
|
534
|
-
InstrumentSet.HTTPX, # Don't trace HTTPX calls
|
|
535
|
-
InstrumentSet.REDIS # Don't trace Redis operations
|
|
536
|
-
}
|
|
537
|
-
)
|
|
538
|
-
```
|
|
539
|
-
|
|
540
|
-
### 🌐 Custom Endpoint Integration
|
|
541
|
-
|
|
542
|
-
Since Netra SDK follows the **OpenTelemetry standard**, you can integrate it with any OpenTelemetry-compatible observability backend:
|
|
543
|
-
|
|
544
|
-
#### Popular OpenTelemetry Backends
|
|
545
|
-
- **Jaeger** - Distributed tracing platform
|
|
546
|
-
- **Zipkin** - Distributed tracing system
|
|
547
|
-
- **Prometheus** - Monitoring and alerting toolkit
|
|
548
|
-
- **Grafana** - Observability and data visualization
|
|
549
|
-
- **New Relic** - Full-stack observability platform
|
|
550
|
-
- **Datadog** - Monitoring and analytics platform
|
|
551
|
-
- **Honeycomb** - Observability for complex systems
|
|
552
|
-
- **Lightstep** - Distributed tracing and observability
|
|
553
|
-
- **AWS X-Ray** - Distributed tracing service
|
|
554
|
-
- **Google Cloud Trace** - Distributed tracing system
|
|
555
|
-
|
|
556
|
-
#### Custom Endpoint Configuration
|
|
557
|
-
|
|
558
|
-
**Recommended: Environment Variable Configuration (No Code Changes Required)**
|
|
559
|
-
```bash
|
|
560
|
-
# Set custom OTLP endpoint via environment variables
|
|
561
|
-
export NETRA_OTLP_ENDPOINT="https://your-custom-backend.com/v1/traces"
|
|
562
|
-
export NETRA_HEADERS="authorization=Bearer your-token"
|
|
563
|
-
|
|
564
|
-
```
|
|
565
|
-
|
|
566
|
-
```python
|
|
567
|
-
from netra import Netra
|
|
568
|
-
from netra.instrumentation.instruments import InstrumentSet
|
|
569
|
-
|
|
570
|
-
# Simple initialization - SDK automatically picks up environment variables
|
|
571
|
-
Netra.init(app_name="Your App", instruments={InstrumentSet})
|
|
572
|
-
# No endpoint configuration needed in code!
|
|
573
|
-
```
|
|
574
|
-
|
|
575
|
-
#### Benefits of OpenTelemetry Compatibility
|
|
576
593
|
- **🔄 Vendor Agnostic**: Switch between observability platforms without code changes
|
|
577
594
|
- **📊 Standard Format**: Consistent telemetry data across all tools
|
|
578
595
|
- **🔧 Flexible Integration**: Works with existing observability infrastructure
|
|
@@ -577,7 +577,7 @@ class PresidioPIIDetector(PIIDetector):
|
|
|
577
577
|
call Presidio's Analyzer + Anonymizer on a string.
|
|
578
578
|
|
|
579
579
|
Examples:
|
|
580
|
-
# Using default
|
|
580
|
+
# Using default configuration
|
|
581
581
|
detector = PresidioPIIDetector()
|
|
582
582
|
result = detector.detect("My email is john@example.com")
|
|
583
583
|
|
|
@@ -592,6 +592,41 @@ class PresidioPIIDetector(PIIDetector):
|
|
|
592
592
|
action_type="MASK",
|
|
593
593
|
score_threshold=0.8
|
|
594
594
|
)
|
|
595
|
+
|
|
596
|
+
# Using custom spaCy model configuration
|
|
597
|
+
spacy_config = {
|
|
598
|
+
"nlp_engine_name": "spacy",
|
|
599
|
+
"models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
|
|
600
|
+
}
|
|
601
|
+
detector = PresidioPIIDetector(nlp_configuration=spacy_config)
|
|
602
|
+
|
|
603
|
+
# Using Stanza model configuration
|
|
604
|
+
stanza_config = {
|
|
605
|
+
"nlp_engine_name": "stanza",
|
|
606
|
+
"models": [{"lang_code": "en", "model_name": "en"}]
|
|
607
|
+
}
|
|
608
|
+
detector = PresidioPIIDetector(nlp_configuration=stanza_config)
|
|
609
|
+
|
|
610
|
+
# Using transformers model configuration
|
|
611
|
+
transformers_config = {
|
|
612
|
+
"nlp_engine_name": "transformers",
|
|
613
|
+
"models": [{
|
|
614
|
+
"lang_code": "en",
|
|
615
|
+
"model_name": {
|
|
616
|
+
"spacy": "en_core_web_sm",
|
|
617
|
+
"transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
|
|
618
|
+
}
|
|
619
|
+
}],
|
|
620
|
+
"ner_model_configuration": {
|
|
621
|
+
"labels_to_ignore": ["O"],
|
|
622
|
+
"model_to_presidio_entity_mapping": {
|
|
623
|
+
"PER": "PERSON",
|
|
624
|
+
"LOC": "LOCATION",
|
|
625
|
+
"ORG": "ORGANIZATION"
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
detector = PresidioPIIDetector(nlp_configuration=transformers_config)
|
|
595
630
|
"""
|
|
596
631
|
|
|
597
632
|
def __init__(
|
|
@@ -602,7 +637,35 @@ class PresidioPIIDetector(PIIDetector):
|
|
|
602
637
|
action_type: Optional[Literal["BLOCK", "FLAG", "MASK"]] = None,
|
|
603
638
|
anonymizer_cache_size: int = 1000,
|
|
604
639
|
hash_function: Optional[Callable[[str], str]] = None,
|
|
640
|
+
nlp_configuration: Optional[Dict[str, Any]] = None,
|
|
605
641
|
) -> None:
|
|
642
|
+
"""
|
|
643
|
+
Initialize the Presidio PII detector.
|
|
644
|
+
|
|
645
|
+
Args:
|
|
646
|
+
entities: List of entity types to detect. If None, uses DEFAULT_ENTITIES.
|
|
647
|
+
language: Language code for detection (default: "en").
|
|
648
|
+
score_threshold: Minimum confidence score for detections (default: 0.6).
|
|
649
|
+
action_type: Action to take when PII is detected ("BLOCK", "FLAG", "MASK").
|
|
650
|
+
anonymizer_cache_size: Size of the anonymizer cache (default: 1000).
|
|
651
|
+
hash_function: Custom hash function for anonymization.
|
|
652
|
+
nlp_configuration: Dictionary containing NLP engine configuration.
|
|
653
|
+
Format: {
|
|
654
|
+
"nlp_engine_name": "spacy|stanza|transformers",
|
|
655
|
+
"models": [{"lang_code": "en", "model_name": "model_name"}],
|
|
656
|
+
"ner_model_configuration": {...} # Optional, for transformers
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
For spaCy and Stanza:
|
|
660
|
+
- model_name should be a string (e.g., "en_core_web_lg", "en")
|
|
661
|
+
|
|
662
|
+
For transformers:
|
|
663
|
+
- model_name should be a dict with "spacy" and "transformers" keys
|
|
664
|
+
- Example: {"spacy": "en_core_web_sm", "transformers": "model_path"}
|
|
665
|
+
|
|
666
|
+
Raises:
|
|
667
|
+
ImportError: If presidio-analyzer is not installed or required NLP library is missing.
|
|
668
|
+
"""
|
|
606
669
|
if action_type is None:
|
|
607
670
|
action_type = "FLAG"
|
|
608
671
|
env_action = os.getenv("NETRA_ACTION_TYPE", "FLAG")
|
|
@@ -610,18 +673,99 @@ class PresidioPIIDetector(PIIDetector):
|
|
|
610
673
|
if env_action in ["BLOCK", "FLAG", "MASK"]:
|
|
611
674
|
action_type = cast(Literal["BLOCK", "FLAG", "MASK"], env_action)
|
|
612
675
|
super().__init__(action_type=action_type)
|
|
676
|
+
|
|
677
|
+
# Import presidio-analyzer
|
|
613
678
|
try:
|
|
614
679
|
from presidio_analyzer import AnalyzerEngine # noqa: F401
|
|
615
680
|
except ImportError as exc:
|
|
616
|
-
raise ImportError("Presidio-based PII detection requires: presidio-analyzer.
|
|
681
|
+
raise ImportError("Presidio-based PII detection requires: presidio-analyzer. Install via pip.") from exc
|
|
617
682
|
|
|
618
683
|
self.language: str = language
|
|
619
684
|
self.entities: Optional[List[str]] = entities if entities else DEFAULT_ENTITIES
|
|
620
685
|
self.score_threshold: float = score_threshold
|
|
621
686
|
|
|
622
|
-
|
|
687
|
+
# Initialize AnalyzerEngine with custom or default NLP engine
|
|
688
|
+
if nlp_configuration is not None:
|
|
689
|
+
self.analyzer = self._create_analyzer_with_custom_nlp(nlp_configuration)
|
|
690
|
+
else:
|
|
691
|
+
# Use default AnalyzerEngine
|
|
692
|
+
self.analyzer = AnalyzerEngine()
|
|
693
|
+
|
|
623
694
|
self.anonymizer = Anonymizer(hash_function=hash_function, cache_size=anonymizer_cache_size)
|
|
624
695
|
|
|
696
|
+
def _create_analyzer_with_custom_nlp(self, nlp_configuration: Dict[str, Any]) -> Any:
|
|
697
|
+
"""
|
|
698
|
+
Create an AnalyzerEngine with custom NLP configuration.
|
|
699
|
+
|
|
700
|
+
Args:
|
|
701
|
+
nlp_configuration: Dictionary containing NLP engine configuration.
|
|
702
|
+
|
|
703
|
+
Returns:
|
|
704
|
+
AnalyzerEngine instance with custom NLP engine.
|
|
705
|
+
|
|
706
|
+
Raises:
|
|
707
|
+
ImportError: If required NLP library is not available.
|
|
708
|
+
"""
|
|
709
|
+
try:
|
|
710
|
+
from presidio_analyzer import AnalyzerEngine
|
|
711
|
+
from presidio_analyzer.nlp_engine import NlpEngineProvider
|
|
712
|
+
except ImportError as exc:
|
|
713
|
+
raise ImportError("Presidio-based PII detection requires: presidio-analyzer. Install via pip.") from exc
|
|
714
|
+
|
|
715
|
+
# Validate and prepare configuration
|
|
716
|
+
engine_name = nlp_configuration.get("nlp_engine_name", "").lower()
|
|
717
|
+
|
|
718
|
+
# Perform lazy imports based on engine type
|
|
719
|
+
if engine_name == "spacy":
|
|
720
|
+
self._ensure_spacy_available()
|
|
721
|
+
elif engine_name == "stanza":
|
|
722
|
+
self._ensure_stanza_available()
|
|
723
|
+
elif engine_name == "transformers":
|
|
724
|
+
self._ensure_transformers_available()
|
|
725
|
+
else:
|
|
726
|
+
# Default behavior - let Presidio handle it
|
|
727
|
+
pass
|
|
728
|
+
|
|
729
|
+
# Create NLP engine from configuration
|
|
730
|
+
provider = NlpEngineProvider(nlp_configuration=nlp_configuration)
|
|
731
|
+
custom_nlp_engine = provider.create_engine()
|
|
732
|
+
|
|
733
|
+
# Extract supported languages from configuration
|
|
734
|
+
supported_languages = [self.language]
|
|
735
|
+
if "models" in nlp_configuration:
|
|
736
|
+
supported_languages = [model["lang_code"] for model in nlp_configuration["models"]]
|
|
737
|
+
|
|
738
|
+
return AnalyzerEngine(nlp_engine=custom_nlp_engine, supported_languages=supported_languages)
|
|
739
|
+
|
|
740
|
+
def _ensure_spacy_available(self) -> None:
|
|
741
|
+
"""Ensure spaCy is available when needed."""
|
|
742
|
+
try:
|
|
743
|
+
import spacy # noqa: F401
|
|
744
|
+
except ImportError as exc:
|
|
745
|
+
raise ImportError(
|
|
746
|
+
"spaCy is required for spaCy-based PII detection. Install via: pip install spacy"
|
|
747
|
+
) from exc
|
|
748
|
+
|
|
749
|
+
def _ensure_stanza_available(self) -> None:
|
|
750
|
+
"""Ensure Stanza is available when needed."""
|
|
751
|
+
try:
|
|
752
|
+
import stanza # noqa: F401
|
|
753
|
+
except ImportError as exc:
|
|
754
|
+
raise ImportError(
|
|
755
|
+
"Stanza is required for Stanza-based PII detection. Install via: pip install stanza"
|
|
756
|
+
) from exc
|
|
757
|
+
|
|
758
|
+
def _ensure_transformers_available(self) -> None:
|
|
759
|
+
"""Ensure transformers is available when needed."""
|
|
760
|
+
try:
|
|
761
|
+
import torch # noqa: F401
|
|
762
|
+
import transformers # noqa: F401
|
|
763
|
+
except ImportError as exc:
|
|
764
|
+
raise ImportError(
|
|
765
|
+
"Transformers and PyTorch are required for transformers-based PII detection. "
|
|
766
|
+
"Install via: pip install transformers torch"
|
|
767
|
+
) from exc
|
|
768
|
+
|
|
625
769
|
def _detect_pii(self, text: str) -> Tuple[bool, Counter[str], str, Dict[str, str]]:
|
|
626
770
|
"""
|
|
627
771
|
Detect PII in a single message.
|
|
@@ -666,6 +810,7 @@ def get_default_detector(
|
|
|
666
810
|
action_type: Optional[Literal["BLOCK", "FLAG", "MASK"]] = None,
|
|
667
811
|
entities: Optional[List[str]] = None,
|
|
668
812
|
hash_function: Optional[Callable[[str], str]] = None,
|
|
813
|
+
nlp_configuration: Optional[Dict[str, Any]] = None,
|
|
669
814
|
) -> PIIDetector:
|
|
670
815
|
"""
|
|
671
816
|
Returns a default PII detector instance (Presidio-based by default).
|
|
@@ -678,8 +823,11 @@ def get_default_detector(
|
|
|
678
823
|
- "MASK": Replace PII with mask tokens (default)
|
|
679
824
|
entities: Optional list of entity types to detect. If None, uses Presidio's default entities
|
|
680
825
|
hash_function: Optional custom hash function for anonymization. If None, uses default hash function.
|
|
826
|
+
nlp_configuration: Dictionary containing NLP engine configuration for custom models.
|
|
681
827
|
"""
|
|
682
|
-
return PresidioPIIDetector(
|
|
828
|
+
return PresidioPIIDetector(
|
|
829
|
+
action_type=action_type, entities=entities, hash_function=hash_function, nlp_configuration=nlp_configuration
|
|
830
|
+
)
|
|
683
831
|
|
|
684
832
|
|
|
685
833
|
# ---------------------------------------------------------------------------- #
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.20"
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "netra-sdk"
|
|
7
|
-
version = "0.1.19"
|
|
7
|
+
version = "0.1.20"
|
|
8
8
|
description = "A Python SDK for AI application observability that provides OpenTelemetry-based monitoring, tracing, and PII protection for LLM and vector database applications. Enables easy instrumentation, session tracking, and privacy-focused data collection for AI systems in production environments."
|
|
9
9
|
authors = [
|
|
10
10
|
{name = "Sooraj Thomas",email = "sooraj@keyvalue.systems"}
|
|
@@ -95,6 +95,8 @@ llm_guard = [
|
|
|
95
95
|
presidio = [
|
|
96
96
|
"presidio-analyzer==2.2.358",
|
|
97
97
|
"presidio-anonymizer==2.2.358",
|
|
98
|
+
"transformers==4.51.3",
|
|
99
|
+
"stanza>=1.10.1,<2.0.0"
|
|
98
100
|
]
|
|
99
101
|
|
|
100
102
|
[tool.poetry.group.dev.dependencies]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.19"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|