netra-sdk 0.1.19__tar.gz → 0.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of netra-sdk might be problematic.

Files changed (47)
  1. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/PKG-INFO +116 -97
  2. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/README.md +113 -96
  3. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/pii.py +152 -4
  4. netra_sdk-0.1.20/netra/version.py +1 -0
  5. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/pyproject.toml +3 -1
  6. netra_sdk-0.1.19/netra/version.py +0 -1
  7. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/LICENCE +0 -0
  8. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/__init__.py +0 -0
  9. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/__init__.py +0 -0
  10. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/anonymizer.py +0 -0
  11. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/base.py +0 -0
  12. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/anonymizer/fp_anonymizer.py +0 -0
  13. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/config.py +0 -0
  14. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/decorators.py +0 -0
  15. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/__init__.py +0 -0
  16. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/injection.py +0 -0
  17. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/exceptions/pii.py +0 -0
  18. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/input_scanner.py +0 -0
  19. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/__init__.py +0 -0
  20. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/aiohttp/__init__.py +0 -0
  21. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/aiohttp/version.py +0 -0
  22. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/cohere/__init__.py +0 -0
  23. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/cohere/version.py +0 -0
  24. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/fastapi/__init__.py +0 -0
  25. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/fastapi/version.py +0 -0
  26. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/__init__.py +0 -0
  27. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/config.py +0 -0
  28. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/utils.py +0 -0
  29. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/google_genai/version.py +0 -0
  30. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/httpx/__init__.py +0 -0
  31. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/httpx/version.py +0 -0
  32. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/instruments.py +0 -0
  33. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/__init__.py +0 -0
  34. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/config.py +0 -0
  35. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/utils.py +0 -0
  36. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/mistralai/version.py +0 -0
  37. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/__init__.py +0 -0
  38. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/version.py +0 -0
  39. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/openai/wrappers.py +0 -0
  40. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/weaviate/__init__.py +0 -0
  41. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/instrumentation/weaviate/version.py +0 -0
  42. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/processors/__init__.py +0 -0
  43. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/processors/session_span_processor.py +0 -0
  44. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/scanner.py +0 -0
  45. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/session_manager.py +0 -0
  46. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/span_wrapper.py +0 -0
  47. {netra_sdk-0.1.19 → netra_sdk-0.1.20}/netra/tracer.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: netra-sdk
- Version: 0.1.19
+ Version: 0.1.20
  Summary: A Python SDK for AI application observability that provides OpenTelemetry-based monitoring, tracing, and PII protection for LLM and vector database applications. Enables easy instrumentation, session tracking, and privacy-focused data collection for AI systems in production environments.
  License: Apache-2.0
  Keywords: netra,tracing,observability,sdk,ai,llm,vector,database
@@ -69,7 +69,9 @@ Requires-Dist: opentelemetry-instrumentation-urllib3 (>=0.55b1,<1.0.0)
  Requires-Dist: opentelemetry-sdk (>=1.34.0,<2.0.0)
  Requires-Dist: presidio-analyzer (==2.2.358) ; extra == "presidio"
  Requires-Dist: presidio-anonymizer (==2.2.358) ; extra == "presidio"
+ Requires-Dist: stanza (>=1.10.1,<2.0.0) ; extra == "presidio"
  Requires-Dist: traceloop-sdk (>=0.40.7,<0.43.0)
+ Requires-Dist: transformers (==4.51.3) ; extra == "presidio"
  Project-URL: Bug Tracker, https://github.com/KeyValueSoftwareSystems/netra-sdk-py/issues
  Project-URL: Documentation, https://github.com/KeyValueSoftwareSystems/netra-sdk-py/blob/main/README.md
  Project-URL: Homepage, https://github.com/KeyValueSoftwareSystems/netra-sdk-py
@@ -331,6 +333,119 @@ print(f"Masked text: {result.masked_text}")
  print(f"PII entities: {result.pii_entities}")
  ```

+ #### Custom Models for PII Detection
+
+ The `PresidioPIIDetector` supports custom NLP models through the `nlp_configuration` parameter, allowing you to use specialized models for improved PII detection accuracy. You can configure custom spaCy, Stanza, or transformers models:
+
+ ##### NLP Configuration Example
+
+ Follow this configuration structure to provide your custom models.
+ ```python
+ nlp_configuration = {
+     "nlp_engine_name": "spacy|stanza|transformers",
+     "models": [
+         {
+             "lang_code": "en",                # Language code
+             "model_name": "model_identifier"  # Varies by engine type
+         }
+     ],
+     "ner_model_configuration": {  # Optional, mainly for transformers
+         # Additional configuration options
+     }
+ }
+ ```
+
+ ##### Using Custom spaCy Models
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure custom spaCy model
+ spacy_config = {
+     "nlp_engine_name": "spacy",
+     "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=spacy_config,
+     action_type="MASK",
+     score_threshold=0.8
+ )
+
+ text = "Dr. Sarah Wilson works at 123 Main St, New York"
+ result = detector.detect(text)
+ print(f"Detected entities: {result.pii_entities}")
+ ```
+
+ ##### Using Stanza Models
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure Stanza model
+ stanza_config = {
+     "nlp_engine_name": "stanza",
+     "models": [{"lang_code": "en", "model_name": "en"}]
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=stanza_config,
+     action_type="FLAG"
+ )
+
+ text = "Contact Alice Smith at alice@company.com"
+ result = detector.detect(text)
+ print(f"PII detected: {result.has_pii}")
+ ```
+
+ ##### Using Transformers Models
+
+ For advanced NER capabilities, you can use transformer-based models:
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure transformers model with entity mapping
+ transformers_config = {
+     "nlp_engine_name": "transformers",
+     "models": [{
+         "lang_code": "en",
+         "model_name": {
+             "spacy": "en_core_web_sm",
+             "transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
+         }
+     }],
+     "ner_model_configuration": {
+         "labels_to_ignore": ["O"],
+         "model_to_presidio_entity_mapping": {
+             "PER": "PERSON",
+             "LOC": "LOCATION",
+             "ORG": "ORGANIZATION",
+             "MISC": "MISC"
+         },
+         "low_confidence_score_multiplier": 0.4,
+         "low_score_entity_names": ["ORG"]
+     }
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=transformers_config,
+     action_type="MASK"
+ )
+
+ text = "Microsoft Corporation is located in Redmond, Washington"
+ result = detector.detect(text)
+ print(f"Masked text: {result.masked_text}")
+ ```
+
+ **Note**: Custom model configuration allows for:
+ - **Better accuracy** with domain-specific models
+ - **Multi-language support** by specifying different language codes
+ - **Fine-tuned models** trained on your specific data
+ - **Performance optimization** by choosing models suited to your use case
+
  #### Regex-based Detection
  ```python
  from netra.pii import RegexPIIDetector
@@ -555,102 +670,6 @@ Configuration values are resolved in the following order (highest to lowest prec
  4. **Default Values**: Fallback values defined in the SDK

  This allows you to:
- - Override any setting directly in code for maximum control
- - Use Netra-specific environment variables for Netra-specific settings
- - Fall back to standard OpenTelemetry variables for compatibility
- - Rely on sensible defaults when no other configuration is provided
-
- **Example**:
- ```bash
- export NETRA_APP_NAME="my-ai-service"
- export NETRA_OTLP_ENDPOINT="https://collector.example.com:4318"
- export NETRA_API_KEY="your-api-key-here"
- export NETRA_ENV="production"
- export NETRA_RESOURCE_ATTRS='{"team":"ai", "version":"1.0.0"}'
- ```
-
- ### Programmatic Configuration
-
- You can also configure the SDK programmatically when initializing:
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- Netra.init(
-     app_name="my-ai-service",
-     environment="production",
-     resource_attributes={"team": "ai", "version": "1.0.0"},
-     trace_content=True,
-     disable_batch=False,
-     instruments={InstrumentSet.OPENAI}
- )
- ```
-
- ### Custom Instrumentation Selection
-
- Control which instrumentations are enabled:
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- # Enable specific instruments
- Netra.init(
-     app_name="Selective App",
-     instruments={
-         InstrumentSet.OPENAI,
-         InstrumentSet.WEAVIATEDB,
-         InstrumentSet.FASTAPI
-     }
- )
-
- # Block specific instruments
- Netra.init(
-     app_name="Blocked App",
-     block_instruments={
-         InstrumentSet.HTTPX,  # Don't trace HTTPX calls
-         InstrumentSet.REDIS   # Don't trace Redis operations
-     }
- )
- ```
-
- ### 🌐 Custom Endpoint Integration
-
- Since Netra SDK follows the **OpenTelemetry standard**, you can integrate it with any OpenTelemetry-compatible observability backend:
-
- #### Popular OpenTelemetry Backends
- - **Jaeger** - Distributed tracing platform
- - **Zipkin** - Distributed tracing system
- - **Prometheus** - Monitoring and alerting toolkit
- - **Grafana** - Observability and data visualization
- - **New Relic** - Full-stack observability platform
- - **Datadog** - Monitoring and analytics platform
- - **Honeycomb** - Observability for complex systems
- - **Lightstep** - Distributed tracing and observability
- - **AWS X-Ray** - Distributed tracing service
- - **Google Cloud Trace** - Distributed tracing system
-
- #### Custom Endpoint Configuration
-
- **Recommended: Environment Variable Configuration (No Code Changes Required)**
- ```bash
- # Set custom OTLP endpoint via environment variables
- export NETRA_OTLP_ENDPOINT="https://your-custom-backend.com/v1/traces"
- export NETRA_HEADERS="authorization=Bearer your-token"
- ```
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- # Simple initialization - SDK automatically picks up environment variables
- Netra.init(app_name="Your App", instruments={InstrumentSet})
- # No endpoint configuration needed in code!
- ```
-
- #### Benefits of OpenTelemetry Compatibility
  - **🔄 Vendor Agnostic**: Switch between observability platforms without code changes
  - **📊 Standard Format**: Consistent telemetry data across all tools
  - **🔧 Flexible Integration**: Works with existing observability infrastructure
@@ -253,6 +253,119 @@ print(f"Masked text: {result.masked_text}")
  print(f"PII entities: {result.pii_entities}")
  ```

+ #### Custom Models for PII Detection
+
+ The `PresidioPIIDetector` supports custom NLP models through the `nlp_configuration` parameter, allowing you to use specialized models for improved PII detection accuracy. You can configure custom spaCy, Stanza, or transformers models:
+
+ ##### NLP Configuration Example
+
+ Follow this configuration structure to provide your custom models.
+ ```python
+ nlp_configuration = {
+     "nlp_engine_name": "spacy|stanza|transformers",
+     "models": [
+         {
+             "lang_code": "en",                # Language code
+             "model_name": "model_identifier"  # Varies by engine type
+         }
+     ],
+     "ner_model_configuration": {  # Optional, mainly for transformers
+         # Additional configuration options
+     }
+ }
+ ```
+
+ ##### Using Custom spaCy Models
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure custom spaCy model
+ spacy_config = {
+     "nlp_engine_name": "spacy",
+     "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=spacy_config,
+     action_type="MASK",
+     score_threshold=0.8
+ )
+
+ text = "Dr. Sarah Wilson works at 123 Main St, New York"
+ result = detector.detect(text)
+ print(f"Detected entities: {result.pii_entities}")
+ ```
+
+ ##### Using Stanza Models
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure Stanza model
+ stanza_config = {
+     "nlp_engine_name": "stanza",
+     "models": [{"lang_code": "en", "model_name": "en"}]
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=stanza_config,
+     action_type="FLAG"
+ )
+
+ text = "Contact Alice Smith at alice@company.com"
+ result = detector.detect(text)
+ print(f"PII detected: {result.has_pii}")
+ ```
+
+ ##### Using Transformers Models
+
+ For advanced NER capabilities, you can use transformer-based models:
+
+ ```python
+ from netra.pii import PresidioPIIDetector
+
+ # Configure transformers model with entity mapping
+ transformers_config = {
+     "nlp_engine_name": "transformers",
+     "models": [{
+         "lang_code": "en",
+         "model_name": {
+             "spacy": "en_core_web_sm",
+             "transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
+         }
+     }],
+     "ner_model_configuration": {
+         "labels_to_ignore": ["O"],
+         "model_to_presidio_entity_mapping": {
+             "PER": "PERSON",
+             "LOC": "LOCATION",
+             "ORG": "ORGANIZATION",
+             "MISC": "MISC"
+         },
+         "low_confidence_score_multiplier": 0.4,
+         "low_score_entity_names": ["ORG"]
+     }
+ }
+
+ detector = PresidioPIIDetector(
+     nlp_configuration=transformers_config,
+     action_type="MASK"
+ )
+
+ text = "Microsoft Corporation is located in Redmond, Washington"
+ result = detector.detect(text)
+ print(f"Masked text: {result.masked_text}")
+ ```
+
+ **Note**: Custom model configuration allows for:
+ - **Better accuracy** with domain-specific models
+ - **Multi-language support** by specifying different language codes
+ - **Fine-tuned models** trained on your specific data
+ - **Performance optimization** by choosing models suited to your use case
+
  #### Regex-based Detection
  ```python
  from netra.pii import RegexPIIDetector
@@ -477,102 +590,6 @@ Configuration values are resolved in the following order (highest to lowest prec
  4. **Default Values**: Fallback values defined in the SDK

  This allows you to:
- - Override any setting directly in code for maximum control
- - Use Netra-specific environment variables for Netra-specific settings
- - Fall back to standard OpenTelemetry variables for compatibility
- - Rely on sensible defaults when no other configuration is provided
-
- **Example**:
- ```bash
- export NETRA_APP_NAME="my-ai-service"
- export NETRA_OTLP_ENDPOINT="https://collector.example.com:4318"
- export NETRA_API_KEY="your-api-key-here"
- export NETRA_ENV="production"
- export NETRA_RESOURCE_ATTRS='{"team":"ai", "version":"1.0.0"}'
- ```
-
- ### Programmatic Configuration
-
- You can also configure the SDK programmatically when initializing:
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- Netra.init(
-     app_name="my-ai-service",
-     environment="production",
-     resource_attributes={"team": "ai", "version": "1.0.0"},
-     trace_content=True,
-     disable_batch=False,
-     instruments={InstrumentSet.OPENAI}
- )
- ```
-
- ### Custom Instrumentation Selection
-
- Control which instrumentations are enabled:
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- # Enable specific instruments
- Netra.init(
-     app_name="Selective App",
-     instruments={
-         InstrumentSet.OPENAI,
-         InstrumentSet.WEAVIATEDB,
-         InstrumentSet.FASTAPI
-     }
- )
-
- # Block specific instruments
- Netra.init(
-     app_name="Blocked App",
-     block_instruments={
-         InstrumentSet.HTTPX,  # Don't trace HTTPX calls
-         InstrumentSet.REDIS   # Don't trace Redis operations
-     }
- )
- ```
-
- ### 🌐 Custom Endpoint Integration
-
- Since Netra SDK follows the **OpenTelemetry standard**, you can integrate it with any OpenTelemetry-compatible observability backend:
-
- #### Popular OpenTelemetry Backends
- - **Jaeger** - Distributed tracing platform
- - **Zipkin** - Distributed tracing system
- - **Prometheus** - Monitoring and alerting toolkit
- - **Grafana** - Observability and data visualization
- - **New Relic** - Full-stack observability platform
- - **Datadog** - Monitoring and analytics platform
- - **Honeycomb** - Observability for complex systems
- - **Lightstep** - Distributed tracing and observability
- - **AWS X-Ray** - Distributed tracing service
- - **Google Cloud Trace** - Distributed tracing system
-
- #### Custom Endpoint Configuration
-
- **Recommended: Environment Variable Configuration (No Code Changes Required)**
- ```bash
- # Set custom OTLP endpoint via environment variables
- export NETRA_OTLP_ENDPOINT="https://your-custom-backend.com/v1/traces"
- export NETRA_HEADERS="authorization=Bearer your-token"
- ```
-
- ```python
- from netra import Netra
- from netra.instrumentation.instruments import InstrumentSet
-
- # Simple initialization - SDK automatically picks up environment variables
- Netra.init(app_name="Your App", instruments={InstrumentSet})
- # No endpoint configuration needed in code!
- ```
-
- #### Benefits of OpenTelemetry Compatibility
  - **🔄 Vendor Agnostic**: Switch between observability platforms without code changes
  - **📊 Standard Format**: Consistent telemetry data across all tools
  - **🔧 Flexible Integration**: Works with existing observability infrastructure
@@ -577,7 +577,7 @@ class PresidioPIIDetector(PIIDetector):
      call Presidio's Analyzer + Anonymizer on a string.

      Examples:
-         # Using default hash function
+         # Using default configuration
          detector = PresidioPIIDetector()
          result = detector.detect("My email is john@example.com")

@@ -592,6 +592,41 @@ class PresidioPIIDetector(PIIDetector):
              action_type="MASK",
              score_threshold=0.8
          )
+
+         # Using custom spaCy model configuration
+         spacy_config = {
+             "nlp_engine_name": "spacy",
+             "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}]
+         }
+         detector = PresidioPIIDetector(nlp_configuration=spacy_config)
+
+         # Using Stanza model configuration
+         stanza_config = {
+             "nlp_engine_name": "stanza",
+             "models": [{"lang_code": "en", "model_name": "en"}]
+         }
+         detector = PresidioPIIDetector(nlp_configuration=stanza_config)
+
+         # Using transformers model configuration
+         transformers_config = {
+             "nlp_engine_name": "transformers",
+             "models": [{
+                 "lang_code": "en",
+                 "model_name": {
+                     "spacy": "en_core_web_sm",
+                     "transformers": "dbmdz/bert-large-cased-finetuned-conll03-english"
+                 }
+             }],
+             "ner_model_configuration": {
+                 "labels_to_ignore": ["O"],
+                 "model_to_presidio_entity_mapping": {
+                     "PER": "PERSON",
+                     "LOC": "LOCATION",
+                     "ORG": "ORGANIZATION"
+                 }
+             }
+         }
+         detector = PresidioPIIDetector(nlp_configuration=transformers_config)
      """

      def __init__(
@@ -602,7 +637,35 @@ class PresidioPIIDetector(PIIDetector):
          action_type: Optional[Literal["BLOCK", "FLAG", "MASK"]] = None,
          anonymizer_cache_size: int = 1000,
          hash_function: Optional[Callable[[str], str]] = None,
+         nlp_configuration: Optional[Dict[str, Any]] = None,
      ) -> None:
+         """
+         Initialize the Presidio PII detector.
+
+         Args:
+             entities: List of entity types to detect. If None, uses DEFAULT_ENTITIES.
+             language: Language code for detection (default: "en").
+             score_threshold: Minimum confidence score for detections (default: 0.6).
+             action_type: Action to take when PII is detected ("BLOCK", "FLAG", "MASK").
+             anonymizer_cache_size: Size of the anonymizer cache (default: 1000).
+             hash_function: Custom hash function for anonymization.
+             nlp_configuration: Dictionary containing NLP engine configuration.
+                 Format: {
+                     "nlp_engine_name": "spacy|stanza|transformers",
+                     "models": [{"lang_code": "en", "model_name": "model_name"}],
+                     "ner_model_configuration": {...}  # Optional, for transformers
+                 }
+
+                 For spaCy and Stanza:
+                 - model_name should be a string (e.g., "en_core_web_lg", "en")
+
+                 For transformers:
+                 - model_name should be a dict with "spacy" and "transformers" keys
+                 - Example: {"spacy": "en_core_web_sm", "transformers": "model_path"}
+
+         Raises:
+             ImportError: If presidio-analyzer is not installed or required NLP library is missing.
+         """
          if action_type is None:
              action_type = "FLAG"
              env_action = os.getenv("NETRA_ACTION_TYPE", "FLAG")
@@ -610,18 +673,99 @@ class PresidioPIIDetector(PIIDetector):
              if env_action in ["BLOCK", "FLAG", "MASK"]:
                  action_type = cast(Literal["BLOCK", "FLAG", "MASK"], env_action)
          super().__init__(action_type=action_type)
+
+         # Import presidio-analyzer
          try:
              from presidio_analyzer import AnalyzerEngine  # noqa: F401
          except ImportError as exc:
-             raise ImportError("Presidio-based PII detection requires: presidio-analyzer. " "Install via pip.") from exc
+             raise ImportError("Presidio-based PII detection requires: presidio-analyzer. Install via pip.") from exc

          self.language: str = language
          self.entities: Optional[List[str]] = entities if entities else DEFAULT_ENTITIES
          self.score_threshold: float = score_threshold

-         self.analyzer = AnalyzerEngine()
+         # Initialize AnalyzerEngine with custom or default NLP engine
+         if nlp_configuration is not None:
+             self.analyzer = self._create_analyzer_with_custom_nlp(nlp_configuration)
+         else:
+             # Use default AnalyzerEngine
+             self.analyzer = AnalyzerEngine()
+
          self.anonymizer = Anonymizer(hash_function=hash_function, cache_size=anonymizer_cache_size)

+     def _create_analyzer_with_custom_nlp(self, nlp_configuration: Dict[str, Any]) -> Any:
+         """
+         Create an AnalyzerEngine with custom NLP configuration.
+
+         Args:
+             nlp_configuration: Dictionary containing NLP engine configuration.
+
+         Returns:
+             AnalyzerEngine instance with custom NLP engine.
+
+         Raises:
+             ImportError: If required NLP library is not available.
+         """
+         try:
+             from presidio_analyzer import AnalyzerEngine
+             from presidio_analyzer.nlp_engine import NlpEngineProvider
+         except ImportError as exc:
+             raise ImportError("Presidio-based PII detection requires: presidio-analyzer. Install via pip.") from exc
+
+         # Validate and prepare configuration
+         engine_name = nlp_configuration.get("nlp_engine_name", "").lower()
+
+         # Perform lazy imports based on engine type
+         if engine_name == "spacy":
+             self._ensure_spacy_available()
+         elif engine_name == "stanza":
+             self._ensure_stanza_available()
+         elif engine_name == "transformers":
+             self._ensure_transformers_available()
+         else:
+             # Default behavior - let Presidio handle it
+             pass
+
+         # Create NLP engine from configuration
+         provider = NlpEngineProvider(nlp_configuration=nlp_configuration)
+         custom_nlp_engine = provider.create_engine()
+
+         # Extract supported languages from configuration
+         supported_languages = [self.language]
+         if "models" in nlp_configuration:
+             supported_languages = [model["lang_code"] for model in nlp_configuration["models"]]
+
+         return AnalyzerEngine(nlp_engine=custom_nlp_engine, supported_languages=supported_languages)
+
+     def _ensure_spacy_available(self) -> None:
+         """Ensure spaCy is available when needed."""
+         try:
+             import spacy  # noqa: F401
+         except ImportError as exc:
+             raise ImportError(
+                 "spaCy is required for spaCy-based PII detection. Install via: pip install spacy"
+             ) from exc
+
+     def _ensure_stanza_available(self) -> None:
+         """Ensure Stanza is available when needed."""
+         try:
+             import stanza  # noqa: F401
+         except ImportError as exc:
+             raise ImportError(
+                 "Stanza is required for Stanza-based PII detection. Install via: pip install stanza"
+             ) from exc
+
+     def _ensure_transformers_available(self) -> None:
+         """Ensure transformers is available when needed."""
+         try:
+             import torch  # noqa: F401
+             import transformers  # noqa: F401
+         except ImportError as exc:
+             raise ImportError(
+                 "Transformers and PyTorch are required for transformers-based PII detection. "
+                 "Install via: pip install transformers torch"
+             ) from exc
+
      def _detect_pii(self, text: str) -> Tuple[bool, Counter[str], str, Dict[str, str]]:
          """
          Detect PII in a single message.
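This hunk routes `nlp_configuration` through `__init__` and lazily validates the chosen engine before building the analyzer. A minimal usage sketch of that path (the spaCy engine and the `en_core_web_lg` model are illustrative choices, not requirements):

```python
from netra.pii import PresidioPIIDetector

# Sketch only: assumes the "presidio" extra and a spaCy model such as
# en_core_web_lg are installed; otherwise the lazy checks above raise ImportError.
spacy_config = {
    "nlp_engine_name": "spacy",
    "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}],
}

try:
    detector = PresidioPIIDetector(nlp_configuration=spacy_config, action_type="MASK")
    result = detector.detect("Reach Jane Doe at jane@example.com")
    print(result.masked_text)
except ImportError as exc:
    # Raised by the presidio import guard or _ensure_spacy_available()
    # when the optional dependencies are missing.
    print(f"Optional NLP dependency missing: {exc}")
```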
@@ -666,6 +810,7 @@ def get_default_detector(
      action_type: Optional[Literal["BLOCK", "FLAG", "MASK"]] = None,
      entities: Optional[List[str]] = None,
      hash_function: Optional[Callable[[str], str]] = None,
+     nlp_configuration: Optional[Dict[str, Any]] = None,
  ) -> PIIDetector:
      """
      Returns a default PII detector instance (Presidio-based by default).
@@ -678,8 +823,11 @@ def get_default_detector(
              - "MASK": Replace PII with mask tokens (default)
          entities: Optional list of entity types to detect. If None, uses Presidio's default entities
          hash_function: Optional custom hash function for anonymization. If None, uses default hash function.
+         nlp_configuration: Dictionary containing NLP engine configuration for custom models.
      """
-     return PresidioPIIDetector(action_type=action_type, entities=entities, hash_function=hash_function)
+     return PresidioPIIDetector(
+         action_type=action_type, entities=entities, hash_function=hash_function, nlp_configuration=nlp_configuration
+     )


  # ---------------------------------------------------------------------------- #
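The module-level helper forwards the same parameter, so callers using `get_default_detector` can opt into a custom engine without constructing the detector class directly. A short sketch, assuming the helper is importable from `netra.pii` alongside the detectors:

```python
from netra.pii import get_default_detector

# Sketch: the Stanza "en" model mirrors the example configuration in the README diff.
stanza_config = {
    "nlp_engine_name": "stanza",
    "models": [{"lang_code": "en", "model_name": "en"}],
}

detector = get_default_detector(action_type="FLAG", nlp_configuration=stanza_config)
result = detector.detect("Contact Alice Smith at alice@company.com")
print(result.has_pii)
```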
@@ -0,0 +1 @@
+ __version__ = "0.1.20"
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

  [project]
  name = "netra-sdk"
- version = "0.1.19"
+ version = "0.1.20"
  description = "A Python SDK for AI application observability that provides OpenTelemetry-based monitoring, tracing, and PII protection for LLM and vector database applications. Enables easy instrumentation, session tracking, and privacy-focused data collection for AI systems in production environments."
  authors = [
      {name = "Sooraj Thomas",email = "sooraj@keyvalue.systems"}
@@ -95,6 +95,8 @@ llm_guard = [
  presidio = [
      "presidio-analyzer==2.2.358",
      "presidio-anonymizer==2.2.358",
+     "transformers==4.51.3",
+     "stanza>=1.10.1,<2.0.0"
  ]

  [tool.poetry.group.dev.dependencies]
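Because `transformers` and `stanza` ship only with the optional `presidio` extra, a quick way to confirm they are present at runtime is a standard-library check; a sketch (module names mirror the distributions pinned above):

```python
import importlib.util

# Optional packages pulled in by the "presidio" extra in 0.1.20.
for module in ("presidio_analyzer", "presidio_anonymizer", "transformers", "stanza"):
    status = "available" if importlib.util.find_spec(module) else "missing"
    print(f"{module}: {status}")
```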
@@ -1 +0,0 @@
- __version__ = "0.1.19"