pii-protector 2.2.1__tar.gz → 2.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {pii_protector-2.2.1 → pii_protector-2.2.2}/PKG-INFO +4 -4
  2. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/__init__.py +1 -1
  3. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/detector.py +47 -46
  4. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/PKG-INFO +4 -4
  5. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/requires.txt +2 -2
  6. {pii_protector-2.2.1 → pii_protector-2.2.2}/pyproject.toml +5 -5
  7. {pii_protector-2.2.1 → pii_protector-2.2.2}/README.md +0 -0
  8. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/__main__.py +0 -0
  9. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/cli.py +0 -0
  10. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/config/__init__.py +0 -0
  11. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/config/settings.py +0 -0
  12. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/__init__.py +0 -0
  13. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/api_secrets.py +0 -0
  14. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/cloud_providers.py +0 -0
  15. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/communication.py +0 -0
  16. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/confidential_markers.py +0 -0
  17. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/core_pii.py +0 -0
  18. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/dates_urls.py +0 -0
  19. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/file_control.py +0 -0
  20. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/india_pii.py +0 -0
  21. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/monitoring.py +0 -0
  22. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/north_america_pii.py +0 -0
  23. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/payment.py +0 -0
  24. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/version_control.py +0 -0
  25. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/utils/__init__.py +0 -0
  26. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/utils/helpers.py +0 -0
  27. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/__init__.py +0 -0
  28. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/api_validators.py +0 -0
  29. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/pii_validators.py +0 -0
  30. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/SOURCES.txt +0 -0
  31. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/dependency_links.txt +0 -0
  32. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/entry_points.txt +0 -0
  33. {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/top_level.txt +0 -0
  34. {pii_protector-2.2.1 → pii_protector-2.2.2}/setup.cfg +0 -0
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pii-protector
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: Production-grade PII detection with multi-model ensemble
5
5
  Author-email: tensoryug <jainsatyam26@gmail.com>
6
6
  License-Expression: MIT
7
- Project-URL: Homepage, https://github.com/sjain26/pii-guard
7
+ Project-URL: Homepage, https://github.com/sjain26/pii-detector
8
8
  Keywords: pii,privacy,nlp,detection,presidio,data-protection
9
9
  Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Intended Audience :: Developers
@@ -22,12 +22,12 @@ Requires-Dist: presidio-analyzer>=2.2; extra == "presidio"
22
22
  Requires-Dist: spacy>=3.7; extra == "presidio"
23
23
  Provides-Extra: transformers
24
24
  Requires-Dist: transformers>=4.40; extra == "transformers"
25
- Requires-Dist: torch>=2.0; extra == "transformers"
25
+ Requires-Dist: optimum[onnxruntime]>=1.18; extra == "transformers"
26
26
  Provides-Extra: full
27
27
  Requires-Dist: presidio-analyzer>=2.2; extra == "full"
28
28
  Requires-Dist: spacy>=3.7; extra == "full"
29
29
  Requires-Dist: transformers>=4.40; extra == "full"
30
- Requires-Dist: torch>=2.0; extra == "full"
30
+ Requires-Dist: optimum[onnxruntime]>=1.18; extra == "full"
31
31
 
32
32
  # pii-protector
33
33
 
@@ -7,5 +7,5 @@ Multi-model ensemble with pre-compiled patterns and type-aware voting
7
7
  from .detector import AdvancedPIIDetector, PIIEntity, PresidioCustomDetector
8
8
  from .config.settings import PII_CONFIG
9
9
 
10
- __version__ = "2.2.1"
10
+ __version__ = "2.2.2"
11
11
  __all__ = ["AdvancedPIIDetector", "PIIEntity", "PresidioCustomDetector", "PII_CONFIG"]
@@ -317,6 +317,25 @@ class AdvancedPIIDetector:
317
317
  if self.use_pii_model:
318
318
  self._init_pii_transformer() # PII model structured PII
319
319
 
320
+ if self.use_presidio and not self.presidio:
321
+ warnings.warn(
322
+ "Presidio (Layer 2) unavailable — NAME/ORG/LOCATION detection disabled. "
323
+ "Install: pip install 'pii-protector[presidio]' then: python -m spacy download en_core_web_lg",
324
+ UserWarning, stacklevel=2,
325
+ )
326
+ if self.use_transformers and not self.ner_pipeline:
327
+ warnings.warn(
328
+ "NER transformer (Layer 3) unavailable — high-accuracy name detection disabled. "
329
+ "Install: pip install 'optimum[onnxruntime]>=1.18' 'transformers>=4.40'",
330
+ UserWarning, stacklevel=2,
331
+ )
332
+ if self.use_pii_model and not self.pii_pipeline:
333
+ warnings.warn(
334
+ "PII model (Layer 4) unavailable — PHONE/EMAIL/DOB/SSN transformer detection disabled. "
335
+ "Install: pip install 'optimum[onnxruntime]>=1.18' 'transformers>=4.40'",
336
+ UserWarning, stacklevel=2,
337
+ )
338
+
320
339
  logger.info(
321
340
  f"PII Detector ready | mode=SEQUENTIAL_LAYERED "
322
341
  f"| presidio={'ON' if self.presidio else 'OFF'} "
@@ -363,59 +382,41 @@ class AdvancedPIIDetector:
363
382
  logger.warning(f"Presidio unavailable: {e}")
364
383
 
365
384
  def _init_transformers(self):
366
- """NER transformer — high-accuracy NAME/ORG/LOC."""
385
+ """NER transformer — high-accuracy NAME/ORG/LOC (ONNX Runtime)."""
367
386
  try:
368
- from transformers import pipeline
369
- try:
370
- with _suppress_stdout():
371
- self.ner_pipeline = pipeline(
372
- "ner",
373
- model=self.transformer_model_name,
374
- aggregation_strategy="first",
375
- device=0,
376
- model_kwargs={"dtype": "float16"},
377
- )
378
- self.ner_pipeline("warmup") # GPU kernel compile
379
- logger.info(f"NER ready on GPU ({self.transformer_model_name})")
380
- except Exception:
381
- with _suppress_stdout():
382
- self.ner_pipeline = pipeline(
383
- "ner",
384
- model=self.transformer_model_name,
385
- aggregation_strategy="first",
386
- device=-1,
387
- )
388
- self.ner_pipeline("warmup")
389
- logger.info(f"NER ready on CPU ({self.transformer_model_name})")
387
+ from optimum.onnxruntime import ORTModelForTokenClassification
388
+ from transformers import AutoTokenizer, pipeline
389
+ with _suppress_stdout():
390
+ model = ORTModelForTokenClassification.from_pretrained(self.transformer_model_name)
391
+ tokenizer = AutoTokenizer.from_pretrained(self.transformer_model_name)
392
+ self.ner_pipeline = pipeline(
393
+ "ner",
394
+ model=model,
395
+ tokenizer=tokenizer,
396
+ aggregation_strategy="first",
397
+ )
398
+ self.ner_pipeline("warmup")
390
399
  self._ner_sources_count += 1
400
+ logger.info(f"NER ready ({self.transformer_model_name})")
391
401
  except Exception as e:
392
402
  logger.warning(f"NER transformer unavailable: {e}")
393
403
 
394
404
  def _init_pii_transformer(self):
395
- """PII model structured PII detection — PHONE, EMAIL, SSN, DOB, ADDRESS etc."""
405
+ """PII model structured PII detection — PHONE, EMAIL, SSN, DOB, ADDRESS etc. (ONNX Runtime)."""
396
406
  try:
397
- from transformers import pipeline
398
- try:
399
- with _suppress_stdout():
400
- self.pii_pipeline = pipeline(
401
- "token-classification",
402
- model=self.pii_model_name,
403
- aggregation_strategy="first",
404
- device=0,
405
- model_kwargs={"dtype": "float16"},
406
- )
407
- self.pii_pipeline("warmup") # GPU kernel compile
408
- logger.info(f"PII model ready on GPU ({self.pii_model_name})")
409
- except Exception:
410
- with _suppress_stdout():
411
- self.pii_pipeline = pipeline(
412
- "token-classification",
413
- model=self.pii_model_name,
414
- aggregation_strategy="first",
415
- device=-1,
416
- )
417
- self.pii_pipeline("warmup")
418
- logger.info(f"PII model ready on CPU ({self.pii_model_name})")
407
+ from optimum.onnxruntime import ORTModelForTokenClassification
408
+ from transformers import AutoTokenizer, pipeline
409
+ with _suppress_stdout():
410
+ model = ORTModelForTokenClassification.from_pretrained(self.pii_model_name)
411
+ tokenizer = AutoTokenizer.from_pretrained(self.pii_model_name)
412
+ self.pii_pipeline = pipeline(
413
+ "token-classification",
414
+ model=model,
415
+ tokenizer=tokenizer,
416
+ aggregation_strategy="first",
417
+ )
418
+ self.pii_pipeline("warmup")
419
+ logger.info(f"PII model ready ({self.pii_model_name})")
419
420
  except Exception as e:
420
421
  logger.warning(f"PII model unavailable: {e}")
421
422
 
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pii-protector
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: Production-grade PII detection with multi-model ensemble
5
5
  Author-email: tensoryug <jainsatyam26@gmail.com>
6
6
  License-Expression: MIT
7
- Project-URL: Homepage, https://github.com/sjain26/pii-guard
7
+ Project-URL: Homepage, https://github.com/sjain26/pii-detector
8
8
  Keywords: pii,privacy,nlp,detection,presidio,data-protection
9
9
  Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Intended Audience :: Developers
@@ -22,12 +22,12 @@ Requires-Dist: presidio-analyzer>=2.2; extra == "presidio"
22
22
  Requires-Dist: spacy>=3.7; extra == "presidio"
23
23
  Provides-Extra: transformers
24
24
  Requires-Dist: transformers>=4.40; extra == "transformers"
25
- Requires-Dist: torch>=2.0; extra == "transformers"
25
+ Requires-Dist: optimum[onnxruntime]>=1.18; extra == "transformers"
26
26
  Provides-Extra: full
27
27
  Requires-Dist: presidio-analyzer>=2.2; extra == "full"
28
28
  Requires-Dist: spacy>=3.7; extra == "full"
29
29
  Requires-Dist: transformers>=4.40; extra == "full"
30
- Requires-Dist: torch>=2.0; extra == "full"
30
+ Requires-Dist: optimum[onnxruntime]>=1.18; extra == "full"
31
31
 
32
32
  # pii-protector
33
33
 
@@ -3,7 +3,7 @@
3
3
  presidio-analyzer>=2.2
4
4
  spacy>=3.7
5
5
  transformers>=4.40
6
- torch>=2.0
6
+ optimum[onnxruntime]>=1.18
7
7
 
8
8
  [presidio]
9
9
  presidio-analyzer>=2.2
@@ -11,4 +11,4 @@ spacy>=3.7
11
11
 
12
12
  [transformers]
13
13
  transformers>=4.40
14
- torch>=2.0
14
+ optimum[onnxruntime]>=1.18
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pii-protector"
7
- version = "2.2.1"
7
+ version = "2.2.2"
8
8
  description = "Production-grade PII detection with multi-model ensemble"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -35,24 +35,24 @@ presidio = [
35
35
  "presidio-analyzer>=2.2",
36
36
  "spacy>=3.7",
37
37
  ]
38
- # Layer 3 & 4: NER transformer + PII model
38
+ # Layer 3 & 4: NER transformer + PII model (ONNX Runtime)
39
39
  transformers = [
40
40
  "transformers>=4.40",
41
- "torch>=2.0",
41
+ "optimum[onnxruntime]>=1.18",
42
42
  ]
43
43
  # Install everything
44
44
  full = [
45
45
  "presidio-analyzer>=2.2",
46
46
  "spacy>=3.7",
47
47
  "transformers>=4.40",
48
- "torch>=2.0",
48
+ "optimum[onnxruntime]>=1.18",
49
49
  ]
50
50
 
51
51
  [project.scripts]
52
52
  pii-detect = "pii_detector.cli:main"
53
53
 
54
54
  [project.urls]
55
- Homepage = "https://github.com/sjain26/pii-guard"
55
+ Homepage = "https://github.com/sjain26/pii-detector"
56
56
 
57
57
  [tool.setuptools.packages.find]
58
58
  include = ["pii_detector*"]
File without changes
File without changes