pii-protector 2.2.1__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pii_protector-2.2.1 → pii_protector-2.2.2}/PKG-INFO +4 -4
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/__init__.py +1 -1
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/detector.py +47 -46
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/PKG-INFO +4 -4
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/requires.txt +2 -2
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pyproject.toml +5 -5
- {pii_protector-2.2.1 → pii_protector-2.2.2}/README.md +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/__main__.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/cli.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/config/__init__.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/config/settings.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/__init__.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/api_secrets.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/cloud_providers.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/communication.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/confidential_markers.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/core_pii.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/dates_urls.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/file_control.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/india_pii.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/monitoring.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/north_america_pii.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/payment.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/patterns/version_control.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/utils/__init__.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/utils/helpers.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/__init__.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/api_validators.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_detector/validators/pii_validators.py +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/SOURCES.txt +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/dependency_links.txt +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/entry_points.txt +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/pii_protector.egg-info/top_level.txt +0 -0
- {pii_protector-2.2.1 → pii_protector-2.2.2}/setup.cfg +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pii-protector
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Production-grade PII detection with multi-model ensemble
|
|
5
5
|
Author-email: tensoryug <jainsatyam26@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/sjain26/pii-
|
|
7
|
+
Project-URL: Homepage, https://github.com/sjain26/pii-detector
|
|
8
8
|
Keywords: pii,privacy,nlp,detection,presidio,data-protection
|
|
9
9
|
Classifier: Development Status :: 5 - Production/Stable
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
@@ -22,12 +22,12 @@ Requires-Dist: presidio-analyzer>=2.2; extra == "presidio"
|
|
|
22
22
|
Requires-Dist: spacy>=3.7; extra == "presidio"
|
|
23
23
|
Provides-Extra: transformers
|
|
24
24
|
Requires-Dist: transformers>=4.40; extra == "transformers"
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: optimum[onnxruntime]>=1.18; extra == "transformers"
|
|
26
26
|
Provides-Extra: full
|
|
27
27
|
Requires-Dist: presidio-analyzer>=2.2; extra == "full"
|
|
28
28
|
Requires-Dist: spacy>=3.7; extra == "full"
|
|
29
29
|
Requires-Dist: transformers>=4.40; extra == "full"
|
|
30
|
-
Requires-Dist:
|
|
30
|
+
Requires-Dist: optimum[onnxruntime]>=1.18; extra == "full"
|
|
31
31
|
|
|
32
32
|
# pii-protector
|
|
33
33
|
|
|
@@ -7,5 +7,5 @@ Multi-model ensemble with pre-compiled patterns and type-aware voting
|
|
|
7
7
|
from .detector import AdvancedPIIDetector, PIIEntity, PresidioCustomDetector
|
|
8
8
|
from .config.settings import PII_CONFIG
|
|
9
9
|
|
|
10
|
-
__version__ = "2.2.1"
|
|
10
|
+
__version__ = "2.2.2"
|
|
11
11
|
__all__ = ["AdvancedPIIDetector", "PIIEntity", "PresidioCustomDetector", "PII_CONFIG"]
|
|
@@ -317,6 +317,25 @@ class AdvancedPIIDetector:
|
|
|
317
317
|
if self.use_pii_model:
|
|
318
318
|
self._init_pii_transformer() # PII model structured PII
|
|
319
319
|
|
|
320
|
+
if self.use_presidio and not self.presidio:
|
|
321
|
+
warnings.warn(
|
|
322
|
+
"Presidio (Layer 2) unavailable — NAME/ORG/LOCATION detection disabled. "
|
|
323
|
+
"Install: pip install 'pii-protector[presidio]' then: python -m spacy download en_core_web_lg",
|
|
324
|
+
UserWarning, stacklevel=2,
|
|
325
|
+
)
|
|
326
|
+
if self.use_transformers and not self.ner_pipeline:
|
|
327
|
+
warnings.warn(
|
|
328
|
+
"NER transformer (Layer 3) unavailable — high-accuracy name detection disabled. "
|
|
329
|
+
"Install: pip install 'optimum[onnxruntime]>=1.18' 'transformers>=4.40'",
|
|
330
|
+
UserWarning, stacklevel=2,
|
|
331
|
+
)
|
|
332
|
+
if self.use_pii_model and not self.pii_pipeline:
|
|
333
|
+
warnings.warn(
|
|
334
|
+
"PII model (Layer 4) unavailable — PHONE/EMAIL/DOB/SSN transformer detection disabled. "
|
|
335
|
+
"Install: pip install 'optimum[onnxruntime]>=1.18' 'transformers>=4.40'",
|
|
336
|
+
UserWarning, stacklevel=2,
|
|
337
|
+
)
|
|
338
|
+
|
|
320
339
|
logger.info(
|
|
321
340
|
f"PII Detector ready | mode=SEQUENTIAL_LAYERED "
|
|
322
341
|
f"| presidio={'ON' if self.presidio else 'OFF'} "
|
|
@@ -363,59 +382,41 @@ class AdvancedPIIDetector:
|
|
|
363
382
|
logger.warning(f"Presidio unavailable: {e}")
|
|
364
383
|
|
|
365
384
|
def _init_transformers(self):
|
|
366
|
-
"""NER transformer — high-accuracy NAME/ORG/LOC."""
|
|
385
|
+
"""NER transformer — high-accuracy NAME/ORG/LOC (ONNX Runtime)."""
|
|
367
386
|
try:
|
|
368
|
-
from
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
except Exception:
|
|
381
|
-
with _suppress_stdout():
|
|
382
|
-
self.ner_pipeline = pipeline(
|
|
383
|
-
"ner",
|
|
384
|
-
model=self.transformer_model_name,
|
|
385
|
-
aggregation_strategy="first",
|
|
386
|
-
device=-1,
|
|
387
|
-
)
|
|
388
|
-
self.ner_pipeline("warmup")
|
|
389
|
-
logger.info(f"NER ready on CPU ({self.transformer_model_name})")
|
|
387
|
+
from optimum.onnxruntime import ORTModelForTokenClassification
|
|
388
|
+
from transformers import AutoTokenizer, pipeline
|
|
389
|
+
with _suppress_stdout():
|
|
390
|
+
model = ORTModelForTokenClassification.from_pretrained(self.transformer_model_name)
|
|
391
|
+
tokenizer = AutoTokenizer.from_pretrained(self.transformer_model_name)
|
|
392
|
+
self.ner_pipeline = pipeline(
|
|
393
|
+
"ner",
|
|
394
|
+
model=model,
|
|
395
|
+
tokenizer=tokenizer,
|
|
396
|
+
aggregation_strategy="first",
|
|
397
|
+
)
|
|
398
|
+
self.ner_pipeline("warmup")
|
|
390
399
|
self._ner_sources_count += 1
|
|
400
|
+
logger.info(f"NER ready ({self.transformer_model_name})")
|
|
391
401
|
except Exception as e:
|
|
392
402
|
logger.warning(f"NER transformer unavailable: {e}")
|
|
393
403
|
|
|
394
404
|
def _init_pii_transformer(self):
|
|
395
|
-
"""PII model structured PII detection — PHONE, EMAIL, SSN, DOB, ADDRESS etc."""
|
|
405
|
+
"""PII model structured PII detection — PHONE, EMAIL, SSN, DOB, ADDRESS etc. (ONNX Runtime)."""
|
|
396
406
|
try:
|
|
397
|
-
from
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
with _suppress_stdout():
|
|
411
|
-
self.pii_pipeline = pipeline(
|
|
412
|
-
"token-classification",
|
|
413
|
-
model=self.pii_model_name,
|
|
414
|
-
aggregation_strategy="first",
|
|
415
|
-
device=-1,
|
|
416
|
-
)
|
|
417
|
-
self.pii_pipeline("warmup")
|
|
418
|
-
logger.info(f"PII model ready on CPU ({self.pii_model_name})")
|
|
407
|
+
from optimum.onnxruntime import ORTModelForTokenClassification
|
|
408
|
+
from transformers import AutoTokenizer, pipeline
|
|
409
|
+
with _suppress_stdout():
|
|
410
|
+
model = ORTModelForTokenClassification.from_pretrained(self.pii_model_name)
|
|
411
|
+
tokenizer = AutoTokenizer.from_pretrained(self.pii_model_name)
|
|
412
|
+
self.pii_pipeline = pipeline(
|
|
413
|
+
"token-classification",
|
|
414
|
+
model=model,
|
|
415
|
+
tokenizer=tokenizer,
|
|
416
|
+
aggregation_strategy="first",
|
|
417
|
+
)
|
|
418
|
+
self.pii_pipeline("warmup")
|
|
419
|
+
logger.info(f"PII model ready ({self.pii_model_name})")
|
|
419
420
|
except Exception as e:
|
|
420
421
|
logger.warning(f"PII model unavailable: {e}")
|
|
421
422
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pii-protector
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Production-grade PII detection with multi-model ensemble
|
|
5
5
|
Author-email: tensoryug <jainsatyam26@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/sjain26/pii-
|
|
7
|
+
Project-URL: Homepage, https://github.com/sjain26/pii-detector
|
|
8
8
|
Keywords: pii,privacy,nlp,detection,presidio,data-protection
|
|
9
9
|
Classifier: Development Status :: 5 - Production/Stable
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
@@ -22,12 +22,12 @@ Requires-Dist: presidio-analyzer>=2.2; extra == "presidio"
|
|
|
22
22
|
Requires-Dist: spacy>=3.7; extra == "presidio"
|
|
23
23
|
Provides-Extra: transformers
|
|
24
24
|
Requires-Dist: transformers>=4.40; extra == "transformers"
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: optimum[onnxruntime]>=1.18; extra == "transformers"
|
|
26
26
|
Provides-Extra: full
|
|
27
27
|
Requires-Dist: presidio-analyzer>=2.2; extra == "full"
|
|
28
28
|
Requires-Dist: spacy>=3.7; extra == "full"
|
|
29
29
|
Requires-Dist: transformers>=4.40; extra == "full"
|
|
30
|
-
Requires-Dist:
|
|
30
|
+
Requires-Dist: optimum[onnxruntime]>=1.18; extra == "full"
|
|
31
31
|
|
|
32
32
|
# pii-protector
|
|
33
33
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
presidio-analyzer>=2.2
|
|
4
4
|
spacy>=3.7
|
|
5
5
|
transformers>=4.40
|
|
6
|
-
|
|
6
|
+
optimum[onnxruntime]>=1.18
|
|
7
7
|
|
|
8
8
|
[presidio]
|
|
9
9
|
presidio-analyzer>=2.2
|
|
@@ -11,4 +11,4 @@ spacy>=3.7
|
|
|
11
11
|
|
|
12
12
|
[transformers]
|
|
13
13
|
transformers>=4.40
|
|
14
|
-
|
|
14
|
+
optimum[onnxruntime]>=1.18
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pii-protector"
|
|
7
|
-
version = "2.2.1"
|
|
7
|
+
version = "2.2.2"
|
|
8
8
|
description = "Production-grade PII detection with multi-model ensemble"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -35,24 +35,24 @@ presidio = [
|
|
|
35
35
|
"presidio-analyzer>=2.2",
|
|
36
36
|
"spacy>=3.7",
|
|
37
37
|
]
|
|
38
|
-
# Layer 3 & 4: NER transformer + PII model
|
|
38
|
+
# Layer 3 & 4: NER transformer + PII model (ONNX Runtime)
|
|
39
39
|
transformers = [
|
|
40
40
|
"transformers>=4.40",
|
|
41
|
-
"
|
|
41
|
+
"optimum[onnxruntime]>=1.18",
|
|
42
42
|
]
|
|
43
43
|
# Install everything
|
|
44
44
|
full = [
|
|
45
45
|
"presidio-analyzer>=2.2",
|
|
46
46
|
"spacy>=3.7",
|
|
47
47
|
"transformers>=4.40",
|
|
48
|
-
"
|
|
48
|
+
"optimum[onnxruntime]>=1.18",
|
|
49
49
|
]
|
|
50
50
|
|
|
51
51
|
[project.scripts]
|
|
52
52
|
pii-detect = "pii_detector.cli:main"
|
|
53
53
|
|
|
54
54
|
[project.urls]
|
|
55
|
-
Homepage = "https://github.com/sjain26/pii-
|
|
55
|
+
Homepage = "https://github.com/sjain26/pii-detector"
|
|
56
56
|
|
|
57
57
|
[tool.setuptools.packages.find]
|
|
58
58
|
include = ["pii_detector*"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|