admina-framework 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- admina/__init__.py +34 -0
- admina/cli/__init__.py +14 -0
- admina/cli/commands/__init__.py +14 -0
- admina/cli/main.py +1522 -0
- admina/cli/templates/admina.yaml.j2 +77 -0
- admina/cli/templates/docker-compose.yml.j2 +254 -0
- admina/cli/templates/env.j2 +10 -0
- admina/cli/templates/main.py.j2 +95 -0
- admina/cli/templates/plugin.py.j2 +145 -0
- admina/cli/templates/plugin_pyproject.toml.j2 +15 -0
- admina/cli/templates/plugin_readme.md.j2 +27 -0
- admina/cli/templates/plugin_test.py.j2 +48 -0
- admina/core/__init__.py +14 -0
- admina/core/config.py +497 -0
- admina/core/event_bus.py +112 -0
- admina/core/secrets.py +257 -0
- admina/core/types.py +146 -0
- admina/dashboard/__init__.py +8 -0
- admina/dashboard/static/heimdall.png +0 -0
- admina/dashboard/static/index.html +1045 -0
- admina/dashboard/static/vendor/alpinejs.min.js +5 -0
- admina/domains/__init__.py +14 -0
- admina/domains/agent_security/__init__.py +41 -0
- admina/domains/agent_security/firewall.py +634 -0
- admina/domains/agent_security/loop_breaker.py +176 -0
- admina/domains/ai_infra/__init__.py +79 -0
- admina/domains/ai_infra/llm_engine.py +477 -0
- admina/domains/ai_infra/rag.py +817 -0
- admina/domains/ai_infra/webui.py +292 -0
- admina/domains/compliance/__init__.py +109 -0
- admina/domains/compliance/cross_regulation.py +314 -0
- admina/domains/compliance/eu_ai_act.py +367 -0
- admina/domains/compliance/forensic.py +380 -0
- admina/domains/compliance/gdpr.py +331 -0
- admina/domains/compliance/nis2.py +258 -0
- admina/domains/compliance/oisg.py +658 -0
- admina/domains/compliance/otel.py +101 -0
- admina/domains/data_sovereignty/__init__.py +42 -0
- admina/domains/data_sovereignty/classification.py +102 -0
- admina/domains/data_sovereignty/pii.py +260 -0
- admina/domains/data_sovereignty/residency.py +121 -0
- admina/integrations/__init__.py +14 -0
- admina/integrations/_engines.py +63 -0
- admina/integrations/cheshirecat/__init__.py +13 -0
- admina/integrations/cheshirecat/admina-plugin/admina_governance.py +207 -0
- admina/integrations/crewai/__init__.py +13 -0
- admina/integrations/crewai/callbacks.py +347 -0
- admina/integrations/langchain/__init__.py +13 -0
- admina/integrations/langchain/callbacks.py +341 -0
- admina/integrations/n8n/__init__.py +14 -0
- admina/integrations/openclaw/__init__.py +14 -0
- admina/plugins/__init__.py +49 -0
- admina/plugins/base.py +633 -0
- admina/plugins/builtin/__init__.py +14 -0
- admina/plugins/builtin/adapters/__init__.py +14 -0
- admina/plugins/builtin/adapters/ollama.py +120 -0
- admina/plugins/builtin/adapters/openai.py +138 -0
- admina/plugins/builtin/alerts/__init__.py +14 -0
- admina/plugins/builtin/alerts/log.py +66 -0
- admina/plugins/builtin/alerts/webhook.py +102 -0
- admina/plugins/builtin/auth/__init__.py +14 -0
- admina/plugins/builtin/auth/apikey.py +138 -0
- admina/plugins/builtin/compliance/__init__.py +14 -0
- admina/plugins/builtin/compliance/eu_ai_act.py +202 -0
- admina/plugins/builtin/connectors/__init__.py +14 -0
- admina/plugins/builtin/connectors/chromadb.py +137 -0
- admina/plugins/builtin/connectors/filesystem.py +111 -0
- admina/plugins/builtin/forensic/__init__.py +14 -0
- admina/plugins/builtin/forensic/filesystem.py +163 -0
- admina/plugins/builtin/forensic/minio.py +180 -0
- admina/plugins/builtin/guards/__init__.py +0 -0
- admina/plugins/builtin/guards/guardrailsai_guard.py +172 -0
- admina/plugins/builtin/pii/__init__.py +14 -0
- admina/plugins/builtin/pii/spacy_regex.py +160 -0
- admina/plugins/builtin/transports/__init__.py +14 -0
- admina/plugins/builtin/transports/http_rest.py +97 -0
- admina/plugins/builtin/transports/mcp.py +173 -0
- admina/plugins/registry.py +356 -0
- admina/proxy/__init__.py +15 -0
- admina/proxy/api/__init__.py +17 -0
- admina/proxy/api/dashboard.py +925 -0
- admina/proxy/api/integration.py +153 -0
- admina/proxy/config.py +214 -0
- admina/proxy/engine_bridge.py +306 -0
- admina/proxy/governance.py +232 -0
- admina/proxy/main.py +1484 -0
- admina/proxy/multi_upstream.py +156 -0
- admina/proxy/state.py +97 -0
- admina/py.typed +0 -0
- admina/sdk/__init__.py +34 -0
- admina/sdk/_compat.py +43 -0
- admina/sdk/compliance_kit.py +359 -0
- admina/sdk/governed_agent.py +391 -0
- admina/sdk/governed_data.py +434 -0
- admina/sdk/governed_model.py +241 -0
- admina_framework-0.9.0.dist-info/METADATA +575 -0
- admina_framework-0.9.0.dist-info/RECORD +102 -0
- admina_framework-0.9.0.dist-info/WHEEL +5 -0
- admina_framework-0.9.0.dist-info/entry_points.txt +2 -0
- admina_framework-0.9.0.dist-info/licenses/LICENSE +191 -0
- admina_framework-0.9.0.dist-info/licenses/NOTICE +16 -0
- admina_framework-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Admina — OpenTelemetry governance observability.
|
|
16
|
+
|
|
17
|
+
Provides structured OTEL tracing for governance decisions.
|
|
18
|
+
Every governance domain decision emits a span with action, risk level, latency.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("admina.compliance.otel")
|
|
27
|
+
|
|
28
|
+
# Try to import OTEL — optional dependency at module level
|
|
29
|
+
try:
|
|
30
|
+
from opentelemetry import trace
|
|
31
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
32
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
33
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
34
|
+
|
|
35
|
+
_OTEL_AVAILABLE = True
|
|
36
|
+
except ImportError:
|
|
37
|
+
_OTEL_AVAILABLE = False
|
|
38
|
+
logger.info("OpenTelemetry not installed — governance tracing disabled")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OTELGovernanceExporter:
|
|
42
|
+
"""Exports governance decisions as OTEL spans.
|
|
43
|
+
|
|
44
|
+
If OTEL SDK is not installed, all methods are no-ops.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
endpoint: OTLP gRPC endpoint (e.g., "http://localhost:4317").
|
|
48
|
+
service_name: Service name for the tracer.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
endpoint: str = "http://localhost:4317",
|
|
54
|
+
service_name: str = "admina-governance",
|
|
55
|
+
) -> None:
|
|
56
|
+
self._enabled = _OTEL_AVAILABLE
|
|
57
|
+
self._tracer = None
|
|
58
|
+
if self._enabled:
|
|
59
|
+
try:
|
|
60
|
+
provider = TracerProvider()
|
|
61
|
+
exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
|
|
62
|
+
provider.add_span_processor(BatchSpanProcessor(exporter))
|
|
63
|
+
trace.set_tracer_provider(provider)
|
|
64
|
+
self._tracer = trace.get_tracer(service_name)
|
|
65
|
+
logger.info("OTEL exporter initialized -> %s", endpoint)
|
|
66
|
+
except (OSError, RuntimeError, ValueError) as exc:
|
|
67
|
+
logger.warning("OTEL initialization failed: %s", exc)
|
|
68
|
+
self._enabled = False
|
|
69
|
+
|
|
70
|
+
def trace_governance_decision(
|
|
71
|
+
self,
|
|
72
|
+
*,
|
|
73
|
+
domain: str,
|
|
74
|
+
action: str,
|
|
75
|
+
risk_level: str,
|
|
76
|
+
latency_us: float,
|
|
77
|
+
session_id: str | None = None,
|
|
78
|
+
metadata: dict[str, Any] | None = None,
|
|
79
|
+
) -> None:
|
|
80
|
+
"""Record a governance decision as an OTEL span."""
|
|
81
|
+
if not self._enabled or self._tracer is None:
|
|
82
|
+
return
|
|
83
|
+
with self._tracer.start_as_current_span(f"governance.{domain}") as span:
|
|
84
|
+
span.set_attribute("admina.domain", domain)
|
|
85
|
+
span.set_attribute("admina.action", action)
|
|
86
|
+
span.set_attribute("admina.risk_level", risk_level)
|
|
87
|
+
span.set_attribute("admina.latency_us", latency_us)
|
|
88
|
+
if session_id:
|
|
89
|
+
span.set_attribute("admina.session_id", session_id)
|
|
90
|
+
if metadata:
|
|
91
|
+
for k, v in metadata.items():
|
|
92
|
+
span.set_attribute(f"admina.meta.{k}", str(v))
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def enabled(self) -> bool:
|
|
96
|
+
"""Whether OTEL export is active."""
|
|
97
|
+
return self._enabled
|
|
98
|
+
|
|
99
|
+
def get_stats(self) -> dict[str, Any]:
|
|
100
|
+
"""Return exporter status for diagnostics."""
|
|
101
|
+
return {"enabled": self._enabled, "engine": "otel"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Admina — Data Sovereignty Domain.
|
|
16
|
+
|
|
17
|
+
PII redaction, data residency, and classification.
|
|
18
|
+
|
|
19
|
+
PIIRedactor depends on ``spacy`` (the ``[nlp]`` extra) and is loaded
|
|
20
|
+
lazily via PEP 562 ``__getattr__`` so importing this package never
|
|
21
|
+
fails on a pure-SDK install.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from typing import TYPE_CHECKING
|
|
27
|
+
|
|
28
|
+
from admina.domains.data_sovereignty.classification import DataClassifier, SensitivityLevel
|
|
29
|
+
from admina.domains.data_sovereignty.residency import ResidencyEnforcer
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
32
|
+
from admina.domains.data_sovereignty.pii import PIIRedactor
|
|
33
|
+
|
|
34
|
+
__all__ = ["PIIRedactor", "ResidencyEnforcer", "DataClassifier", "SensitivityLevel"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __getattr__(name: str):
    """Resolve ``PIIRedactor`` lazily on first attribute access (PEP 562).

    The import of the ``pii`` submodule is deferred until the attribute is
    actually requested; any other unknown name raises ``AttributeError``.
    """
    if name != "PIIRedactor":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from admina.domains.data_sovereignty.pii import PIIRedactor as redactor_cls

    return redactor_cls
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Admina — Automatic data sensitivity classification.
|
|
16
|
+
|
|
17
|
+
Tags data as public/internal/confidential/restricted based on PII scan
|
|
18
|
+
results and configurable rules.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from enum import Enum
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger("admina.data_sovereignty.classification")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SensitivityLevel(str, Enum):
|
|
31
|
+
"""Data sensitivity levels."""
|
|
32
|
+
|
|
33
|
+
PUBLIC = "public"
|
|
34
|
+
INTERNAL = "internal"
|
|
35
|
+
CONFIDENTIAL = "confidential"
|
|
36
|
+
RESTRICTED = "restricted"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# PII categories that trigger elevated classification
|
|
40
|
+
_CONFIDENTIAL_PII = {"credit_card", "ssn", "iban"}
|
|
41
|
+
_RESTRICTED_PII = {"medical", "biometric", "criminal"}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class DataClassifier:
|
|
45
|
+
"""Classifies data sensitivity based on PII scan results.
|
|
46
|
+
|
|
47
|
+
Uses the output of PIIRedactor.redact() to determine the sensitivity
|
|
48
|
+
level of a data record. Integrated into GovernedData.ingest().
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
default_level: Sensitivity level when no PII is detected.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def __init__(self, default_level: SensitivityLevel = SensitivityLevel.INTERNAL) -> None:
|
|
55
|
+
self._default_level = default_level
|
|
56
|
+
self._classifications_total = 0
|
|
57
|
+
|
|
58
|
+
def classify(
|
|
59
|
+
self, *, pii_categories: list[str] | None = None, text: str = ""
|
|
60
|
+
) -> dict[str, Any]:
|
|
61
|
+
"""Classify data sensitivity.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
pii_categories: List of PII category names found (e.g., ["email", "phone"]).
|
|
65
|
+
text: Original text (not used for classification, available for custom rules).
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
Dict with ``level`` (SensitivityLevel), ``reason``, ``pii_found``.
|
|
69
|
+
"""
|
|
70
|
+
self._classifications_total += 1
|
|
71
|
+
categories = set(pii_categories or [])
|
|
72
|
+
|
|
73
|
+
if categories & _RESTRICTED_PII:
|
|
74
|
+
return {
|
|
75
|
+
"level": SensitivityLevel.RESTRICTED.value,
|
|
76
|
+
"reason": f"Contains restricted PII: {categories & _RESTRICTED_PII}",
|
|
77
|
+
"pii_found": list(categories),
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if categories & _CONFIDENTIAL_PII:
|
|
81
|
+
return {
|
|
82
|
+
"level": SensitivityLevel.CONFIDENTIAL.value,
|
|
83
|
+
"reason": f"Contains confidential PII: {categories & _CONFIDENTIAL_PII}",
|
|
84
|
+
"pii_found": list(categories),
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
if categories:
|
|
88
|
+
return {
|
|
89
|
+
"level": SensitivityLevel.CONFIDENTIAL.value,
|
|
90
|
+
"reason": f"Contains PII: {categories}",
|
|
91
|
+
"pii_found": list(categories),
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
return {
|
|
95
|
+
"level": self._default_level.value,
|
|
96
|
+
"reason": "No PII detected",
|
|
97
|
+
"pii_found": [],
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
def get_stats(self) -> dict[str, Any]:
|
|
101
|
+
"""Return classification statistics."""
|
|
102
|
+
return {"classifications_total": self._classifications_total}
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
Admina — PII Redaction Engine — Data Sovereignty domain
|
|
17
|
+
Bidirectional masking of sensitive data on requests and responses.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import re
|
|
23
|
+
|
|
24
|
+
import spacy
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("admina.pii_redactor")
|
|
27
|
+
|
|
28
|
+
# NLP model used for NER-based PII detection.
|
|
29
|
+
# Priority: admina.yaml `pii.ner_model` > ADMINA_SPACY_MODEL env var > default.
|
|
30
|
+
SPACY_MODEL = os.environ.get("ADMINA_SPACY_MODEL", "en_core_web_sm")
|
|
31
|
+
|
|
32
|
+
# Mapping from the short category names used in admina.yaml to PII_CATEGORIES keys.
|
|
33
|
+
# Allows the YAML list ["email", "ssn", ...] to toggle entries in PII_CATEGORIES.
|
|
34
|
+
_YAML_CATEGORY_MAP: dict[str, str] = {
|
|
35
|
+
"email": "EMAIL",
|
|
36
|
+
"phone": "PHONE",
|
|
37
|
+
"credit_card": "CREDIT_CARD",
|
|
38
|
+
"ssn": "SSN",
|
|
39
|
+
"iban": "IBAN",
|
|
40
|
+
"ip": "IP_ADDRESS",
|
|
41
|
+
"person": "PERSON",
|
|
42
|
+
"org": "ORG",
|
|
43
|
+
"gpe": "GPE",
|
|
44
|
+
"loc": "LOC",
|
|
45
|
+
"dob": "DATE_OF_BIRTH",
|
|
46
|
+
"it_codice_fiscale": "IT_CODICE_FISCALE",
|
|
47
|
+
"es_dni": "ES_DNI_NIE",
|
|
48
|
+
"es_nie": "ES_DNI_NIE",
|
|
49
|
+
"de_personalausweis": "DE_PERSONALAUSWEIS",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
# Categories of PII that can be individually configured
|
|
53
|
+
PII_CATEGORIES = {
|
|
54
|
+
"PERSON": {"enabled": True, "mask": "[PERSON]"},
|
|
55
|
+
"ORG": {"enabled": True, "mask": "[ORG]"},
|
|
56
|
+
"GPE": {"enabled": True, "mask": "[LOCATION]"}, # Geopolitical entities
|
|
57
|
+
"LOC": {"enabled": True, "mask": "[LOCATION]"},
|
|
58
|
+
"EMAIL": {"enabled": True, "mask": "[EMAIL]"},
|
|
59
|
+
"PHONE": {"enabled": True, "mask": "[PHONE]"},
|
|
60
|
+
"CREDIT_CARD": {"enabled": True, "mask": "[CREDIT_CARD]"},
|
|
61
|
+
"SSN": {"enabled": True, "mask": "[SSN]"},
|
|
62
|
+
"IBAN": {"enabled": True, "mask": "[IBAN]"},
|
|
63
|
+
"IP_ADDRESS": {"enabled": True, "mask": "[IP_ADDR]"},
|
|
64
|
+
"DATE_OF_BIRTH": {"enabled": True, "mask": "[DOB]"},
|
|
65
|
+
# EU national identifiers — opt-in but on by default to match the
|
|
66
|
+
# framework's EU-first positioning.
|
|
67
|
+
"IT_CODICE_FISCALE": {"enabled": True, "mask": "[CF]"},
|
|
68
|
+
"ES_DNI_NIE": {"enabled": True, "mask": "[DNI]"},
|
|
69
|
+
"DE_PERSONALAUSWEIS": {"enabled": False, "mask": "[AUSWEIS]"}, # off — ambiguous regex
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
# Regex patterns for PII not covered by spaCy NER
|
|
73
|
+
REGEX_PII_PATTERNS = {
    # TLD class is letters only; a "|" inside a character class is a literal
    # pipe, not alternation, so it must not appear here.
    "EMAIL": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
    "PHONE": re.compile(r"(?<!\d)(\+\d{1,3}[\s.-]?)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}(?!\d)"),
    "CREDIT_CARD": re.compile(r"\b(?:\d{4}[-\s]?){3}\d{4}\b"),
    "SSN": re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),
    "IBAN": re.compile(
        r"\b[A-Z]{2}\d{2}\s?[\dA-Z]{4}\s?[\dA-Z]{4}\s?[\dA-Z]{4}(?:\s?[\dA-Z]{4}){0,4}\b"
    ),
    # IPv4 with proper octet validation (each octet 0-255). Avoids matching
    # version strings like 1.2.3.999 or build numbers > 255.
    "IP_ADDRESS": re.compile(
        r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
        r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
    ),
    # Italian Codice Fiscale (16 chars: 6 letters + 2 digits + 1 letter +
    # 2 digits + 1 letter + 3 digits + 1 letter)
    "IT_CODICE_FISCALE": re.compile(
        r"\b[A-Z]{6}\d{2}[A-EHLMPRST]\d{2}[A-Z]\d{3}[A-Z]\b",
        re.IGNORECASE,
    ),
    # Spanish DNI/NIE: 8 digits + 1 letter (DNI) or X/Y/Z + 7 digits + letter (NIE)
    "ES_DNI_NIE": re.compile(
        r"\b(?:\d{8}|[XYZ]\d{7})[-\s]?[A-Z]\b",
        re.IGNORECASE,
    ),
    # German Personalausweis: 10 chars (alphanumeric, no I/O)
    # NB: deliberately conservative — exact format varies by issue date.
    "DE_PERSONALAUSWEIS": re.compile(r"\b[CFGHJKLMNPRTVWXYZ\d]{10}\b"),
}
|
|
102
|
+
|
|
103
|
+
# Context windows used to filter version-string false positives on IPv4.
|
|
104
|
+
# Looks at the ~24 chars preceding/following the candidate match.
|
|
105
|
+
_VERSION_PREFIX_RX = re.compile(
    r"\b(?:version|versione|versión|ver|v|release|build|revision|rev|"
    r"update|patch|firmware|api[\s-]?v)\.?\s*$",
    re.IGNORECASE,
)
# Keywords are closed with \b so that they only match as whole words:
# "1.2.3.4 out" is version context, "10.0.0.5 outbound" is not. Pre-release
# tags may carry a number ("rc1", "beta2"); "rc" still requires one.
_VERSION_SUFFIX_RX = re.compile(
    r"^\s*(?:(?:released|build|revision|update|patch|published|"
    r"out|stable|rc\d+|beta\d*|alpha\d*)\b|\(.*?\))",
    re.IGNORECASE,
)


def _is_real_ipv4(text: str, start: int, end: int) -> bool:
    """Heuristic: skip IP-shaped numbers that are clearly version strings.

    Inspects up to 24 characters before ``start`` and after ``end``.

    Args:
        text: The text containing the candidate match.
        start: Start offset of the candidate in ``text``.
        end: End offset (exclusive) of the candidate in ``text``.

    Returns:
        True if the match at [start:end] looks like a network address,
        False if it appears in a version-number context (e.g. "version 1.2.3.4",
        "1.2.3.4 released", "v1.2.3.4 stable").
    """
    prefix = text[max(0, start - 24) : start]
    if _VERSION_PREFIX_RX.search(prefix):
        return False
    suffix = text[end : end + 24]
    if _VERSION_SUFFIX_RX.match(suffix):
        return False
    return True
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Per-category mask for the new EU IDs (used by PIIRedactor when not overridden)
|
|
134
|
+
_DEFAULT_EU_ID_MASKS = {
|
|
135
|
+
"IT_CODICE_FISCALE": "[CF]",
|
|
136
|
+
"ES_DNI_NIE": "[DNI]",
|
|
137
|
+
"DE_PERSONALAUSWEIS": "[AUSWEIS]",
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class PIIRedactor:
|
|
142
|
+
"""
|
|
143
|
+
Bidirectional PII redaction engine.
|
|
144
|
+
Uses spaCy NER + regex patterns for comprehensive coverage.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
config: Optional PIIConfig (or any object with `.ner_model` / `.categories`
|
|
148
|
+
attributes) loaded from admina.yaml. When supplied, its values
|
|
149
|
+
take precedence over the module-level defaults and env vars.
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
def __init__(self, config=None):
|
|
153
|
+
# Resolve NLP model: config.ner_model > ADMINA_SPACY_MODEL env var > default
|
|
154
|
+
model_name = getattr(config, "ner_model", None) or SPACY_MODEL
|
|
155
|
+
try:
|
|
156
|
+
self.nlp = spacy.load(model_name)
|
|
157
|
+
logger.info("[OK] spaCy model loaded: %s", model_name)
|
|
158
|
+
except OSError:
|
|
159
|
+
logger.warning("[WARN] spaCy model '%s' not found, using regex-only mode", model_name)
|
|
160
|
+
self.nlp = None
|
|
161
|
+
|
|
162
|
+
# Build active categories: start from PII_CATEGORIES defaults, then
|
|
163
|
+
# disable any category not listed in config.categories (if provided).
|
|
164
|
+
if config is not None and getattr(config, "categories", None) is not None:
|
|
165
|
+
enabled_keys = {_YAML_CATEGORY_MAP.get(c.lower(), c.upper()) for c in config.categories}
|
|
166
|
+
self._active_categories = {
|
|
167
|
+
k: {**v, "enabled": k in enabled_keys} for k, v in PII_CATEGORIES.items()
|
|
168
|
+
}
|
|
169
|
+
else:
|
|
170
|
+
self._active_categories = PII_CATEGORIES
|
|
171
|
+
|
|
172
|
+
self.total_redacted: int = 0
|
|
173
|
+
self.redactions_by_type: dict[str, int] = {}
|
|
174
|
+
|
|
175
|
+
def redact(self, text: str, categories: dict | None = None) -> dict:
|
|
176
|
+
"""
|
|
177
|
+
Redact PII from text.
|
|
178
|
+
Returns: {redacted_text: str, entities: [...], count: int}
|
|
179
|
+
"""
|
|
180
|
+
if not text:
|
|
181
|
+
return {"redacted_text": text, "entities": [], "count": 0}
|
|
182
|
+
|
|
183
|
+
active_categories = categories or self._active_categories
|
|
184
|
+
entities_found = []
|
|
185
|
+
redacted = text
|
|
186
|
+
|
|
187
|
+
# Step 1 — regex-based detection (higher precision for structured PII)
|
|
188
|
+
for cat_name, pattern in REGEX_PII_PATTERNS.items():
|
|
189
|
+
cat_config = active_categories.get(cat_name, {})
|
|
190
|
+
if not cat_config.get("enabled", True):
|
|
191
|
+
continue
|
|
192
|
+
mask = cat_config.get("mask", f"[{cat_name}]")
|
|
193
|
+
|
|
194
|
+
# Find all matches; for IP_ADDRESS, drop version-string matches
|
|
195
|
+
# (e.g. "version 1.2.3.4 released") to reduce false positives.
|
|
196
|
+
matches = list(pattern.finditer(redacted))
|
|
197
|
+
if cat_name == "IP_ADDRESS":
|
|
198
|
+
matches = [m for m in matches if _is_real_ipv4(redacted, m.start(), m.end())]
|
|
199
|
+
|
|
200
|
+
if not matches:
|
|
201
|
+
continue
|
|
202
|
+
|
|
203
|
+
for match in matches:
|
|
204
|
+
entities_found.append(
|
|
205
|
+
{
|
|
206
|
+
"type": cat_name,
|
|
207
|
+
"start": match.start(),
|
|
208
|
+
"end": match.end(),
|
|
209
|
+
"original_length": match.end() - match.start(),
|
|
210
|
+
"method": "regex",
|
|
211
|
+
}
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Replace in reverse order to preserve byte offsets of earlier matches.
|
|
215
|
+
for match in sorted(matches, key=lambda m: m.start(), reverse=True):
|
|
216
|
+
redacted = redacted[: match.start()] + mask + redacted[match.end() :]
|
|
217
|
+
|
|
218
|
+
# Step 2 — spaCy NER-based detection
|
|
219
|
+
if self.nlp:
|
|
220
|
+
doc = self.nlp(redacted)
|
|
221
|
+
# Process entities in reverse order to maintain positions
|
|
222
|
+
ner_entities = sorted(doc.ents, key=lambda e: e.start_char, reverse=True)
|
|
223
|
+
for ent in ner_entities:
|
|
224
|
+
cat_config = active_categories.get(ent.label_, {})
|
|
225
|
+
if not cat_config.get("enabled", False):
|
|
226
|
+
continue
|
|
227
|
+
mask = cat_config.get("mask", f"[{ent.label_}]")
|
|
228
|
+
entities_found.append(
|
|
229
|
+
{
|
|
230
|
+
"type": ent.label_,
|
|
231
|
+
"start": ent.start_char,
|
|
232
|
+
"end": ent.end_char,
|
|
233
|
+
"original_length": ent.end_char - ent.start_char,
|
|
234
|
+
"method": "spacy_ner",
|
|
235
|
+
}
|
|
236
|
+
)
|
|
237
|
+
redacted = redacted[: ent.start_char] + mask + redacted[ent.end_char :]
|
|
238
|
+
|
|
239
|
+
count = len(entities_found)
|
|
240
|
+
if count > 0:
|
|
241
|
+
self.total_redacted += count
|
|
242
|
+
for e in entities_found:
|
|
243
|
+
t = e["type"]
|
|
244
|
+
self.redactions_by_type[t] = self.redactions_by_type.get(t, 0) + 1
|
|
245
|
+
logger.info(
|
|
246
|
+
"[REDACTED] %d PII entities: %s", count, [e["type"] for e in entities_found]
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
return {
|
|
250
|
+
"redacted_text": redacted,
|
|
251
|
+
"entities": entities_found,
|
|
252
|
+
"count": count,
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
def get_stats(self) -> dict:
|
|
256
|
+
return {
|
|
257
|
+
"total_redacted": self.total_redacted,
|
|
258
|
+
"redactions_by_type": self.redactions_by_type,
|
|
259
|
+
"spacy_available": self.nlp is not None,
|
|
260
|
+
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Admina — Data residency enforcement.
|
|
16
|
+
|
|
17
|
+
Ensures data stays within allowed geographic/logical zones.
|
|
18
|
+
Blocks outbound transfers that violate zone policy.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger("admina.data_sovereignty.residency")
|
|
28
|
+
|
|
29
|
+
# Default configuration
|
|
30
|
+
_DEFAULT_ALLOWED_ZONES = ["local", "eu"]
|
|
31
|
+
_DEFAULT_BLOCK_OUTBOUND = True
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
|
|
35
|
+
class ResidencyViolation:
|
|
36
|
+
"""A residency policy violation."""
|
|
37
|
+
|
|
38
|
+
source_zone: str
|
|
39
|
+
target_zone: str
|
|
40
|
+
reason: str
|
|
41
|
+
blocked: bool = True
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ResidencyEnforcer:
|
|
45
|
+
"""Enforces data residency policies.
|
|
46
|
+
|
|
47
|
+
Validates that data operations stay within allowed zones.
|
|
48
|
+
Used by GovernedData to check every ingest/query operation.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
allowed_zones: List of permitted zone identifiers.
|
|
52
|
+
block_outbound: Whether to block transfers outside allowed zones.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
allowed_zones: list[str] | None = None,
|
|
58
|
+
block_outbound: bool = _DEFAULT_BLOCK_OUTBOUND,
|
|
59
|
+
) -> None:
|
|
60
|
+
self._allowed_zones = allowed_zones or list(_DEFAULT_ALLOWED_ZONES)
|
|
61
|
+
self._block_outbound = block_outbound
|
|
62
|
+
self._checks_total = 0
|
|
63
|
+
self._violations_total = 0
|
|
64
|
+
|
|
65
|
+
def check(
|
|
66
|
+
self,
|
|
67
|
+
*,
|
|
68
|
+
source_zone: str = "local",
|
|
69
|
+
target_zone: str | None = None,
|
|
70
|
+
data_type: str = "unknown",
|
|
71
|
+
) -> dict[str, Any]:
|
|
72
|
+
"""Check whether a data operation complies with residency policy.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
source_zone: Where the data currently resides.
|
|
76
|
+
target_zone: Where the data would move (None = stays in place).
|
|
77
|
+
data_type: Category of data for logging.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Dict with ``allowed`` (bool), ``zone`` info, and optional ``violation``.
|
|
81
|
+
"""
|
|
82
|
+
self._checks_total += 1
|
|
83
|
+
effective_target = target_zone or source_zone
|
|
84
|
+
|
|
85
|
+
# Check source zone is allowed
|
|
86
|
+
if source_zone not in self._allowed_zones:
|
|
87
|
+
self._violations_total += 1
|
|
88
|
+
return {
|
|
89
|
+
"allowed": False,
|
|
90
|
+
"source_zone": source_zone,
|
|
91
|
+
"target_zone": effective_target,
|
|
92
|
+
"violation": f"Source zone '{source_zone}' not in allowed zones: {self._allowed_zones}",
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
# Check target zone if different from source
|
|
96
|
+
if target_zone and target_zone != source_zone:
|
|
97
|
+
if target_zone not in self._allowed_zones:
|
|
98
|
+
self._violations_total += 1
|
|
99
|
+
blocked = self._block_outbound
|
|
100
|
+
return {
|
|
101
|
+
"allowed": not blocked,
|
|
102
|
+
"source_zone": source_zone,
|
|
103
|
+
"target_zone": target_zone,
|
|
104
|
+
"violation": f"Transfer to zone '{target_zone}' blocked (allowed: {self._allowed_zones})",
|
|
105
|
+
"blocked": blocked,
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return {
|
|
109
|
+
"allowed": True,
|
|
110
|
+
"source_zone": source_zone,
|
|
111
|
+
"target_zone": effective_target,
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
def get_stats(self) -> dict[str, Any]:
|
|
115
|
+
"""Return enforcement statistics."""
|
|
116
|
+
return {
|
|
117
|
+
"allowed_zones": self._allowed_zones,
|
|
118
|
+
"block_outbound": self._block_outbound,
|
|
119
|
+
"checks_total": self._checks_total,
|
|
120
|
+
"violations_total": self._violations_total,
|
|
121
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|