admina-framework 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. admina/__init__.py +34 -0
  2. admina/cli/__init__.py +14 -0
  3. admina/cli/commands/__init__.py +14 -0
  4. admina/cli/main.py +1522 -0
  5. admina/cli/templates/admina.yaml.j2 +77 -0
  6. admina/cli/templates/docker-compose.yml.j2 +254 -0
  7. admina/cli/templates/env.j2 +10 -0
  8. admina/cli/templates/main.py.j2 +95 -0
  9. admina/cli/templates/plugin.py.j2 +145 -0
  10. admina/cli/templates/plugin_pyproject.toml.j2 +15 -0
  11. admina/cli/templates/plugin_readme.md.j2 +27 -0
  12. admina/cli/templates/plugin_test.py.j2 +48 -0
  13. admina/core/__init__.py +14 -0
  14. admina/core/config.py +497 -0
  15. admina/core/event_bus.py +112 -0
  16. admina/core/secrets.py +257 -0
  17. admina/core/types.py +146 -0
  18. admina/dashboard/__init__.py +8 -0
  19. admina/dashboard/static/heimdall.png +0 -0
  20. admina/dashboard/static/index.html +1045 -0
  21. admina/dashboard/static/vendor/alpinejs.min.js +5 -0
  22. admina/domains/__init__.py +14 -0
  23. admina/domains/agent_security/__init__.py +41 -0
  24. admina/domains/agent_security/firewall.py +634 -0
  25. admina/domains/agent_security/loop_breaker.py +176 -0
  26. admina/domains/ai_infra/__init__.py +79 -0
  27. admina/domains/ai_infra/llm_engine.py +477 -0
  28. admina/domains/ai_infra/rag.py +817 -0
  29. admina/domains/ai_infra/webui.py +292 -0
  30. admina/domains/compliance/__init__.py +109 -0
  31. admina/domains/compliance/cross_regulation.py +314 -0
  32. admina/domains/compliance/eu_ai_act.py +367 -0
  33. admina/domains/compliance/forensic.py +380 -0
  34. admina/domains/compliance/gdpr.py +331 -0
  35. admina/domains/compliance/nis2.py +258 -0
  36. admina/domains/compliance/oisg.py +658 -0
  37. admina/domains/compliance/otel.py +101 -0
  38. admina/domains/data_sovereignty/__init__.py +42 -0
  39. admina/domains/data_sovereignty/classification.py +102 -0
  40. admina/domains/data_sovereignty/pii.py +260 -0
  41. admina/domains/data_sovereignty/residency.py +121 -0
  42. admina/integrations/__init__.py +14 -0
  43. admina/integrations/_engines.py +63 -0
  44. admina/integrations/cheshirecat/__init__.py +13 -0
  45. admina/integrations/cheshirecat/admina-plugin/admina_governance.py +207 -0
  46. admina/integrations/crewai/__init__.py +13 -0
  47. admina/integrations/crewai/callbacks.py +347 -0
  48. admina/integrations/langchain/__init__.py +13 -0
  49. admina/integrations/langchain/callbacks.py +341 -0
  50. admina/integrations/n8n/__init__.py +14 -0
  51. admina/integrations/openclaw/__init__.py +14 -0
  52. admina/plugins/__init__.py +49 -0
  53. admina/plugins/base.py +633 -0
  54. admina/plugins/builtin/__init__.py +14 -0
  55. admina/plugins/builtin/adapters/__init__.py +14 -0
  56. admina/plugins/builtin/adapters/ollama.py +120 -0
  57. admina/plugins/builtin/adapters/openai.py +138 -0
  58. admina/plugins/builtin/alerts/__init__.py +14 -0
  59. admina/plugins/builtin/alerts/log.py +66 -0
  60. admina/plugins/builtin/alerts/webhook.py +102 -0
  61. admina/plugins/builtin/auth/__init__.py +14 -0
  62. admina/plugins/builtin/auth/apikey.py +138 -0
  63. admina/plugins/builtin/compliance/__init__.py +14 -0
  64. admina/plugins/builtin/compliance/eu_ai_act.py +202 -0
  65. admina/plugins/builtin/connectors/__init__.py +14 -0
  66. admina/plugins/builtin/connectors/chromadb.py +137 -0
  67. admina/plugins/builtin/connectors/filesystem.py +111 -0
  68. admina/plugins/builtin/forensic/__init__.py +14 -0
  69. admina/plugins/builtin/forensic/filesystem.py +163 -0
  70. admina/plugins/builtin/forensic/minio.py +180 -0
  71. admina/plugins/builtin/guards/__init__.py +0 -0
  72. admina/plugins/builtin/guards/guardrailsai_guard.py +172 -0
  73. admina/plugins/builtin/pii/__init__.py +14 -0
  74. admina/plugins/builtin/pii/spacy_regex.py +160 -0
  75. admina/plugins/builtin/transports/__init__.py +14 -0
  76. admina/plugins/builtin/transports/http_rest.py +97 -0
  77. admina/plugins/builtin/transports/mcp.py +173 -0
  78. admina/plugins/registry.py +356 -0
  79. admina/proxy/__init__.py +15 -0
  80. admina/proxy/api/__init__.py +17 -0
  81. admina/proxy/api/dashboard.py +925 -0
  82. admina/proxy/api/integration.py +153 -0
  83. admina/proxy/config.py +214 -0
  84. admina/proxy/engine_bridge.py +306 -0
  85. admina/proxy/governance.py +232 -0
  86. admina/proxy/main.py +1484 -0
  87. admina/proxy/multi_upstream.py +156 -0
  88. admina/proxy/state.py +97 -0
  89. admina/py.typed +0 -0
  90. admina/sdk/__init__.py +34 -0
  91. admina/sdk/_compat.py +43 -0
  92. admina/sdk/compliance_kit.py +359 -0
  93. admina/sdk/governed_agent.py +391 -0
  94. admina/sdk/governed_data.py +434 -0
  95. admina/sdk/governed_model.py +241 -0
  96. admina_framework-0.9.0.dist-info/METADATA +575 -0
  97. admina_framework-0.9.0.dist-info/RECORD +102 -0
  98. admina_framework-0.9.0.dist-info/WHEEL +5 -0
  99. admina_framework-0.9.0.dist-info/entry_points.txt +2 -0
  100. admina_framework-0.9.0.dist-info/licenses/LICENSE +191 -0
  101. admina_framework-0.9.0.dist-info/licenses/NOTICE +16 -0
  102. admina_framework-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,101 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Admina — OpenTelemetry governance observability.
16
+
17
+ Provides structured OTEL tracing for governance decisions.
18
+ Every governance domain decision emits a span with action, risk level, latency.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ from typing import Any
25
+
26
+ logger = logging.getLogger("admina.compliance.otel")
27
+
28
+ # Try to import OTEL — optional dependency at module level
29
+ try:
30
+ from opentelemetry import trace
31
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
32
+ from opentelemetry.sdk.trace import TracerProvider
33
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
34
+
35
+ _OTEL_AVAILABLE = True
36
+ except ImportError:
37
+ _OTEL_AVAILABLE = False
38
+ logger.info("OpenTelemetry not installed — governance tracing disabled")
39
+
40
+
41
class OTELGovernanceExporter:
    """Export governance decisions as OpenTelemetry spans.

    When the OpenTelemetry SDK could not be imported (module-level
    ``_OTEL_AVAILABLE`` is false) or initialization fails, the exporter is
    disabled and every method is a no-op.

    Args:
        endpoint: OTLP gRPC endpoint (e.g., "http://localhost:4317").
        service_name: Name handed to ``get_tracer`` for the spans emitted here.
        insecure: Passed through to ``OTLPSpanExporter``; defaults to ``True``
            (the value that used to be hard-coded).
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:4317",
        service_name: str = "admina-governance",
        *,
        insecure: bool = True,
    ) -> None:
        self._enabled = _OTEL_AVAILABLE
        self._tracer = None
        self._provider = None
        if not self._enabled:
            return
        try:
            provider = TracerProvider()
            exporter = OTLPSpanExporter(endpoint=endpoint, insecure=insecure)
            provider.add_span_processor(BatchSpanProcessor(exporter))
            # Keep registering globally for backward compatibility. The global
            # provider can only be set once per process; later calls are
            # ignored by OpenTelemetry with a warning.
            trace.set_tracer_provider(provider)
            # Take the tracer from the provider built here rather than from
            # the global one, so spans always reach *this* exporter even when
            # another provider was registered first.
            self._tracer = provider.get_tracer(service_name)
            self._provider = provider
            logger.info("OTEL exporter initialized -> %s", endpoint)
        except (OSError, RuntimeError, ValueError) as exc:
            logger.warning("OTEL initialization failed: %s", exc)
            self._enabled = False
            self._tracer = None
            self._provider = None

    def trace_governance_decision(
        self,
        *,
        domain: str,
        action: str,
        risk_level: str,
        latency_us: float,
        session_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Record a governance decision as an OTEL span.

        The span is named ``governance.<domain>`` and carries ``admina.*``
        attributes. Does nothing when the exporter is disabled.

        Args:
            domain: Governance domain that produced the decision.
            action: Action that was taken.
            risk_level: Risk level attached to the decision.
            latency_us: Decision latency (microseconds, per the name).
            session_id: Optional session identifier; omitted when falsy.
            metadata: Optional extra attributes; each value is stringified
                and stored under ``admina.meta.<key>``.
        """
        if not self._enabled or self._tracer is None:
            return
        with self._tracer.start_as_current_span(f"governance.{domain}") as span:
            span.set_attribute("admina.domain", domain)
            span.set_attribute("admina.action", action)
            span.set_attribute("admina.risk_level", risk_level)
            span.set_attribute("admina.latency_us", latency_us)
            if session_id:
                span.set_attribute("admina.session_id", session_id)
            if metadata:
                for k, v in metadata.items():
                    # str() keeps arbitrary metadata values attribute-safe.
                    span.set_attribute(f"admina.meta.{k}", str(v))

    @property
    def enabled(self) -> bool:
        """Whether OTEL export is active."""
        return self._enabled

    def get_stats(self) -> dict[str, Any]:
        """Return exporter status for diagnostics."""
        return {"enabled": self._enabled, "engine": "otel"}
@@ -0,0 +1,42 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Admina — Data Sovereignty Domain.
16
+
17
+ PII redaction, data residency, and classification.
18
+
19
+ PIIRedactor depends on ``spacy`` (the ``[nlp]`` extra) and is loaded
20
+ lazily via PEP 562 ``__getattr__`` so importing this package never
21
+ fails on a pure-SDK install.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from typing import TYPE_CHECKING
27
+
28
+ from admina.domains.data_sovereignty.classification import DataClassifier, SensitivityLevel
29
+ from admina.domains.data_sovereignty.residency import ResidencyEnforcer
30
+
31
+ if TYPE_CHECKING: # pragma: no cover
32
+ from admina.domains.data_sovereignty.pii import PIIRedactor
33
+
34
+ __all__ = ["PIIRedactor", "ResidencyEnforcer", "DataClassifier", "SensitivityLevel"]
35
+
36
+
37
def __getattr__(name: str):
    """Resolve lazily loaded package attributes (PEP 562 module hook).

    ``PIIRedactor`` is imported from ``admina.domains.data_sovereignty.pii``
    only on first access, so importing this package does not pull in that
    module's dependencies. The resolved class is cached in the module
    globals so later lookups bypass this hook entirely.

    Args:
        name: Attribute name that was not found through normal lookup.

    Returns:
        The ``PIIRedactor`` class when ``name == "PIIRedactor"``.

    Raises:
        AttributeError: For any other name.
    """
    if name == "PIIRedactor":
        from admina.domains.data_sovereignty.pii import PIIRedactor

        globals()[name] = PIIRedactor
        return PIIRedactor
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,102 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Admina — Automatic data sensitivity classification.
16
+
17
+ Tags data as public/internal/confidential/restricted based on PII scan
18
+ results and configurable rules.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ from enum import Enum
25
+ from typing import Any
26
+
27
+ logger = logging.getLogger("admina.data_sovereignty.classification")
28
+
29
+
30
class SensitivityLevel(str, Enum):
    """Data sensitivity levels."""

    PUBLIC = "public"
    INTERNAL = "internal"
    CONFIDENTIAL = "confidential"
    RESTRICTED = "restricted"


# PII categories that trigger elevated classification. Names are stored in
# lower case; ``DataClassifier.classify`` compares case-insensitively.
_CONFIDENTIAL_PII = {"credit_card", "ssn", "iban"}
_RESTRICTED_PII = {"medical", "biometric", "criminal"}


def _format_categories(categories) -> str:
    """Render category names like a set literal, but in sorted order."""
    return "{" + ", ".join(repr(c) for c in sorted(categories)) + "}"


class DataClassifier:
    """Classifies data sensitivity based on PII scan results.

    Takes the PII category names found in a record and maps them to a
    sensitivity level.

    Args:
        default_level: Sensitivity level when no PII is detected.
    """

    def __init__(self, default_level: SensitivityLevel = SensitivityLevel.INTERNAL) -> None:
        self._default_level = default_level
        self._classifications_total = 0

    def classify(
        self, *, pii_categories: list[str] | None = None, text: str = ""
    ) -> dict[str, Any]:
        """Classify data sensitivity.

        Category names are matched case-insensitively, so both "ssn" and
        "SSN" count as confidential PII. The ``reason`` text and the
        ``pii_found`` list are emitted in sorted order so results are
        reproducible across runs.

        Args:
            pii_categories: List of PII category names found (e.g., ["email", "phone"]).
            text: Original text (not used for classification, available for custom rules).

        Returns:
            Dict with ``level`` (the level's string value), ``reason``,
            and ``pii_found`` (de-duplicated, sorted category names).
        """
        self._classifications_total += 1
        categories = set(pii_categories or [])

        if not categories:
            return {
                "level": self._default_level.value,
                "reason": "No PII detected",
                "pii_found": [],
            }

        found = sorted(categories)

        # Restricted wins over confidential when both kinds are present.
        restricted = {c for c in categories if c.lower() in _RESTRICTED_PII}
        if restricted:
            return {
                "level": SensitivityLevel.RESTRICTED.value,
                "reason": f"Contains restricted PII: {_format_categories(restricted)}",
                "pii_found": found,
            }

        confidential = {c for c in categories if c.lower() in _CONFIDENTIAL_PII}
        if confidential:
            return {
                "level": SensitivityLevel.CONFIDENTIAL.value,
                "reason": f"Contains confidential PII: {_format_categories(confidential)}",
                "pii_found": found,
            }

        # Any other PII category still elevates the record to confidential.
        return {
            "level": SensitivityLevel.CONFIDENTIAL.value,
            "reason": f"Contains PII: {_format_categories(categories)}",
            "pii_found": found,
        }

    def get_stats(self) -> dict[str, Any]:
        """Return classification statistics."""
        return {"classifications_total": self._classifications_total}
@@ -0,0 +1,260 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Admina — PII Redaction Engine — Data Sovereignty domain
17
+ Bidirectional masking of sensitive data on requests and responses.
18
+ """
19
+
20
+ import logging
21
+ import os
22
+ import re
23
+
24
+ import spacy
25
+
26
+ logger = logging.getLogger("admina.pii_redactor")
27
+
28
# Name of the spaCy model used for NER-based PII detection, resolved once
# at import time.
# Resolution order: admina.yaml `pii.ner_model` > ADMINA_SPACY_MODEL env var > default.
SPACY_MODEL = os.getenv("ADMINA_SPACY_MODEL", "en_core_web_sm")
31
+
32
# Translates the short, lower-case category names accepted in admina.yaml
# into the keys of PII_CATEGORIES, so a YAML list such as ["email", "ssn"]
# can switch individual categories on or off. Note that both Spanish
# aliases ("es_dni" and "es_nie") point at the single ES_DNI_NIE category.
_YAML_CATEGORY_MAP: dict[str, str] = dict(
    email="EMAIL",
    phone="PHONE",
    credit_card="CREDIT_CARD",
    ssn="SSN",
    iban="IBAN",
    ip="IP_ADDRESS",
    person="PERSON",
    org="ORG",
    gpe="GPE",
    loc="LOC",
    dob="DATE_OF_BIRTH",
    it_codice_fiscale="IT_CODICE_FISCALE",
    es_dni="ES_DNI_NIE",
    es_nie="ES_DNI_NIE",
    de_personalausweis="DE_PERSONALAUSWEIS",
)
51
+
52
# Per-category redaction settings. Every entry can be toggled individually
# through its "enabled" flag; "mask" is the placeholder written in place of
# the detected value. GPE (geopolitical entities) and LOC share one mask.
# The EU national identifiers are ordinary toggleable entries: the Italian
# and Spanish ones are on by default, while the German one is off because
# its regex is ambiguous.
PII_CATEGORIES = {
    category: {"enabled": is_enabled, "mask": placeholder}
    for category, placeholder, is_enabled in (
        ("PERSON", "[PERSON]", True),
        ("ORG", "[ORG]", True),
        ("GPE", "[LOCATION]", True),
        ("LOC", "[LOCATION]", True),
        ("EMAIL", "[EMAIL]", True),
        ("PHONE", "[PHONE]", True),
        ("CREDIT_CARD", "[CREDIT_CARD]", True),
        ("SSN", "[SSN]", True),
        ("IBAN", "[IBAN]", True),
        ("IP_ADDRESS", "[IP_ADDR]", True),
        ("DATE_OF_BIRTH", "[DOB]", True),
        ("IT_CODICE_FISCALE", "[CF]", True),
        ("ES_DNI_NIE", "[DNI]", True),
        ("DE_PERSONALAUSWEIS", "[AUSWEIS]", False),
    )
}
71
+
72
# Regex patterns for structured PII that spaCy NER does not cover.
# Keys match the entries of PII_CATEGORIES; iteration order is the order in
# which the patterns are applied.
REGEX_PII_PATTERNS = {
    # The TLD class is [A-Za-z]: a "|" inside a character class is a literal
    # pipe, not alternation, so it must not appear there.
    "EMAIL": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
    "PHONE": re.compile(r"(?<!\d)(\+\d{1,3}[\s.-]?)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}(?!\d)"),
    "CREDIT_CARD": re.compile(r"\b(?:\d{4}[-\s]?){3}\d{4}\b"),
    "SSN": re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),
    "IBAN": re.compile(
        r"\b[A-Z]{2}\d{2}\s?[\dA-Z]{4}\s?[\dA-Z]{4}\s?[\dA-Z]{4}(?:\s?[\dA-Z]{4}){0,4}\b"
    ),
    # IPv4 with proper octet validation (each octet 0-255). Avoids matching
    # version strings like 1.2.3.999 or build numbers > 255.
    "IP_ADDRESS": re.compile(
        r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
        r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
    ),
    # Italian Codice Fiscale (16 chars: 6 letters + 2 digits + 1 month letter
    # + 2 digits + 1 letter + 3 digits + 1 letter). Case-insensitive.
    "IT_CODICE_FISCALE": re.compile(
        r"\b[A-Z]{6}\d{2}[A-EHLMPRST]\d{2}[A-Z]\d{3}[A-Z]\b",
        re.IGNORECASE,
    ),
    # Spanish DNI/NIE: 8 digits + 1 letter (DNI) or X/Y/Z + 7 digits + letter
    # (NIE), with an optional hyphen or whitespace before the final letter.
    "ES_DNI_NIE": re.compile(
        r"\b(?:\d{8}|[XYZ]\d{7})[-\s]?[A-Z]\b",
        re.IGNORECASE,
    ),
    # German Personalausweis: 10 chars drawn from digits and a restricted
    # letter set.
    # NB: deliberately conservative — exact format varies by issue date.
    "DE_PERSONALAUSWEIS": re.compile(r"\b[CFGHJKLMNPRTVWXYZ\d]{10}\b"),
}
102
+
103
# Version-context detectors used to weed out IPv4 false positives.
# The prefix pattern is anchored to the END of the text just before a
# candidate match; the suffix pattern is anchored to the START of the text
# just after it. Callers feed them a window of up to 24 characters.
_VERSION_PREFIX_RX = re.compile(
    r"\b(?:version|versione|versión|ver|v|release|build|revision|rev|"
    r"update|patch|firmware|api[\s-]?v)\.?\s*$",
    re.IGNORECASE,
)
# NOTE(review): the `\(.*?\)` alternative treats ANY parenthetical that
# directly follows the candidate as version context, e.g. "10.0.0.1 (gateway)"
# — confirm that suppressing redaction there is intended.
_VERSION_SUFFIX_RX = re.compile(
    r"^\s*(?:released|build|revision|update|patch|published|"
    r"\(.*?\)|out|stable|rc\d|beta|alpha)",
    re.IGNORECASE,
)


def _is_real_ipv4(text: str, start: int, end: int) -> bool:
    """Tell a network address apart from an IP-shaped version number.

    Inspects up to 24 characters on each side of ``text[start:end]``.
    The candidate is rejected when the text before it ends with a version
    keyword (e.g. "version 1.2.3.4", "v1.2.3.4") or the text after it
    begins with a release keyword (e.g. "1.2.3.4 released",
    "1.2.3.4 stable").

    Args:
        text: The string the candidate match was found in.
        start: Start offset of the candidate within ``text``.
        end: End offset (exclusive) of the candidate within ``text``.

    Returns:
        True when no version context is found around the match,
        False otherwise.
    """
    window = 24
    before = text[max(0, start - window) : start]
    after = text[end : end + window]
    in_version_context = bool(_VERSION_PREFIX_RX.search(before)) or bool(
        _VERSION_SUFFIX_RX.match(after)
    )
    return not in_version_context
131
+
132
+
133
# Default placeholder per EU national-ID category, intended for PIIRedactor
# when no mask override is configured. The values mirror the "mask" entries
# of the same categories in PII_CATEGORIES.
# NOTE(review): no reference to this table is visible in this part of the
# module — confirm whether it is still consumed elsewhere.
_DEFAULT_EU_ID_MASKS = dict(
    IT_CODICE_FISCALE="[CF]",
    ES_DNI_NIE="[DNI]",
    DE_PERSONALAUSWEIS="[AUSWEIS]",
)
139
+
140
+
141
class PIIRedactor:
    """
    Bidirectional PII redaction engine.
    Uses spaCy NER + regex patterns for comprehensive coverage.

    If the spaCy model cannot be loaded, the redactor falls back to
    regex-only detection.

    Args:
        config: Optional PIIConfig (or any object with `.ner_model` / `.categories`
            attributes) loaded from admina.yaml. When supplied, its values
            take precedence over the module-level defaults and env vars.
    """

    def __init__(self, config=None):
        # Resolve NLP model: config.ner_model > ADMINA_SPACY_MODEL env var > default
        model_name = getattr(config, "ner_model", None) or SPACY_MODEL
        try:
            self.nlp = spacy.load(model_name)
            logger.info("[OK] spaCy model loaded: %s", model_name)
        except OSError:
            logger.warning("[WARN] spaCy model '%s' not found, using regex-only mode", model_name)
            self.nlp = None

        # Build active categories: start from PII_CATEGORIES defaults, then
        # disable any category not listed in config.categories (if provided).
        if config is not None and getattr(config, "categories", None) is not None:
            enabled_keys = {_YAML_CATEGORY_MAP.get(c.lower(), c.upper()) for c in config.categories}
            self._active_categories = {
                k: {**v, "enabled": k in enabled_keys} for k, v in PII_CATEGORIES.items()
            }
        else:
            # No per-instance override: share the module-level table.
            self._active_categories = PII_CATEGORIES

        self.total_redacted: int = 0
        self.redactions_by_type: dict[str, int] = {}

    def redact(self, text: str, categories: dict | None = None) -> dict:
        """
        Redact PII from text.

        Regex patterns run first, one category at a time, each on the output
        of the previous one; spaCy NER (when a model is loaded) then runs on
        the regex-redacted text.

        Args:
            text: Input text. Falsy input is returned unchanged.
            categories: Optional per-call category table with the same shape
                as PII_CATEGORIES. A falsy value selects the instance's
                active categories.

        Returns: {redacted_text: str, entities: [...], count: int}
            Each entity has ``type``, ``start``, ``end``, ``original_length``
            and ``method``. ``start``/``end`` are offsets into the working
            text at the moment the entity was detected; earlier replacements
            may already have shifted it relative to the original input.
        """
        if not text:
            return {"redacted_text": text, "entities": [], "count": 0}

        active_categories = categories or self._active_categories
        entities_found: list = []

        # Step 1 — regex-based detection (higher precision for structured PII)
        redacted = self._redact_with_regex(text, active_categories, entities_found)

        # Step 2 — spaCy NER-based detection
        if self.nlp:
            redacted = self._redact_with_ner(redacted, active_categories, entities_found)

        self._record_redactions(entities_found)

        return {
            "redacted_text": redacted,
            "entities": entities_found,
            "count": len(entities_found),
        }

    def _redact_with_regex(self, text: str, active_categories: dict, entities_found: list) -> str:
        """Mask every enabled regex category in turn; append hits to ``entities_found``."""
        redacted = text
        for cat_name, pattern in REGEX_PII_PATTERNS.items():
            cat_config = active_categories.get(cat_name, {})
            # Regex categories missing from the table are treated as enabled.
            if not cat_config.get("enabled", True):
                continue
            mask = cat_config.get("mask", f"[{cat_name}]")

            # Find all matches; for IP_ADDRESS, drop version-string matches
            # (e.g. "version 1.2.3.4 released") to reduce false positives.
            matches = list(pattern.finditer(redacted))
            if cat_name == "IP_ADDRESS":
                matches = [m for m in matches if _is_real_ipv4(redacted, m.start(), m.end())]

            if not matches:
                continue

            for match in matches:
                entities_found.append(
                    {
                        "type": cat_name,
                        "start": match.start(),
                        "end": match.end(),
                        "original_length": match.end() - match.start(),
                        "method": "regex",
                    }
                )

            # finditer yields matches left to right, so walking them backwards
            # replaces from the end and keeps earlier offsets valid.
            for match in reversed(matches):
                redacted = redacted[: match.start()] + mask + redacted[match.end() :]
        return redacted

    def _redact_with_ner(self, text: str, active_categories: dict, entities_found: list) -> str:
        """Mask enabled spaCy entity labels; append hits to ``entities_found``."""
        redacted = text
        doc = self.nlp(redacted)
        # Process entities in reverse order to maintain positions
        ner_entities = sorted(doc.ents, key=lambda e: e.start_char, reverse=True)
        for ent in ner_entities:
            cat_config = active_categories.get(ent.label_, {})
            # NER labels missing from the table are ignored (default disabled).
            if not cat_config.get("enabled", False):
                continue
            mask = cat_config.get("mask", f"[{ent.label_}]")
            entities_found.append(
                {
                    "type": ent.label_,
                    "start": ent.start_char,
                    "end": ent.end_char,
                    "original_length": ent.end_char - ent.start_char,
                    "method": "spacy_ner",
                }
            )
            redacted = redacted[: ent.start_char] + mask + redacted[ent.end_char :]
        return redacted

    def _record_redactions(self, entities_found: list) -> None:
        """Fold one call's entities into the running counters and log them."""
        count = len(entities_found)
        if count <= 0:
            return
        self.total_redacted += count
        for e in entities_found:
            t = e["type"]
            self.redactions_by_type[t] = self.redactions_by_type.get(t, 0) + 1
        logger.info(
            "[REDACTED] %d PII entities: %s", count, [e["type"] for e in entities_found]
        )

    def get_stats(self) -> dict:
        """Return redaction counters and whether a spaCy model is loaded.

        ``redactions_by_type`` is a copy, so callers cannot alter the
        redactor's internal counters through the returned dict.
        """
        return {
            "total_redacted": self.total_redacted,
            "redactions_by_type": dict(self.redactions_by_type),
            "spacy_available": self.nlp is not None,
        }
@@ -0,0 +1,121 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Admina — Data residency enforcement.
16
+
17
+ Ensures data stays within allowed geographic/logical zones.
18
+ Blocks outbound transfers that violate zone policy.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ from dataclasses import dataclass
25
+ from typing import Any
26
+
27
logger = logging.getLogger("admina.data_sovereignty.residency")

# Default configuration
_DEFAULT_ALLOWED_ZONES = ["local", "eu"]
_DEFAULT_BLOCK_OUTBOUND = True


@dataclass
class ResidencyViolation:
    """A residency policy violation."""

    source_zone: str
    target_zone: str
    reason: str
    blocked: bool = True


class ResidencyEnforcer:
    """Enforces data residency policies.

    Validates that data operations stay within allowed zones.

    Args:
        allowed_zones: List of permitted zone identifiers. ``None`` or an
            empty list selects the defaults (``["local", "eu"]``). The list
            is copied, so later changes by the caller do not alter the policy.
        block_outbound: Whether to block transfers outside allowed zones.
    """

    def __init__(
        self,
        allowed_zones: list[str] | None = None,
        block_outbound: bool = _DEFAULT_BLOCK_OUTBOUND,
    ) -> None:
        # Copy in both branches so the policy never aliases caller-owned or
        # module-level lists.
        self._allowed_zones = list(allowed_zones) if allowed_zones else list(_DEFAULT_ALLOWED_ZONES)
        self._block_outbound = block_outbound
        self._checks_total = 0
        self._violations_total = 0

    def check(
        self,
        *,
        source_zone: str = "local",
        target_zone: str | None = None,
        data_type: str = "unknown",
    ) -> dict[str, Any]:
        """Check whether a data operation complies with residency policy.

        Args:
            source_zone: Where the data currently resides.
            target_zone: Where the data would move (None = stays in place).
            data_type: Category of data; included in the violation log line.

        Returns:
            Dict with ``allowed`` (bool), ``source_zone``, ``target_zone``,
            and, on a violation, a ``violation`` message. Target-zone
            violations also carry ``blocked``; when ``block_outbound`` is
            off they are reported but ``allowed`` stays True.
        """
        self._checks_total += 1
        effective_target = target_zone or source_zone

        # A disallowed source zone is always rejected, regardless of block_outbound.
        if source_zone not in self._allowed_zones:
            self._violations_total += 1
            violation = f"Source zone '{source_zone}' not in allowed zones: {self._allowed_zones}"
            logger.warning("Residency violation (data_type=%s): %s", data_type, violation)
            return {
                "allowed": False,
                "source_zone": source_zone,
                "target_zone": effective_target,
                "violation": violation,
            }

        # Only a real move (target set and different from source) is checked.
        if target_zone and target_zone != source_zone and target_zone not in self._allowed_zones:
            self._violations_total += 1
            blocked = self._block_outbound
            if blocked:
                violation = (
                    f"Transfer to zone '{target_zone}' blocked (allowed: {self._allowed_zones})"
                )
            else:
                # Do not claim the transfer was blocked when it was let through.
                violation = (
                    f"Transfer to zone '{target_zone}' permitted despite residency "
                    f"violation (allowed: {self._allowed_zones})"
                )
            logger.warning("Residency violation (data_type=%s): %s", data_type, violation)
            return {
                "allowed": not blocked,
                "source_zone": source_zone,
                "target_zone": target_zone,
                "violation": violation,
                "blocked": blocked,
            }

        return {
            "allowed": True,
            "source_zone": source_zone,
            "target_zone": effective_target,
        }

    def get_stats(self) -> dict[str, Any]:
        """Return enforcement statistics.

        ``allowed_zones`` is a copy of the configured list.
        """
        return {
            "allowed_zones": list(self._allowed_zones),
            "block_outbound": self._block_outbound,
            "checks_total": self._checks_total,
            "violations_total": self._violations_total,
        }
@@ -0,0 +1,14 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+