sether 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sether/__init__.py ADDED
@@ -0,0 +1,148 @@
1
+ """Sether -- streaming PII redaction for AI applications.
2
+
3
+ Hide personal data from your AI before it reaches any LLM provider. Sether
4
+ detects sensitive data, swaps each match for a stable token before the request
5
+ leaves your boundary, then restores the originals transparently in the response.
6
+
7
+ from sether import Sether
8
+
9
+ sether = Sether()
10
+ safe = sether.redact_sync("my email is alice@example.com")
11
+ # -> "my email is <EMAIL_...>"
12
+ back = sether.restore_sync(safe)
13
+ # -> "my email is alice@example.com"
14
+
15
+ Named for the Hebrew *sether* -- *the hiding place*. Psalm 32:7.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ __version__ = "0.1.0"
21
+
22
+ from .audit import (
23
+ DEFAULT_REGULATION_MAPPINGS,
24
+ AuditEvent,
25
+ AuditSink,
26
+ ConsoleAuditSink,
27
+ MemoryAuditSink,
28
+ RegulationMapping,
29
+ )
30
+ from .core import Sether
31
+ from .detectors import (
32
+ Detector,
33
+ DetectorMatch,
34
+ address_detector,
35
+ anthropic_key_detector,
36
+ aws_access_key_detector,
37
+ basic_detectors,
38
+ credit_card_detector,
39
+ dob_detector,
40
+ email_detector,
41
+ github_pat_detector,
42
+ high_entropy_detector,
43
+ iban_detector,
44
+ identity_detectors,
45
+ ipv4_detector,
46
+ ipv6_detector,
47
+ jwt_detector,
48
+ name_detector,
49
+ openai_key_detector,
50
+ passport_detector,
51
+ phone_detector,
52
+ secrets_detectors,
53
+ slack_token_detector,
54
+ ssn_detector,
55
+ stripe_key_detector,
56
+ )
57
+ from .middleware import (
58
+ SetherASGIMiddleware,
59
+ SetherWSGIMiddleware,
60
+ wrap_anthropic,
61
+ wrap_httpx,
62
+ wrap_openai,
63
+ )
64
+ from .stream import (
65
+ RedactStream,
66
+ RestoreStream,
67
+ SSEStream,
68
+ aredact_iter,
69
+ arestore_iter,
70
+ asse_redact_iter,
71
+ asse_restore_iter,
72
+ create_redact_stream,
73
+ create_restore_stream,
74
+ create_sse_redact_stream,
75
+ create_sse_restore_stream,
76
+ redact_iter,
77
+ redact_sync,
78
+ restore_iter,
79
+ restore_sync,
80
+ sse_redact_iter,
81
+ sse_restore_iter,
82
+ )
83
+ from .vault import MemoryVault, Vault
84
+
85
+ __all__ = [
86
+ "__version__",
87
+ # core
88
+ "Sether",
89
+ "MemoryVault",
90
+ "Vault",
91
+ # one-shot + streaming
92
+ "redact_sync",
93
+ "restore_sync",
94
+ "RedactStream",
95
+ "RestoreStream",
96
+ "create_redact_stream",
97
+ "create_restore_stream",
98
+ "redact_iter",
99
+ "aredact_iter",
100
+ "restore_iter",
101
+ "arestore_iter",
102
+ # detectors
103
+ "Detector",
104
+ "DetectorMatch",
105
+ "basic_detectors",
106
+ "email_detector",
107
+ "credit_card_detector",
108
+ "ssn_detector",
109
+ "ipv4_detector",
110
+ "ipv6_detector",
111
+ "iban_detector",
112
+ "phone_detector",
113
+ "secrets_detectors",
114
+ "aws_access_key_detector",
115
+ "openai_key_detector",
116
+ "anthropic_key_detector",
117
+ "github_pat_detector",
118
+ "slack_token_detector",
119
+ "stripe_key_detector",
120
+ "jwt_detector",
121
+ "high_entropy_detector",
122
+ "identity_detectors",
123
+ "name_detector",
124
+ "dob_detector",
125
+ "passport_detector",
126
+ "address_detector",
127
+ # SSE
128
+ "SSEStream",
129
+ "create_sse_redact_stream",
130
+ "create_sse_restore_stream",
131
+ "sse_redact_iter",
132
+ "sse_restore_iter",
133
+ "asse_redact_iter",
134
+ "asse_restore_iter",
135
+ # audit
136
+ "AuditEvent",
137
+ "AuditSink",
138
+ "RegulationMapping",
139
+ "DEFAULT_REGULATION_MAPPINGS",
140
+ "ConsoleAuditSink",
141
+ "MemoryAuditSink",
142
+ # middleware
143
+ "wrap_openai",
144
+ "wrap_anthropic",
145
+ "wrap_httpx",
146
+ "SetherASGIMiddleware",
147
+ "SetherWSGIMiddleware",
148
+ ]
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from .sinks import ConsoleAuditSink, MemoryAuditSink
4
+ from .types import (
5
+ DEFAULT_REGULATION_MAPPINGS,
6
+ FRAMEWORKS,
7
+ AuditEvent,
8
+ AuditSink,
9
+ RegulationMapping,
10
+ )
11
+
12
+ __all__ = [
13
+ "AuditEvent",
14
+ "AuditSink",
15
+ "RegulationMapping",
16
+ "FRAMEWORKS",
17
+ "DEFAULT_REGULATION_MAPPINGS",
18
+ "ConsoleAuditSink",
19
+ "MemoryAuditSink",
20
+ ]
sether/audit/sinks.py ADDED
@@ -0,0 +1,48 @@
1
+ """Reference audit sinks.
2
+
3
+ Direct port of ``src/audit/console.ts``. Sinks are intentionally simple -- one
4
+ method. Anything more complex (batching, retries, structured forwarding) lives
5
+ in your own adapter.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import sys
12
+ from typing import Callable, List, Optional
13
+
14
+ from .types import AuditEvent
15
+
16
+
17
class ConsoleAuditSink:
    """Write each audit event as one JSON document followed by a newline.

    Output goes to ``sys.stderr`` unless a custom ``write`` callable is given.
    A template for production sinks (Datadog, Splunk, Logpush, R2, ...).

    :param write: callable that receives the serialised text, including the
        trailing newline. Its return value is ignored. ``None`` selects stderr.
    :param pretty: when true, indent the JSON by two spaces instead of
        emitting the compact single-line form.
    """

    def __init__(
        self,
        write: Optional[Callable[[str], object]] = None,
        pretty: bool = False,
    ) -> None:
        # Compare against None instead of relying on truthiness: a callable
        # that also defines ``__len__``/``__bool__`` (for example an empty
        # buffer object) is falsy and would otherwise be silently swapped for
        # stderr.
        if write is None:
            # sys.stderr is looked up on every call so that a later
            # redirection of the stream is honoured.
            self._write: Callable[[str], object] = lambda line: sys.stderr.write(line)
        else:
            self._write = write
        self._pretty = pretty

    def write(self, event: AuditEvent) -> None:
        """Serialise ``event.to_dict()`` to JSON and pass it to the writer."""
        # indent=None is json.dumps' default, i.e. the compact form.
        indent = 2 if self._pretty else None
        text = json.dumps(event.to_dict(), indent=indent, ensure_ascii=False)
        self._write(text + "\n")
33
+
34
+
35
class MemoryAuditSink:
    """In-memory sink that keeps every event it is handed.

    Events are stored in arrival order on the public ``events`` list. Used by
    tests and the in-browser sandbox.
    """

    def __init__(self) -> None:
        self.events: List[AuditEvent] = list()

    def write(self, event: AuditEvent) -> None:
        """Record ``event`` at the end of ``events``."""
        self.events.extend((event,))

    def clear(self) -> None:
        """Discard all recorded events, emptying the existing list in place."""
        del self.events[:]
46
+
47
+
48
+ __all__ = ["ConsoleAuditSink", "MemoryAuditSink"]
sether/audit/types.py ADDED
@@ -0,0 +1,104 @@
1
+ """Audit event schema.
2
+
3
+ Direct port of ``src/audit/types.ts``. Every redaction can emit one
4
+ :class:`AuditEvent`. The OSS package provides the type contract and reference
5
+ sinks; the hosted tier ingests these events for retention, compliance
6
+ reporting, and SIEM export. The JSON wire shape is the same on both sides
7
+ (camelCase keys), so a deployment can promote from local-only to hosted without
8
+ reshaping events.
9
+
10
+ **The original value is never carried in an event -- only its length.**
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Dict, List, Optional, Protocol, Tuple, runtime_checkable
17
+
18
+ # Allowed compliance frameworks (informational -- not enforced at runtime).
19
+ FRAMEWORKS = (
20
+ "GDPR", "SOC2", "HIPAA", "EU_AI_ACT", "NDPA", "CCPA",
21
+ "PCI_DSS", "ISO_27001", "OWASP_LLM", "OWASP_AGENTIC",
22
+ )
23
+
24
+
25
@dataclass(frozen=True)
class RegulationMapping:
    """Immutable pairing of a compliance framework with a clause reference."""

    # Framework identifier; expected to be one of FRAMEWORKS (informational,
    # not enforced at runtime).
    framework: str
    # Clause or control within that framework, e.g. "Art. 28", "CC6.7", "164.312".
    reference: str

    def to_dict(self) -> Dict[str, str]:
        """Return the mapping as a plain dict with both fields."""
        as_dict: Dict[str, str] = {}
        as_dict["framework"] = self.framework
        as_dict["reference"] = self.reference
        return as_dict
32
+
33
+
34
@dataclass
class AuditEvent:
    """A single redaction or restoration record.

    Only the length of the original value is stored, never the value itself.
    """

    timestamp: str  # ISO-8601 UTC
    detector: str  # e.g. "EMAIL" or "OPENAI_KEY"
    value_length: int  # length of the original value (never the value itself)
    token: str  # the token that replaced the value
    action: str = "REDACTED"  # "REDACTED" | "RESTORED"
    tenant_id: Optional[str] = None
    request_id: Optional[str] = None
    destination: Optional[str] = None
    regulation_mappings: Optional[List[RegulationMapping]] = None

    def to_dict(self) -> Dict[str, object]:
        """Build the camelCase JSON wire representation.

        The five required keys are always present. Optional keys are added
        only when the corresponding field is not ``None``.
        """
        payload: Dict[str, object] = {
            "timestamp": self.timestamp,
            "detector": self.detector,
            "valueLength": self.value_length,
            "token": self.token,
            "action": self.action,
        }

        # Optional scalar fields, listed in the order they appear on the wire.
        optional_scalars = (
            ("tenantId", self.tenant_id),
            ("requestId", self.request_id),
            ("destination", self.destination),
        )
        for wire_key, value in optional_scalars:
            if value is None:
                continue
            payload[wire_key] = value

        mappings = self.regulation_mappings
        if mappings is not None:
            payload["regulationMappings"] = [entry.to_dict() for entry in mappings]
        return payload
66
+
67
+
68
@runtime_checkable
class AuditSink(Protocol):
    """Structural interface for audit sinks: any object with ``write(event)``.

    The protocol is runtime-checkable, so ``isinstance(obj, AuditSink)`` can
    be used; that check only confirms a ``write`` attribute exists, not its
    signature.
    """

    def write(self, event: AuditEvent) -> None:
        """Receive one audit event."""
        ...
71
+
72
+
73
def _m(*pairs: Tuple[str, str]) -> List[RegulationMapping]:
    """Turn ``(framework, reference)`` tuples into RegulationMapping objects.

    Shorthand for declaring mapping tables. The returned list is new on every
    call and keeps the order of ``pairs``.
    """
    mappings: List[RegulationMapping] = []
    for framework, reference in pairs:
        mappings.append(RegulationMapping(framework, reference))
    return mappings
75
+
76
+
77
# Default regulation mappings per detector type. Each entry is defensible
# against the actual regulation text.
#
# Keys are detector type strings (presumably the same values carried in
# AuditEvent.detector -- verify against the detector definitions). Each value
# is a separate list built by _m(), so no two keys share a list object.
# Every framework named below appears in FRAMEWORKS.
# NOTE(review): there are no entries for the identity detectors the package
# exports (name, DOB, passport, address) -- confirm whether that omission is
# intentional.
DEFAULT_REGULATION_MAPPINGS: Dict[str, List[RegulationMapping]] = {
    "EMAIL": _m(("GDPR", "Art. 28"), ("SOC2", "CC6.7")),
    "PHONE": _m(("GDPR", "Art. 28"), ("SOC2", "CC6.7")),
    "CC": _m(("PCI_DSS", "Req. 3.4"), ("SOC2", "CC6.7")),
    "SSN": _m(("GDPR", "Art. 9"), ("HIPAA", "164.514")),
    "IPV4": _m(("GDPR", "Recital 30")),
    "IPV6": _m(("GDPR", "Recital 30")),
    "IBAN": _m(("PCI_DSS", "Req. 3.4"), ("GDPR", "Art. 28")),
    "AWS_KEY": _m(("SOC2", "CC6.1"), ("ISO_27001", "A.9.4.3")),
    "OPENAI_KEY": _m(("SOC2", "CC6.1")),
    "ANTHROPIC_KEY": _m(("SOC2", "CC6.1")),
    "GITHUB_PAT": _m(("SOC2", "CC6.1")),
    "SLACK_TOKEN": _m(("SOC2", "CC6.1")),
    "STRIPE_KEY": _m(("PCI_DSS", "Req. 3.5"), ("SOC2", "CC6.1")),
    "JWT": _m(("SOC2", "CC6.1")),
    "HIGH_ENTROPY": _m(("SOC2", "CC6.1")),
}
96
+
97
+
98
+ __all__ = [
99
+ "FRAMEWORKS",
100
+ "RegulationMapping",
101
+ "AuditEvent",
102
+ "AuditSink",
103
+ "DEFAULT_REGULATION_MAPPINGS",
104
+ ]
sether/core.py ADDED
@@ -0,0 +1,97 @@
1
+ """The :class:`Sether` facade -- a shared vault wiring redact and restore."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import AsyncIterable, AsyncIterator, Iterable, Iterator, Optional, Sequence
6
+
7
+ from .detectors.basic import basic_detectors
8
+ from .detectors.types import Detector
9
+ from .stream.redact import (
10
+ RedactStream,
11
+ aredact_iter,
12
+ redact_iter,
13
+ redact_sync,
14
+ )
15
+ from .stream.restore import (
16
+ RestoreStream,
17
+ arestore_iter,
18
+ restore_iter,
19
+ restore_sync,
20
+ )
21
+ from .vault.memory import MemoryVault
22
+ from .vault.types import Vault
23
+
24
+ _DEFAULT_SAFE_DISTANCE = 256
25
+
26
+
27
class Sether:
    """Streaming PII redaction layer.

    The same instance shares its vault between redaction and restoration,
    which is how the redact->restore round-trip identity is preserved across
    streaming chunks.

    :param detectors: detectors to run. Defaults to ``basic_detectors``.
    :param vault: token vault. Defaults to a fresh :class:`MemoryVault`.
    :param safe_distance_bytes: bytes held back at each chunk tail so a pattern
        crossing a chunk boundary is still detected (default 256).
    """

    def __init__(
        self,
        detectors: Optional[Sequence[Detector]] = None,
        vault: Optional[Vault] = None,
        safe_distance_bytes: int = _DEFAULT_SAFE_DISTANCE,
    ) -> None:
        # Snapshot the detectors into a tuple in BOTH branches. Previously
        # only caller-supplied detectors were copied; the default branch
        # stored the module-level ``basic_detectors`` object itself, so the
        # ``detectors`` property handed out a reference to shared state.
        # ``tuple()`` of an existing tuple returns the same object, so this is
        # free if the default is already immutable.
        self._detectors: Sequence[Detector] = tuple(
            basic_detectors if detectors is None else detectors
        )
        self._vault: Vault = MemoryVault() if vault is None else vault
        self._safe_distance = safe_distance_bytes

    # --- one-shot ---------------------------------------------------------

    def redact_sync(self, text: str) -> str:
        """Redact ``text`` in one call using this instance's detectors and vault."""
        return redact_sync(text, self._detectors, self._vault)

    def restore_sync(self, text: str) -> str:
        """Restore ``text`` in one call using this instance's vault."""
        return restore_sync(text, self._vault)

    # --- synchronous streaming -------------------------------------------

    def redact_stream(self, chunks: Iterable[str]) -> Iterator[str]:
        """Return an iterator of redacted output for the given ``chunks``."""
        return redact_iter(chunks, self._detectors, self._vault, self._safe_distance)

    def restore_stream(self, chunks: Iterable[str]) -> Iterator[str]:
        """Return an iterator of restored output for the given ``chunks``."""
        return restore_iter(chunks, self._vault)

    # --- asynchronous streaming ------------------------------------------

    def aredact_stream(self, chunks: AsyncIterable[str]) -> AsyncIterator[str]:
        """Async counterpart of :meth:`redact_stream`."""
        return aredact_iter(chunks, self._detectors, self._vault, self._safe_distance)

    def arestore_stream(self, chunks: AsyncIterable[str]) -> AsyncIterator[str]:
        """Async counterpart of :meth:`restore_stream`."""
        return arestore_iter(chunks, self._vault)

    # --- low-level stateful transforms -----------------------------------

    def new_redact_stream(self) -> RedactStream:
        """Create a new :class:`RedactStream` bound to this instance's settings."""
        return RedactStream(self._detectors, self._vault, self._safe_distance)

    def new_restore_stream(self) -> RestoreStream:
        """Create a new :class:`RestoreStream` bound to this instance's vault."""
        return RestoreStream(self._vault)

    # --- accessors --------------------------------------------------------

    @property
    def vault(self) -> Vault:
        """The vault shared by redaction and restoration."""
        return self._vault

    @property
    def detectors(self) -> Sequence[Detector]:
        """The detectors this instance runs, as an immutable tuple."""
        return self._detectors

    @property
    def safe_distance_bytes(self) -> int:
        """The hold-back distance passed to the streaming redactors."""
        return self._safe_distance
95
+
96
+
97
+ __all__ = ["Sether"]
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from .basic import (
4
+ basic_detectors,
5
+ credit_card_detector,
6
+ email_detector,
7
+ iban_detector,
8
+ ipv4_detector,
9
+ ipv6_detector,
10
+ is_ipv6_address,
11
+ phone_detector,
12
+ ssn_detector,
13
+ )
14
+ from .identity import (
15
+ address_detector,
16
+ dob_detector,
17
+ identity_detectors,
18
+ name_detector,
19
+ passport_detector,
20
+ )
21
+ from .secrets import (
22
+ anthropic_key_detector,
23
+ aws_access_key_detector,
24
+ github_pat_detector,
25
+ high_entropy_detector,
26
+ jwt_detector,
27
+ openai_key_detector,
28
+ secrets_detectors,
29
+ slack_token_detector,
30
+ stripe_key_detector,
31
+ )
32
+ from .types import Detector, DetectorMatch
33
+
34
+ __all__ = [
35
+ "Detector",
36
+ "DetectorMatch",
37
+ # basic
38
+ "basic_detectors",
39
+ "email_detector",
40
+ "credit_card_detector",
41
+ "ssn_detector",
42
+ "ipv4_detector",
43
+ "ipv6_detector",
44
+ "iban_detector",
45
+ "phone_detector",
46
+ "is_ipv6_address",
47
+ # secrets
48
+ "secrets_detectors",
49
+ "aws_access_key_detector",
50
+ "openai_key_detector",
51
+ "anthropic_key_detector",
52
+ "github_pat_detector",
53
+ "slack_token_detector",
54
+ "stripe_key_detector",
55
+ "jwt_detector",
56
+ "high_entropy_detector",
57
+ # identity
58
+ "identity_detectors",
59
+ "name_detector",
60
+ "dob_detector",
61
+ "passport_detector",
62
+ "address_detector",
63
+ ]