agentreplay 0.1.2__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentreplay/privacy.py ADDED
@@ -0,0 +1,452 @@
1
+ # Copyright 2025 Sushanth (https://github.com/sushanthpy)
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Privacy utilities for payload redaction and PII scrubbing.
17
+
18
+ Provides configurable redaction to protect sensitive data before sending
19
+ to the Agentreplay backend.
20
+
21
+ Example:
22
+ >>> from agentreplay import init
23
+ >>> from agentreplay.privacy import configure_privacy
24
+ >>>
25
+ >>> init()
26
+ >>>
27
+ >>> configure_privacy(
28
+ ... redact_patterns=[r"\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"],
29
+ ... scrub_paths=["input.password", "output.api_key"],
30
+ ... hash_pii=True,
31
+ ... )
32
+ """
33
+
34
+ import re
35
+ import hashlib
36
+ import logging
37
+ from typing import Any, Dict, List, Optional, Pattern, Callable, Union
38
+ from dataclasses import dataclass, field
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # =============================================================================
43
+ # Privacy Configuration
44
+ # =============================================================================
45
+
46
@dataclass
class PrivacyConfig:
    """Settings that drive payload redaction.

    Every field has a default, so ``PrivacyConfig()`` produces a config that
    is enabled but carries no patterns and no scrub paths.
    """

    # Master switch for the privacy features.
    enabled: bool = True

    # Compiled regular expressions; their matches inside strings are redacted.
    redact_patterns: List[Pattern] = field(default_factory=list)

    # Dotted key paths (e.g. "input.password") whose values are scrubbed whole.
    scrub_paths: List[str] = field(default_factory=list)

    # When True, matches are replaced by a short hash instead of redacted_text.
    hash_pii: bool = False

    # Salt prepended to a value before it is hashed.
    hash_salt: str = ""

    # Optional user-supplied callable applied to strings.
    custom_redactor: Optional[Callable[[str], str]] = None

    # Placeholder substituted for redacted content.
    redacted_text: str = "[REDACTED]"


# Module-wide configuration instance shared by the functions in this module.
_privacy_config: PrivacyConfig = PrivacyConfig()
74
+
75
+
76
+ # =============================================================================
77
+ # Built-in Patterns
78
+ # =============================================================================
79
+
80
+ # Common PII patterns
81
# Built-in PII / secret detectors, keyed by a short name.
# Insertion order is significant: the patterns are applied to each string in
# this order when they are loaded into the active configuration.
PATTERNS = {
    # Email address. The TLD class is letters only (a "|" inside a character
    # class is a literal pipe, not alternation).
    "email": re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        re.IGNORECASE
    ),
    # 16 digits in groups of four, optionally separated by "-" or whitespace.
    "credit_card": re.compile(
        r'\b(?:\d{4}[-\s]?){3}\d{4}\b'
    ),
    # US social security number in the NNN-NN-NNNN layout.
    "ssn": re.compile(
        r'\b\d{3}-\d{2}-\d{4}\b'
    ),
    # US phone number with optional +1 prefix and optional parentheses.
    "phone_us": re.compile(
        r'\b(?:\+1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
    ),
    # International phone number. No leading \b: "+" is a non-word character,
    # so a word boundary in front of it would only exist when the "+" is
    # glued to a preceding letter or digit, and normal numbers would be missed.
    "phone_intl": re.compile(
        r'\+\d{1,3}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}\b'
    ),
    # Tokens starting with a well-known key prefix followed by 20+ characters.
    "api_key": re.compile(
        r'\b(?:sk-|pk_|api_|key_|secret_)[A-Za-z0-9_-]{20,}\b',
        re.IGNORECASE
    ),
    # "Bearer" followed by three dot-separated base64url-style segments.
    "bearer_token": re.compile(
        r'Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+',
        re.IGNORECASE
    ),
    # Bare JWT: header and payload both start with "eyJ".
    "jwt": re.compile(
        r'\beyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b'
    ),
    # key=value / key: value pairs whose key looks like a credential name.
    "password_field": re.compile(
        r'(?i)(password|passwd|pwd|secret|token|api_key|apikey)["\']?\s*[:=]\s*["\']?[^"\'\s,}]+',
    ),
    # Dotted quad; octet ranges are not validated.
    "ip_address": re.compile(
        r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
    ),
}
116
+
117
+
118
+ # =============================================================================
119
+ # Configuration Functions
120
+ # =============================================================================
121
+
122
def configure_privacy(
    *,
    enabled: bool = True,
    redact_patterns: Optional[List[Union[str, Pattern]]] = None,
    scrub_paths: Optional[List[str]] = None,
    hash_pii: bool = False,
    hash_salt: str = "",
    custom_redactor: Optional[Callable[[str], str]] = None,
    redacted_text: str = "[REDACTED]",
    use_builtin_patterns: bool = True,
) -> None:
    """Replace the module-wide privacy configuration.

    A brand-new ``PrivacyConfig`` is built from the arguments; nothing is
    merged with the previous configuration.

    Args:
        enabled: Enable/disable privacy features.
        redact_patterns: Extra regex patterns to redact. Strings are compiled
            with ``re.compile``; compiled patterns are used as given.
        scrub_paths: Dotted key paths (e.g. "input.password") whose values
            are replaced wholesale with ``redacted_text``.
        hash_pii: Replace pattern matches with a short hash instead of
            ``redacted_text``.
        hash_salt: Salt for PII hashing.
        custom_redactor: Custom function applied to strings.
        redacted_text: Text to use for redacted content.
        use_builtin_patterns: Include every pattern from ``PATTERNS``; they
            are placed before the custom patterns.

    Raises:
        re.error: If a string in ``redact_patterns`` is not a valid regex.

    Example:
        >>> configure_privacy(
        ...     redact_patterns=[r"secret-\\w+"],
        ...     scrub_paths=["input.api_key", "output.credentials"],
        ...     hash_pii=True,
        ... )
    """
    global _privacy_config

    # Built-ins first, then caller-supplied patterns, preserving their order.
    compiled: List[Pattern] = list(PATTERNS.values()) if use_builtin_patterns else []
    compiled.extend(
        re.compile(candidate) if isinstance(candidate, str) else candidate
        for candidate in (redact_patterns or [])
    )

    _privacy_config = PrivacyConfig(
        enabled=enabled,
        redact_patterns=compiled,
        # Copy: the config's list is appended to in place later on, and that
        # must never leak back into the list object the caller handed in.
        scrub_paths=list(scrub_paths) if scrub_paths else [],
        hash_pii=hash_pii,
        hash_salt=hash_salt,
        custom_redactor=custom_redactor,
        redacted_text=redacted_text,
    )
177
+
178
+
179
def get_privacy_config() -> PrivacyConfig:
    """Return the module-wide privacy configuration.

    The live object is handed back, not a copy.
    """
    active = _privacy_config
    return active
182
+
183
+
184
def reset_privacy() -> None:
    """Swap the module-wide configuration for a fresh default ``PrivacyConfig``."""
    global _privacy_config
    defaults = PrivacyConfig()
    _privacy_config = defaults
188
+
189
+
190
+ # =============================================================================
191
+ # Redaction Functions
192
+ # =============================================================================
193
+
194
def redact_payload(payload: Any) -> Any:
    """Return *payload* with sensitive content redacted.

    When the active configuration is disabled the payload is returned as-is.
    Otherwise it is walked recursively starting from an empty path, and the
    configured scrub paths and patterns decide what gets replaced.

    Args:
        payload: Any JSON-serializable data

    Returns:
        Redacted payload

    Example:
        >>> configure_privacy()  # loads the built-in patterns
        >>> redact_payload({"contact": "user@example.com"})
        {'contact': '[REDACTED]'}
    """
    if _privacy_config.enabled:
        return _redact_value(payload, path="")
    return payload
213
+
214
+
215
def _redact_value(value: Any, path: str = "") -> Any:
    """Recursively redact values in a data structure.

    Args:
        value: The node to process (dict, list, tuple, str or anything else).
        path: Dotted location of *value* inside the payload. Dict keys are
            joined with "." and sequence items are suffixed with "[index]".
            Empty for the root.

    Returns:
        The configured redaction text if *path* is a scrub path. Otherwise a
        rebuilt container with redacted children, a redacted string, or the
        value unchanged for any other type.
    """
    # A scrubbed path replaces the whole subtree, whatever its type.
    if path and _should_scrub_path(path):
        return _privacy_config.redacted_text

    if isinstance(value, dict):
        # str(key): keys may be ints or other non-strings; the path must
        # always be a string because the scrub check lower-cases it.
        return {
            key: _redact_value(child, f"{path}.{key}" if path else str(key))
            for key, child in value.items()
        }

    if isinstance(value, (list, tuple)):
        items = [
            _redact_value(item, f"{path}[{index}]")
            for index, item in enumerate(value)
        ]
        if isinstance(value, list):
            return items
        # Tuples carry strings too and must not slip through unredacted.
        # namedtuple classes take positional fields; plain tuples an iterable.
        if hasattr(value, "_fields"):
            return type(value)(*items)
        return tuple(items)

    if isinstance(value, str):
        return _redact_string(value)

    return value
235
+
236
+
237
def _should_scrub_path(path: str) -> bool:
    """Tell whether *path* matches one of the configured scrub paths.

    Comparison is case-insensitive. A scrub path matches when it equals
    *path* or when *path* ends with "." followed by the scrub path.
    """
    needle = path.lower()
    candidates = (entry.lower() for entry in _privacy_config.scrub_paths)
    return any(
        needle == candidate or needle.endswith(f".{candidate}")
        for candidate in candidates
    )
244
+
245
+
246
def _redact_string(value: str) -> str:
    """Redact patterns from a string value.

    The custom redactor (if configured) runs first, then every configured
    pattern is applied in order. Each match becomes either a short hash
    (``hash_pii`` on) or the configured redaction text.

    Args:
        value: String to process. Empty strings are returned untouched.

    Returns:
        The redacted string.
    """
    if not value:
        return value

    config = _privacy_config
    result = value

    # Apply custom redactor first
    if config.custom_redactor:
        result = config.custom_redactor(result)

    # Pick the replacement once; it does not depend on the pattern.
    if config.hash_pii:
        def replacement(match):
            return _hash_value(match.group(0))
    else:
        placeholder = config.redacted_text

        # A callable is used instead of passing the text directly: a string
        # replacement is treated as a template by re.sub, so backslashes or
        # group references in user-supplied text would be interpreted or
        # raise re.error.
        def replacement(match):
            return placeholder

    for pattern in config.redact_patterns:
        result = pattern.sub(replacement, result)

    return result
268
+
269
+
270
def _hash_value(value: str) -> str:
    """Turn a PII value into a short salted SHA-256 tag.

    The configured salt is prepended to *value*, and the first 8 hex
    characters of the digest are wrapped as ``[HASH:xxxxxxxx]``.
    """
    material = f"{_privacy_config.hash_salt}{value}".encode()
    # 8 hex characters are the same as the first 4 digest bytes.
    short_hex = hashlib.sha256(material).hexdigest()[:8]
    return f"[HASH:{short_hex}]"
276
+
277
+
278
+ # =============================================================================
279
+ # Convenience Functions
280
+ # =============================================================================
281
+
282
def redact_string(value: str) -> str:
    """Apply the configured redaction to one string.

    Args:
        value: String to redact

    Returns:
        The redacted string, or *value* unchanged when the active
        configuration is disabled.
    """
    if _privacy_config.enabled:
        return _redact_string(value)
    return value
294
+
295
+
296
def hash_pii(value: str, salt: Optional[str] = None) -> str:
    """Hash a PII value for anonymous tracking.

    Creates a consistent hash so you can track unique values
    without storing the actual PII. The result contains only the first
    8 hex characters of a SHA-256 digest, so distinct values can collide.

    Args:
        value: The PII value to hash
        salt: Salt to prepend to the value. ``None`` (the default) means
            "use the configured salt"; an explicit empty string is honoured
            and hashes the value unsalted.

    Returns:
        Hashed value like "[HASH:a1b2c3d4]"
    """
    # Only None falls back to the configured salt; a truthiness test would
    # silently discard an explicitly passed empty salt.
    if salt is None:
        salt = _privacy_config.hash_salt
    salted = f"{salt}{value}"
    digest_hex = hashlib.sha256(salted.encode()).hexdigest()
    return f"[HASH:{digest_hex[:8]}]"
313
+
314
+
315
def add_pattern(pattern: Union[str, Pattern], name: Optional[str] = None) -> None:
    """Append a redaction pattern to the active configuration.

    Args:
        pattern: Regex pattern; a string is compiled first, a compiled
            pattern is stored as given
        name: Optional label, used only for the debug log line

    Example:
        >>> add_pattern(r"secret-\\w+", name="custom_secret")
    """
    compiled = re.compile(pattern) if isinstance(pattern, str) else pattern
    _privacy_config.redact_patterns.append(compiled)

    if not name:
        return
    logger.debug(f"Added privacy pattern: {name}")
332
+
333
+
334
def add_scrub_path(path: str) -> None:
    """Append a scrub path to the active configuration and log it at debug level.

    Args:
        path: Dotted key path to scrub (e.g., "input.credentials")
    """
    active_paths = _privacy_config.scrub_paths
    active_paths.append(path)
    logger.debug(f"Added scrub path: {path}")
342
+
343
+
344
+ # =============================================================================
345
+ # Mask Functions (for display)
346
+ # =============================================================================
347
+
348
def mask_email(email: str) -> str:
    """Hide the local part of an email address for display.

    The split happens at the last "@". The first character of the local part
    stays visible and the rest become asterisks; a local part of zero or one
    character is replaced by a single "*". Input without "@" is returned
    unchanged.

    Args:
        email: Email address

    Returns:
        Masked email like "u***@example.com"
    """
    local, separator, domain = email.rpartition("@")
    if not separator:
        return email

    masked_local = local[0] + "*" * (len(local) - 1) if len(local) > 1 else "*"
    return f"{masked_local}@{domain}"
367
+
368
+
369
def mask_phone(phone: str) -> str:
    """Hide all but the last four digits of a phone number for display.

    Non-digit characters are ignored when locating the digits. With fewer
    than four digits, the result is one asterisk per character of the
    original input.

    Args:
        phone: Phone number

    Returns:
        Masked phone like "***-***-1234"
    """
    tail = re.sub(r'\D', '', phone)[-4:]
    # The slice holds four characters exactly when at least four digits exist.
    if len(tail) == 4:
        return f"***-***-{tail}"
    return "*" * len(phone)
384
+
385
+
386
def mask_credit_card(cc: str) -> str:
    """Hide all but the last four digits of a card number for display.

    Separators and other non-digits are stripped before the last four digits
    are taken. With fewer than four digits, the result is one asterisk per
    character of the original input.

    Args:
        cc: Credit card number

    Returns:
        Masked card like "****-****-****-1234"
    """
    only_digits = re.sub(r'\D', '', cc)
    enough_digits = len(only_digits) >= 4
    return f"****-****-****-{only_digits[-4:]}" if enough_digits else "*" * len(cc)
400
+
401
+
402
+ # =============================================================================
403
+ # Context Manager for Temporary Privacy Settings
404
+ # =============================================================================
405
+
406
class privacy_context:
    """Context manager for temporary privacy settings.

    On entry the extra patterns and scrub paths are appended to the active
    configuration; on exit the pattern and path lists captured at entry are
    put back.

    Example:
        >>> with privacy_context(redact_patterns=[r"secret-\\w+"]):
        ...     # Additional patterns active only in this block
        ...     result = redact_payload(data)
    """

    def __init__(
        self,
        *,
        redact_patterns: Optional[List[Union[str, Pattern]]] = None,
        scrub_paths: Optional[List[str]] = None,
    ):
        """Remember the extras; the active configuration is not touched yet.

        Args:
            redact_patterns: Extra regex patterns (strings or compiled).
            scrub_paths: Extra dotted key paths to scrub.
        """
        self.extra_patterns = redact_patterns or []
        self.extra_paths = scrub_paths or []
        self._original_patterns: List[Pattern] = []
        self._original_paths: List[str] = []

    def __enter__(self):
        """Snapshot the current lists, then add the extras.

        Raises:
            re.error: If an extra pattern string is not a valid regex. The
                active configuration is left unmodified in that case.
        """
        # Compile everything before touching shared state: __exit__ does not
        # run when __enter__ raises, so a half-applied set of patterns would
        # otherwise stay in the configuration.
        compiled = [
            re.compile(extra) if isinstance(extra, str) else extra
            for extra in self.extra_patterns
        ]

        # Save originals
        self._original_patterns = _privacy_config.redact_patterns.copy()
        self._original_paths = _privacy_config.scrub_paths.copy()

        # Extend the live lists in place.
        _privacy_config.redact_patterns.extend(compiled)
        _privacy_config.scrub_paths.extend(self.extra_paths)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the snapshots taken on entry; exceptions are not suppressed."""
        _privacy_config.redact_patterns = self._original_patterns
        _privacy_config.scrub_paths = self._original_paths

        return False