repr-cli 0.1.0-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repr/privacy.py ADDED
@@ -0,0 +1,333 @@
+"""
+Privacy controls and audit logging.
+
+Tracks what data has been sent to cloud services and provides
+privacy guarantee explanations.
+"""
+
+import json
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any
+
+from .config import (
+    AUDIT_DIR,
+    CONFIG_DIR,
+    get_privacy_settings,
+    get_llm_config,
+    is_authenticated,
+    is_cloud_allowed,
+    load_config,
+)
+
+
+# Audit log file
+AUDIT_LOG_FILE = AUDIT_DIR / "cloud_operations.json"
+
+
+def ensure_audit_dir() -> None:
+    """Ensure audit directory exists."""
+    AUDIT_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def log_cloud_operation(
+    operation: str,
+    destination: str,
+    payload_summary: dict[str, Any],
+    bytes_sent: int = 0,
+) -> None:
+    """
+    Log a cloud operation for audit purposes.
+
+    Args:
+        operation: Type of operation (e.g., "cloud_generation", "push", "byok_generation")
+        destination: Where data was sent (e.g., "repr.dev", "api.openai.com")
+        payload_summary: Summary of what was sent (no actual content)
+        bytes_sent: Approximate bytes sent
+    """
+    ensure_audit_dir()
+
+    # Load existing log
+    log = load_audit_log()
+
+    # Add new entry
+    log.append({
+        "timestamp": datetime.now().isoformat(),
+        "operation": operation,
+        "destination": destination,
+        "payload_summary": payload_summary,
+        "bytes_sent": bytes_sent,
+    })
+
+    # Keep only last 1000 entries
+    if len(log) > 1000:
+        log = log[-1000:]
+
+    # Save
+    AUDIT_LOG_FILE.write_text(json.dumps(log, indent=2))
+
+
+def load_audit_log() -> list[dict[str, Any]]:
+    """Load the audit log."""
+    if not AUDIT_LOG_FILE.exists():
+        return []
+
+    try:
+        return json.loads(AUDIT_LOG_FILE.read_text())
+    except (json.JSONDecodeError, IOError):
+        return []
+
+
+def get_audit_summary(days: int = 30) -> dict[str, Any]:
+    """
+    Get summary of cloud operations for the last N days.
+
+    Args:
+        days: Number of days to include
+
+    Returns:
+        Summary dict with operation counts and details
+    """
+    log = load_audit_log()
+    cutoff = datetime.now() - timedelta(days=days)
+
+    # Filter to recent entries
+    recent = []
+    for entry in log:
+        try:
+            ts = datetime.fromisoformat(entry["timestamp"])
+            if ts >= cutoff:
+                recent.append(entry)
+        except (ValueError, KeyError):
+            continue
+
+    # Group by operation type
+    by_operation: dict[str, list] = {}
+    for entry in recent:
+        op = entry.get("operation", "unknown")
+        if op not in by_operation:
+            by_operation[op] = []
+        by_operation[op].append(entry)
+
+    # Calculate totals
+    total_bytes = sum(e.get("bytes_sent", 0) for e in recent)
+
+    return {
+        "period_days": days,
+        "total_operations": len(recent),
+        "total_bytes_sent": total_bytes,
+        "by_operation": {
+            op: {
+                "count": len(entries),
+                "bytes_sent": sum(e.get("bytes_sent", 0) for e in entries),
+                "recent": entries[-3:],  # Last 3 entries
+            }
+            for op, entries in by_operation.items()
+        },
+        "destinations": list(set(e.get("destination", "") for e in recent)),
+    }
+
+
+def get_privacy_explanation() -> dict[str, Any]:
+    """
+    Get comprehensive privacy explanation for current state.
+
+    Returns:
+        Dict with architecture guarantees, current settings, and policies
+    """
+    privacy_settings = get_privacy_settings()
+    llm_config = get_llm_config()
+    authenticated = is_authenticated()
+    cloud_allowed = is_cloud_allowed()
+
+    return {
+        "architecture": {
+            "guarantee": "Without login, nothing leaves your machine (enforced in code)",
+            "no_background_daemons": True,
+            "no_silent_uploads": True,
+            "all_network_foreground": True,
+            "no_telemetry_default": True,
+        },
+        "current_state": {
+            "authenticated": authenticated,
+            "mode": "cloud-enabled" if (authenticated and cloud_allowed) else "local only",
+            "privacy_lock": privacy_settings.get("lock_local_only", False),
+            "privacy_lock_permanent": privacy_settings.get("lock_permanent", False),
+            "telemetry_enabled": privacy_settings.get("telemetry_enabled", False),
+        },
+        "local_mode_network_policy": {
+            "allowed": ["127.0.0.1", "localhost", "::1", "Unix domain sockets"],
+            "blocked": ["All external network", "DNS to public", "HTTP(S) to internet"],
+        },
+        "cloud_mode_settings": {
+            "path_redaction_enabled": llm_config.get("cloud_redact_paths", True),
+            "diffs_disabled": not llm_config.get("cloud_send_diffs", False),
+            "email_redaction_enabled": llm_config.get("cloud_redact_emails", False),
+            "custom_redact_patterns": llm_config.get("cloud_redact_patterns", []),
+            "repo_allowlist": llm_config.get("cloud_allowlist_repos", []),
+        },
+        "data_retention": {
+            "story_deletion": "Immediate removal from repr.dev",
+            "account_deletion": "All data deleted within 30 days",
+            "backups": "Encrypted snapshots retained 90 days",
+            "local_data": "Never touched by cloud operations",
+        },
+        "ownership": {
+            "content_ownership": "Your content belongs to you",
+            "no_ownership_claim": "repr claims no ownership over generated stories",
+        },
+    }
+
+
+def get_data_sent_history(limit: int = 20) -> list[dict[str, Any]]:
+    """
+    Get history of data sent to cloud.
+
+    Args:
+        limit: Maximum entries to return
+
+    Returns:
+        List of audit entries with human-readable details
+    """
+    log = load_audit_log()
+
+    # Format entries
+    formatted = []
+    for entry in reversed(log[-limit:]):
+        ts = entry.get("timestamp", "")
+        try:
+            dt = datetime.fromisoformat(ts)
+            relative = _relative_time(dt)
+        except ValueError:
+            relative = ts
+
+        formatted.append({
+            "timestamp": ts,
+            "relative_time": relative,
+            "operation": _format_operation(entry.get("operation", "")),
+            "destination": entry.get("destination", ""),
+            "summary": entry.get("payload_summary", {}),
+            "bytes_sent": entry.get("bytes_sent", 0),
+        })
+
+    return formatted
+
+
+def _format_operation(op: str) -> str:
+    """Format operation name for display."""
+    return {
+        "cloud_generation": "Cloud LLM Generation",
+        "byok_generation": "BYOK Generation",
+        "push": "Story Push",
+        "sync": "Story Sync",
+        "profile_update": "Profile Update",
+    }.get(op, op.replace("_", " ").title())
+
+
+def _relative_time(dt: datetime) -> str:
+    """Format datetime as relative time."""
+    now = datetime.now()
+    delta = now - dt
+
+    if delta.days > 365:
+        return f"{delta.days // 365}y ago"
+    elif delta.days > 30:
+        return f"{delta.days // 30}mo ago"
+    elif delta.days > 0:
+        return f"{delta.days}d ago"
+    elif delta.seconds > 3600:
+        return f"{delta.seconds // 3600}h ago"
+    elif delta.seconds > 60:
+        return f"{delta.seconds // 60}m ago"
+    else:
+        return "just now"
+
+
+def check_cloud_permission(operation: str) -> tuple[bool, str | None]:
+    """
+    Check if a cloud operation is permitted.
+
+    Args:
+        operation: Operation type (e.g., "cloud_generation", "push")
+
+    Returns:
+        Tuple of (allowed, reason_if_blocked)
+    """
+    from .config import is_ci_mode, get_forced_mode
+
+    # CI mode blocks cloud
+    if is_ci_mode():
+        return False, "CI mode enabled (REPR_CI=true)"
+
+    # Forced local mode
+    if get_forced_mode() == "local":
+        return False, "Local mode forced (REPR_MODE=local)"
+
+    # Privacy lock
+    privacy = get_privacy_settings()
+    if privacy.get("lock_local_only"):
+        if privacy.get("lock_permanent"):
+            return False, "Cloud features permanently locked"
+        return False, "Local-only mode enabled (use `repr privacy unlock-local` to disable)"
+
+    # Authentication required
+    if not is_authenticated():
+        return False, "Not authenticated (run `repr login`)"
+
+    return True, None
+
+
+def get_local_data_info() -> dict[str, Any]:
+    """
+    Get information about locally stored data.
+
+    Returns:
+        Dict with local data statistics
+    """
+    from .storage import STORIES_DIR, get_story_count
+
+    def dir_size(path: Path) -> int:
+        total = 0
+        if path.exists():
+            for f in path.rglob("*"):
+                if f.is_file():
+                    total += f.stat().st_size
+        return total
+
+    stories_size = dir_size(STORIES_DIR) if STORIES_DIR.exists() else 0
+    audit_size = dir_size(AUDIT_DIR) if AUDIT_DIR.exists() else 0
+
+    return {
+        "stories": {
+            "count": get_story_count(),
+            "size_bytes": stories_size,
+            "path": str(STORIES_DIR),
+        },
+        "audit_log": {
+            "entries": len(load_audit_log()),
+            "size_bytes": audit_size,
+            "path": str(AUDIT_DIR),
+        },
+        "config": {
+            "path": str(CONFIG_DIR / "config.json"),
+            "size_bytes": (CONFIG_DIR / "config.json").stat().st_size if (CONFIG_DIR / "config.json").exists() else 0,
+        },
+    }
+
+
+def clear_audit_log() -> int:
+    """
+    Clear the audit log.
+
+    Returns:
+        Number of entries cleared
+    """
+    log = load_audit_log()
+    count = len(log)
+
+    if AUDIT_LOG_FILE.exists():
+        AUDIT_LOG_FILE.unlink()
+
+    return count
+
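
Taken together, the added module gives the CLI an auditable gate around any network call: check_cloud_permission() decides whether a call may happen at all, log_cloud_operation() records a content-free summary of what was sent, and get_audit_summary() or get_data_sent_history() report that history back to the user. The sketch below illustrates that flow based only on the signatures above; it is not code shipped in the wheel, and the payload_summary keys and byte count are made-up placeholder values.

from repr.privacy import (
    check_cloud_permission,
    get_audit_summary,
    log_cloud_operation,
)

allowed, reason = check_cloud_permission("cloud_generation")
if not allowed:
    print(f"cloud generation blocked: {reason}")
else:
    # ... perform the actual cloud request here, then record a summary only
    # (the summary keys below are illustrative placeholders, not a documented schema)
    log_cloud_operation(
        operation="cloud_generation",
        destination="repr.dev",
        payload_summary={"commits": 12, "repos": 1},
        bytes_sent=4096,
    )

# Report the last 30 days of cloud activity
summary = get_audit_summary(days=30)
print(summary["total_operations"], "operations,", summary["total_bytes_sent"], "bytes sent")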