misata 0.1.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
misata/audit.py ADDED
@@ -0,0 +1,415 @@
1
+ """
2
+ Enterprise Audit Logging for Misata.
3
+
4
+ This module provides:
5
+ - Complete audit trail of all data generation operations
6
+ - Session tracking with user context
7
+ - Compliance-ready export formats
8
+ - Data lineage tracking
9
+
10
+ This addresses the critic's concern: "No enterprise features"
11
+ """
12
+
13
+ import json
14
+ import sqlite3
15
+ import uuid
16
+ from contextlib import contextmanager
17
+ from dataclasses import dataclass
18
+ from datetime import datetime
19
+ from typing import Any, Dict, List, Optional
20
+ from pathlib import Path
21
+
22
+
23
@dataclass
class AuditEntry:
    """One record of the audit trail.

    The three optional fields default to "no user", a successful status
    and an unknown duration respectively.
    """
    timestamp: str
    session_id: str
    operation: str
    details: Dict[str, Any]
    user_id: Optional[str] = None
    status: str = "success"
    duration_ms: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dictionary, with ``details`` last."""
        # Key order is spelled out explicitly because it differs from the
        # field declaration order above.
        ordered_keys = (
            "timestamp",
            "session_id",
            "operation",
            "user_id",
            "status",
            "duration_ms",
            "details",
        )
        return {key: getattr(self, key) for key in ordered_keys}
44
+
45
+
46
class AuditLogger:
    """
    Enterprise audit logging for compliance and debugging.

    Records are persisted in a SQLite database with two tables:
    ``audit_log`` (one row per logged operation) and ``sessions`` (one row
    per session, with running counters). A fresh connection is opened for
    every operation and is always closed again, even when the operation
    raises.

    Tracks:
    - Schema generations (LLM calls)
    - Data generations (row counts, tables)
    - User corrections (feedback)
    - Validation results
    - Export operations
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize audit logger.

        Args:
            db_path: Path to SQLite database. Defaults to ~/.misata/audit.db
        """
        if db_path is None:
            misata_dir = Path.home() / ".misata"
            misata_dir.mkdir(parents=True, exist_ok=True)
            db_path = str(misata_dir / "audit.db")

        self.db_path = db_path
        self._current_session: Optional[str] = None
        self._init_db()

    @contextmanager
    def _connection(self):
        """Yield a SQLite connection; commit on success and always close.

        If the body raises, nothing is committed and the connection is
        still closed, so a failing statement cannot leak a handle.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    def _init_db(self):
        """Create the tables and indexes if they do not exist yet."""
        with self._connection() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS audit_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    operation TEXT NOT NULL,
                    user_id TEXT,
                    status TEXT DEFAULT 'success',
                    duration_ms INTEGER,
                    details TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS sessions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    session_id TEXT UNIQUE NOT NULL,
                    start_time TEXT NOT NULL,
                    end_time TEXT,
                    user_id TEXT,
                    story TEXT,
                    tables_generated INTEGER DEFAULT 0,
                    rows_generated INTEGER DEFAULT 0,
                    corrections_count INTEGER DEFAULT 0,
                    status TEXT DEFAULT 'active'
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_audit_session ON audit_log(session_id)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp)
            """)

    def start_session(self, user_id: Optional[str] = None) -> str:
        """
        Start a new audit session and make it the current one.

        Args:
            user_id: Optional user identifier stored with the session

        Returns:
            Session ID
        """
        session_id = str(uuid.uuid4())

        with self._connection() as conn:
            conn.execute("""
                INSERT INTO sessions (session_id, start_time, user_id)
                VALUES (?, ?, ?)
            """, (session_id, datetime.now().isoformat(), user_id))

        # Only switch the current session once its row really exists.
        self._current_session = session_id
        self.log("session_start", {"user_id": user_id}, user_id=user_id)

        return session_id

    def end_session(self, session_id: Optional[str] = None):
        """
        End an audit session.

        Args:
            session_id: Session to end. Defaults to the current session;
                does nothing when there is neither.
        """
        session_id = session_id or self._current_session
        if not session_id:
            return

        with self._connection() as conn:
            conn.execute("""
                UPDATE sessions SET end_time = ?, status = 'completed'
                WHERE session_id = ?
            """, (datetime.now().isoformat(), session_id))

        # Attribute the end marker to the session actually being ended,
        # which may differ from the current one.
        self.log("session_end", {}, session_id=session_id)
        if session_id == self._current_session:
            self._current_session = None

    def log(
        self,
        operation: str,
        details: Dict[str, Any],
        status: str = "success",
        duration_ms: Optional[int] = None,
        user_id: Optional[str] = None,
        session_id: Optional[str] = None
    ):
        """
        Log an operation.

        Args:
            operation: Type of operation (e.g., 'schema_generation', 'data_export')
            details: Operation details (must be JSON-serializable)
            status: 'success', 'error', or 'warning'
            duration_ms: Operation duration in milliseconds
            user_id: Optional user identifier
            session_id: Session to file the record under. Defaults to the
                current session, or 'no_session' when none is active.
        """
        target_session = session_id or self._current_session or "no_session"

        # Serialize before opening the connection so a non-serializable
        # payload fails without touching the database.
        payload = json.dumps(details)

        with self._connection() as conn:
            conn.execute("""
                INSERT INTO audit_log (timestamp, session_id, operation, user_id, status, duration_ms, details)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                datetime.now().isoformat(),
                target_session,
                operation,
                user_id,
                status,
                duration_ms,
                payload
            ))

    def log_schema_generation(self, story: str, tables_count: int, duration_ms: int):
        """Log a schema generation and add its table count to the session."""
        # Only the first 100 characters of the story are stored.
        preview = story[:100] + "..." if len(story) > 100 else story
        self.log("schema_generation", {
            "story_length": len(story),
            "story_preview": preview,
            "tables_generated": tables_count
        }, duration_ms=duration_ms)

        if self._current_session:
            self._update_session(tables=tables_count)

    def log_data_generation(self, tables: Dict[str, int], total_rows: int, duration_ms: int):
        """Log a data generation and add its row count to the session."""
        self.log("data_generation", {
            "tables": tables,
            "total_rows": total_rows
        }, duration_ms=duration_ms)

        if self._current_session:
            self._update_session(rows=total_rows)

    def log_correction(self, table: str, column: str, change: str):
        """Log a user correction and bump the session's correction counter."""
        self.log("user_correction", {
            "table": table,
            "column": column,
            "change": change
        })

        if self._current_session:
            with self._connection() as conn:
                conn.execute("""
                    UPDATE sessions SET corrections_count = corrections_count + 1
                    WHERE session_id = ?
                """, (self._current_session,))

    def log_validation(self, passed: bool, score: float, issues_count: int):
        """Log a validation result; a failed validation is a 'warning'."""
        self.log("validation", {
            "passed": passed,
            "score": score,
            "issues_count": issues_count
        }, status="success" if passed else "warning")

    def log_export(self, format: str, tables: List[str], file_path: str):
        """Log a data export."""
        self.log("data_export", {
            "format": format,
            "tables": tables,
            "file_path": file_path
        })

    def _update_session(self, tables: int = 0, rows: int = 0):
        """Add ``tables`` / ``rows`` to the current session's counters.

        A zero increment is skipped entirely.
        """
        with self._connection() as conn:
            if tables:
                conn.execute("""
                    UPDATE sessions SET tables_generated = tables_generated + ?
                    WHERE session_id = ?
                """, (tables, self._current_session))

            if rows:
                conn.execute("""
                    UPDATE sessions SET rows_generated = rows_generated + ?
                    WHERE session_id = ?
                """, (rows, self._current_session))

    def get_session_logs(self, session_id: str) -> List["AuditEntry"]:
        """
        Get all logs for a session, oldest first.

        Args:
            session_id: Session whose records should be returned

        Returns:
            List of AuditEntry objects (empty if the session has no records)
        """
        with self._connection() as conn:
            # The id tiebreaker keeps insertion order when two records
            # share the same timestamp string.
            rows = conn.execute("""
                SELECT timestamp, session_id, operation, user_id, status, duration_ms, details
                FROM audit_log
                WHERE session_id = ?
                ORDER BY timestamp, id
            """, (session_id,)).fetchall()

        return [
            AuditEntry(
                timestamp=row[0],
                session_id=row[1],
                operation=row[2],
                user_id=row[3],
                status=row[4],
                duration_ms=row[5],
                details=json.loads(row[6]) if row[6] else {}
            )
            for row in rows
        ]

    def export_compliance_report(
        self,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        format: str = "json"
    ) -> str:
        """
        Export compliance-ready audit report.

        Args:
            start_date: Filter start (ISO format), inclusive
            end_date: Filter end (ISO format), inclusive
            format: 'json' for JSON; any other value produces CSV

        Returns:
            Report as string. In both formats the ``details`` column is the
            JSON text exactly as stored.
        """
        query = "SELECT * FROM audit_log WHERE 1=1"
        params = []

        # Timestamps are stored as ISO strings, so the range filter is a
        # plain string comparison.
        if start_date:
            query += " AND timestamp >= ?"
            params.append(start_date)
        if end_date:
            query += " AND timestamp <= ?"
            params.append(end_date)

        query += " ORDER BY timestamp"

        with self._connection() as conn:
            cursor = conn.execute(query, params)
            rows = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]

        if format == "json":
            records = [dict(zip(columns, row)) for row in rows]
            return json.dumps({
                "report_type": "Misata Compliance Audit",
                "generated_at": datetime.now().isoformat(),
                "record_count": len(records),
                "records": records
            }, indent=2)

        # CSV: use the csv module so commas, quotes and newlines inside a
        # field (the JSON details always contain them) are quoted properly.
        # NULL is written as an empty field, while 0 stays "0".
        import csv
        import io

        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(columns)
        writer.writerows(rows)
        # No trailing newline after the last record.
        return buffer.getvalue().rstrip("\n")

    def get_summary(self) -> Dict[str, Any]:
        """
        Get audit summary statistics.

        Returns:
            Dict with the total number of logged operations, the number of
            sessions, the sum of generated rows across sessions, and the
            five most frequent operations with their counts.
        """
        with self._connection() as conn:
            total_ops = conn.execute(
                "SELECT COUNT(*) FROM audit_log"
            ).fetchone()[0]

            total_sessions = conn.execute(
                "SELECT COUNT(*) FROM sessions"
            ).fetchone()[0]

            top_ops = conn.execute("""
                SELECT operation, COUNT(*) FROM audit_log
                GROUP BY operation ORDER BY COUNT(*) DESC LIMIT 5
            """).fetchall()

            # SUM over an empty table is NULL; report it as 0.
            total_rows = conn.execute(
                "SELECT SUM(rows_generated) FROM sessions"
            ).fetchone()[0] or 0

        return {
            "total_operations": total_ops,
            "total_sessions": total_sessions,
            "total_rows_generated": total_rows,
            "top_operations": dict(top_ops)
        }
385
+
386
+
387
@contextmanager
def audited_session(user_id: Optional[str] = None):
    """
    Run a block of work inside its own audit session.

    A new AuditLogger is created, a session is started for ``user_id``,
    and the logger is handed to the ``with`` body. The session is ended
    when the body exits, whether normally or through an exception.

    Usage:
        with audited_session("user123") as audit:
            audit.log_schema_generation(...)
            audit.log_data_generation(...)
    """
    audit = AuditLogger()
    active_id = audit.start_session(user_id)

    try:
        yield audit
    finally:
        # End the session that was opened here, by its explicit id.
        audit.end_session(active_id)
404
+
405
+
406
# Lazily created process-wide logger; stays None until first requested.
_global_logger: Optional[AuditLogger] = None


def get_audit_logger() -> AuditLogger:
    """Return the shared audit logger, creating it on first use."""
    global _global_logger
    if _global_logger is not None:
        return _global_logger

    _global_logger = AuditLogger()
    return _global_logger