misata 0.1.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- misata/__init__.py +48 -0
- misata/api.py +460 -0
- misata/audit.py +415 -0
- misata/benchmark.py +376 -0
- misata/cli.py +680 -0
- misata/codegen.py +153 -0
- misata/curve_fitting.py +106 -0
- misata/customization.py +256 -0
- misata/feedback.py +433 -0
- misata/formulas.py +362 -0
- misata/generators.py +247 -0
- misata/hybrid.py +398 -0
- misata/llm_parser.py +493 -0
- misata/noise.py +346 -0
- misata/schema.py +252 -0
- misata/semantic.py +185 -0
- misata/simulator.py +742 -0
- misata/story_parser.py +425 -0
- misata/templates/__init__.py +444 -0
- misata/validation.py +313 -0
- misata-0.1.0b0.dist-info/METADATA +291 -0
- misata-0.1.0b0.dist-info/RECORD +25 -0
- misata-0.1.0b0.dist-info/WHEEL +5 -0
- misata-0.1.0b0.dist-info/entry_points.txt +2 -0
- misata-0.1.0b0.dist-info/top_level.txt +1 -0
misata/audit.py
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enterprise Audit Logging for Misata.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- Complete audit trail of all data generation operations
|
|
6
|
+
- Session tracking with user context
|
|
7
|
+
- Compliance-ready export formats
|
|
8
|
+
- Data lineage tracking
|
|
9
|
+
|
|
10
|
+
This addresses the critic's concern: "No enterprise features"
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import sqlite3
|
|
15
|
+
import uuid
|
|
16
|
+
from contextlib import contextmanager
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Any, Dict, List, Optional
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class AuditEntry:
    """A single audit log entry.

    Mirrors one row of the ``audit_log`` table (without the numeric row id).
    """

    # ISO-8601 timestamp string of when the operation was logged.
    timestamp: str
    # Identifier of the session the operation was logged under.
    session_id: str
    # Operation name, e.g. 'schema_generation' or 'data_export'.
    operation: str
    # Free-form, operation-specific payload.
    details: Dict[str, Any]
    # Optional identifier of the user who triggered the operation.
    user_id: Optional[str] = None
    # Outcome label; the logger uses 'success', 'error' or 'warning'.
    status: str = "success"
    # Operation duration in milliseconds, when it was measured.
    duration_ms: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dictionary.

        The ``details`` value is the same dict object held by the entry
        (it is not copied).
        """
        return {
            "timestamp": self.timestamp,
            "session_id": self.session_id,
            "operation": self.operation,
            "user_id": self.user_id,
            "status": self.status,
            "duration_ms": self.duration_ms,
            "details": self.details,
        }
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class AuditLogger:
    """
    Enterprise audit logging for compliance and debugging.

    Operations are persisted to a SQLite database with two tables:
    ``audit_log`` (one row per logged operation) and ``sessions`` (one row
    per session, holding running counters).

    Tracks:
    - Schema generations (LLM calls)
    - Data generations (row counts, tables)
    - User corrections (feedback)
    - Validation results
    - Export operations
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize audit logger.

        Args:
            db_path: Path to SQLite database. Defaults to ~/.misata/audit.db
        """
        if db_path is None:
            misata_dir = Path.home() / ".misata"
            misata_dir.mkdir(parents=True, exist_ok=True)
            db_path = str(misata_dir / "audit.db")

        self.db_path = db_path
        self._current_session: Optional[str] = None
        self._init_db()

    @contextmanager
    def _connection(self):
        """Yield a SQLite connection; commit on success and always close it.

        If the managed body raises, the commit is skipped and the connection
        is still closed, so no handle is leaked and uncommitted changes are
        discarded.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    def _init_db(self):
        """Initialize database schema (tables and indexes) if missing."""
        with self._connection() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS audit_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    operation TEXT NOT NULL,
                    user_id TEXT,
                    status TEXT DEFAULT 'success',
                    duration_ms INTEGER,
                    details TEXT
                )
            """)

            conn.execute("""
                CREATE TABLE IF NOT EXISTS sessions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    session_id TEXT UNIQUE NOT NULL,
                    start_time TEXT NOT NULL,
                    end_time TEXT,
                    user_id TEXT,
                    story TEXT,
                    tables_generated INTEGER DEFAULT 0,
                    rows_generated INTEGER DEFAULT 0,
                    corrections_count INTEGER DEFAULT 0,
                    status TEXT DEFAULT 'active'
                )
            """)

            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_audit_session ON audit_log(session_id)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp)
            """)

    def _write_entry(
        self,
        session_id: str,
        operation: str,
        details: Dict[str, Any],
        status: str = "success",
        duration_ms: Optional[int] = None,
        user_id: Optional[str] = None
    ):
        """Insert one ``audit_log`` row for an explicitly given session."""
        # Serialize first so an unserializable payload fails before a
        # connection is opened.
        payload = json.dumps(details)
        with self._connection() as conn:
            conn.execute("""
                INSERT INTO audit_log (timestamp, session_id, operation, user_id, status, duration_ms, details)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                datetime.now().isoformat(),
                session_id,
                operation,
                user_id,
                status,
                duration_ms,
                payload
            ))

    def start_session(self, user_id: Optional[str] = None) -> str:
        """
        Start a new audit session and make it the current one.

        Args:
            user_id: Optional user identifier stored with the session.

        Returns:
            Session ID
        """
        session_id = str(uuid.uuid4())

        with self._connection() as conn:
            conn.execute("""
                INSERT INTO sessions (session_id, start_time, user_id)
                VALUES (?, ?, ?)
            """, (session_id, datetime.now().isoformat(), user_id))

        # Only switch the current session once its row actually exists.
        self._current_session = session_id

        # Fill the user_id column as well as the details payload so the
        # entry can be filtered by user.
        self.log("session_start", {"user_id": user_id}, user_id=user_id)

        return session_id

    def end_session(self, session_id: Optional[str] = None):
        """
        End an audit session.

        Args:
            session_id: Session to end. Defaults to the current session; if
                there is none, the call does nothing.
        """
        session_id = session_id or self._current_session
        if not session_id:
            return

        with self._connection() as conn:
            conn.execute("""
                UPDATE sessions SET end_time = ?, status = 'completed'
                WHERE session_id = ?
            """, (datetime.now().isoformat(), session_id))

        # Record the end marker against the session being ended, which may
        # differ from the current session when an explicit id was passed.
        self._write_entry(session_id, "session_end", {})

        # Ending some other session must not deactivate the current one.
        if session_id == self._current_session:
            self._current_session = None

    def log(
        self,
        operation: str,
        details: Dict[str, Any],
        status: str = "success",
        duration_ms: Optional[int] = None,
        user_id: Optional[str] = None
    ):
        """
        Log an operation under the current session.

        When no session is active the entry is stored with the session id
        "no_session".

        Args:
            operation: Type of operation (e.g., 'schema_generation', 'data_export')
            details: Operation details (must be JSON-serializable)
            status: 'success', 'error', or 'warning'
            duration_ms: Operation duration in milliseconds
            user_id: Optional user identifier
        """
        self._write_entry(
            self._current_session or "no_session",
            operation,
            details,
            status=status,
            duration_ms=duration_ms,
            user_id=user_id
        )

    def log_schema_generation(self, story: str, tables_count: int, duration_ms: int):
        """Log a schema generation operation.

        Only the story length and a preview (first 100 characters followed
        by "..." for longer stories) are stored, not the full story.
        """
        preview = (story[:100] + "...") if len(story) > 100 else story
        self.log("schema_generation", {
            "story_length": len(story),
            "story_preview": preview,
            "tables_generated": tables_count
        }, duration_ms=duration_ms)

        # Update session
        if self._current_session:
            self._update_session(tables=tables_count)

    def log_data_generation(self, tables: Dict[str, int], total_rows: int, duration_ms: int):
        """Log a data generation operation and add its rows to the session total."""
        self.log("data_generation", {
            "tables": tables,
            "total_rows": total_rows
        }, duration_ms=duration_ms)

        if self._current_session:
            self._update_session(rows=total_rows)

    def log_correction(self, table: str, column: str, change: str):
        """Log a user correction and bump the session's correction counter."""
        self.log("user_correction", {
            "table": table,
            "column": column,
            "change": change
        })

        if self._current_session:
            with self._connection() as conn:
                conn.execute("""
                    UPDATE sessions SET corrections_count = corrections_count + 1
                    WHERE session_id = ?
                """, (self._current_session,))

    def log_validation(self, passed: bool, score: float, issues_count: int):
        """Log a validation result; a failed validation is stored with status 'warning'."""
        self.log("validation", {
            "passed": passed,
            "score": score,
            "issues_count": issues_count
        }, status="success" if passed else "warning")

    def log_export(self, format: str, tables: List[str], file_path: str):
        """Log a data export."""
        self.log("data_export", {
            "format": format,
            "tables": tables,
            "file_path": file_path
        })

    def _update_session(self, tables: int = 0, rows: int = 0):
        """Add ``tables`` / ``rows`` to the current session's counters.

        Zero (or otherwise falsy) increments are skipped.
        """
        with self._connection() as conn:
            if tables:
                conn.execute("""
                    UPDATE sessions SET tables_generated = tables_generated + ?
                    WHERE session_id = ?
                """, (tables, self._current_session))

            if rows:
                conn.execute("""
                    UPDATE sessions SET rows_generated = rows_generated + ?
                    WHERE session_id = ?
                """, (rows, self._current_session))

    def get_session_logs(self, session_id: str) -> List["AuditEntry"]:
        """Get all logs for a session, oldest first.

        Entries with identical timestamps are returned in insertion order.
        A row whose ``details`` column is empty yields an empty dict.
        """
        with self._connection() as conn:
            rows = conn.execute("""
                SELECT timestamp, session_id, operation, user_id, status, duration_ms, details
                FROM audit_log
                WHERE session_id = ?
                ORDER BY timestamp, id
            """, (session_id,)).fetchall()

        return [
            AuditEntry(
                timestamp=row[0],
                session_id=row[1],
                operation=row[2],
                user_id=row[3],
                status=row[4],
                duration_ms=row[5],
                details=json.loads(row[6]) if row[6] else {}
            )
            for row in rows
        ]

    def export_compliance_report(
        self,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        format: str = "json"
    ) -> str:
        """
        Export compliance-ready audit report.

        Date filters are compared as strings against the stored ISO
        timestamps, and both bounds are inclusive.

        Args:
            start_date: Filter start (ISO format)
            end_date: Filter end (ISO format)
            format: 'json' or 'csv'. Any value other than 'json' produces CSV.

        Returns:
            Report as string. In the JSON report each record's ``details``
            is the stored JSON text. The CSV report has a header row, uses
            standard CSV quoting, and has no trailing newline.
        """
        # Local imports: only the CSV branch needs these modules.
        import csv
        import io

        query = "SELECT * FROM audit_log WHERE 1=1"
        params: List[str] = []

        if start_date:
            query += " AND timestamp >= ?"
            params.append(start_date)
        if end_date:
            query += " AND timestamp <= ?"
            params.append(end_date)

        query += " ORDER BY timestamp"

        with self._connection() as conn:
            cursor = conn.execute(query, params)
            rows = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]

        if format == "json":
            records = [dict(zip(columns, row)) for row in rows]
            return json.dumps({
                "report_type": "Misata Compliance Audit",
                "generated_at": datetime.now().isoformat(),
                "record_count": len(records),
                "records": records
            }, indent=2)

        # csv: the details column is JSON text full of commas and quotes, so
        # fields must be properly quoted. csv.writer also renders None as an
        # empty field while keeping legitimate zeros such as duration_ms=0.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(columns)
        writer.writerows(rows)
        return buffer.getvalue().rstrip("\n")

    def get_summary(self) -> Dict[str, Any]:
        """Get audit summary statistics.

        Returns:
            Dict with the total number of logged operations, the number of
            sessions, the sum of rows generated across sessions, and the
            counts of the five most frequent operations.
        """
        with self._connection() as conn:
            total_ops = conn.execute("SELECT COUNT(*) FROM audit_log").fetchone()[0]

            total_sessions = conn.execute("SELECT COUNT(*) FROM sessions").fetchone()[0]

            top_ops = conn.execute("""
                SELECT operation, COUNT(*) FROM audit_log
                GROUP BY operation ORDER BY COUNT(*) DESC LIMIT 5
            """).fetchall()

            # SUM over an empty table is NULL; report it as 0.
            total_rows = conn.execute(
                "SELECT SUM(rows_generated) FROM sessions"
            ).fetchone()[0] or 0

        return {
            "total_operations": total_ops,
            "total_sessions": total_sessions,
            "total_rows_generated": total_rows,
            "top_operations": dict(top_ops)
        }
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@contextmanager
def audited_session(user_id: Optional[str] = None):
    """
    Run a block of work inside its own audit session.

    A fresh AuditLogger (default database location) is created, a session
    is started for ``user_id``, and the logger is handed to the ``with``
    body. The session is ended when the body exits, whether it finished
    normally or raised.

    Usage:
        with audited_session("user123") as audit:
            audit.log_schema_generation(...)
            audit.log_data_generation(...)
    """
    audit = AuditLogger()
    active_id = audit.start_session(user_id)

    try:
        yield audit
    finally:
        # Always close out the session, even if the body raised.
        audit.end_session(active_id)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# Module-wide logger, created on first use by get_audit_logger().
_global_logger: Optional[AuditLogger] = None


def get_audit_logger() -> AuditLogger:
    """Return the shared module-level AuditLogger, creating it on first call."""
    global _global_logger
    if _global_logger is not None:
        return _global_logger

    _global_logger = AuditLogger()
    return _global_logger
|