mcp-souschef 3.5.3__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1182 @@
1
+ """Database models and storage manager for SousChef."""
2
+
3
+ import contextlib
4
+ import gc
5
+ import hashlib
6
+ import importlib
7
+ import json
8
+ import sqlite3
9
+ from collections.abc import Iterator, Mapping
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from souschef.core.path_utils import _ensure_within_base_path, _normalize_path
15
+ from souschef.storage.config import build_postgres_dsn, load_database_settings
16
+
17
+
18
+ @dataclass
19
+ class AnalysisResult:
20
+ """Represents a cookbook analysis result."""
21
+
22
+ id: int | None
23
+ cookbook_name: str
24
+ cookbook_path: str
25
+ cookbook_version: str
26
+ complexity: str
27
+ estimated_hours: float
28
+ estimated_hours_with_souschef: float
29
+ recommendations: str
30
+ ai_provider: str | None
31
+ ai_model: str | None
32
+ analysis_data: str # JSON
33
+ created_at: str
34
+ cache_key: str | None = None
35
+ cookbook_blob_key: str | None = None # Blob storage key for original cookbook
36
+ content_fingerprint: str | None = None # SHA256 hash of cookbook content
37
+
38
+
39
+ @dataclass
40
+ class ConversionResult:
41
+ """Represents a cookbook conversion result."""
42
+
43
+ id: int | None
44
+ analysis_id: int | None
45
+ cookbook_name: str
46
+ output_type: str # 'playbook', 'role', 'collection'
47
+ status: str # 'success', 'partial', 'failed'
48
+ files_generated: int
49
+ blob_storage_key: str | None
50
+ conversion_data: str # JSON
51
+ created_at: str
52
+
53
+
54
+ def _analysis_from_row(row: Mapping[str, Any]) -> AnalysisResult:
55
+ """Convert a database row into an AnalysisResult."""
56
+
57
+ # Helper to safely get optional columns
58
+ def safe_get(key: str) -> Any:
59
+ if isinstance(row, dict):
60
+ return row.get(key)
61
+ try:
62
+ return row[key]
63
+ except (KeyError, IndexError):
64
+ return None
65
+
66
+ return AnalysisResult(
67
+ id=row["id"],
68
+ cookbook_name=row["cookbook_name"],
69
+ cookbook_path=row["cookbook_path"],
70
+ cookbook_version=row["cookbook_version"],
71
+ complexity=row["complexity"],
72
+ estimated_hours=row["estimated_hours"],
73
+ estimated_hours_with_souschef=row["estimated_hours_with_souschef"],
74
+ recommendations=row["recommendations"],
75
+ ai_provider=row["ai_provider"],
76
+ ai_model=row["ai_model"],
77
+ analysis_data=row["analysis_data"],
78
+ created_at=row["created_at"],
79
+ cache_key=(row.get("cache_key") if isinstance(row, dict) else row["cache_key"]),
80
+ cookbook_blob_key=safe_get("cookbook_blob_key"),
81
+ content_fingerprint=safe_get("content_fingerprint"),
82
+ )
83
+
84
+
85
+ def _conversion_from_row(row: Mapping[str, Any]) -> ConversionResult:
86
+ """Convert a database row into a ConversionResult."""
87
+ return ConversionResult(
88
+ id=row["id"],
89
+ analysis_id=row["analysis_id"],
90
+ cookbook_name=row["cookbook_name"],
91
+ output_type=row["output_type"],
92
+ status=row["status"],
93
+ files_generated=row["files_generated"],
94
+ blob_storage_key=row["blob_storage_key"],
95
+ conversion_data=row["conversion_data"],
96
+ created_at=row["created_at"],
97
+ )
98
+
99
+
100
+ def _hash_directory_contents(directory: Path) -> str:
101
+ """Hash the contents of a directory for cache invalidation."""
102
+ hasher = hashlib.sha256()
103
+
104
+ key_files: list[Path] = [directory / "metadata.rb"]
105
+ recipes_dir = directory / "recipes"
106
+ if recipes_dir.exists():
107
+ key_files.extend(sorted(recipes_dir.glob("*.rb")))
108
+
109
+ for file_path in key_files:
110
+ if file_path.exists() and file_path.is_file():
111
+ hasher.update(file_path.read_bytes())
112
+
113
+ return hasher.hexdigest()
114
+
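# Illustrative sketch (reviewer note, not part of the shipped file): only
# metadata.rb and recipes/*.rb feed the directory hash, so editing a recipe
# changes the result. The temporary cookbook below is made up.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    cookbook = Path(tmp)
    (cookbook / "metadata.rb").write_text('name "demo"\nversion "1.0.0"\n')
    (cookbook / "recipes").mkdir()
    (cookbook / "recipes" / "default.rb").write_text('package "nginx"\n')
    before = _hash_directory_contents(cookbook)
    (cookbook / "recipes" / "default.rb").write_text('package "httpd"\n')
    assert _hash_directory_contents(cookbook) != before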
115
+
116
+ def calculate_file_fingerprint(file_path: Path) -> str:
117
+ """
118
+ Calculate SHA256 fingerprint of a file.
119
+
120
+ This is used for content-based deduplication of uploaded archives.
121
+
122
+ Args:
123
+ file_path: Path to the file to fingerprint.
124
+
125
+ Returns:
126
+ SHA256 hash of the file contents as hex string.
127
+
128
+ """
129
+ hasher = hashlib.sha256()
130
+ with file_path.open("rb") as f:
131
+ # Read in chunks to handle large files efficiently
132
+ for chunk in iter(lambda: f.read(65536), b""):
133
+ hasher.update(chunk)
134
+ return hasher.hexdigest()
135
+
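# Illustrative sketch (reviewer note, not part of the shipped file): fingerprinting
# an uploaded archive with the chunked reader above. The archive contents are made up.
import tempfile
from pathlib import Path

with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as tmp:
    tmp.write(b"example archive bytes")
archive = Path(tmp.name)
print(calculate_file_fingerprint(archive))  # 64-character SHA-256 hex digest
archive.unlink()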
136
+
137
+ class StorageManager:
138
+ """Manages persistent storage for SousChef analysis and conversion data."""
139
+
140
+ def __init__(self, db_path: str | Path | None = None):
141
+ """
142
+ Initialise the storage manager.
143
+
144
+ Args:
145
+ db_path: Path to SQLite database. If None, uses default location.
146
+
147
+ """
148
+ if db_path is None:
149
+ db_path = self._get_default_db_path()
150
+ else:
151
+ db_path = _normalize_path(str(db_path))
152
+
153
+ self.db_path = db_path
154
+ self._ensure_database_exists()
155
+
156
+ def __enter__(self) -> "StorageManager":
157
+ """Context manager entry."""
158
+ return self
159
+
160
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
161
+ """Context manager exit with cleanup."""
162
+ self.close()
163
+
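# Illustrative sketch (reviewer note, not part of the shipped file): the manager
# can be used as a context manager so close() runs when the block exits. The
# db_path below is hypothetical and still goes through the module's path checks.
with StorageManager(db_path="/tmp/souschef-example.db") as storage:
    recent = storage.get_analysis_history(limit=5)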
164
+ def close(self) -> None:
165
+ """Close all database connections and ensure cleanup."""
166
+ # Execute a dummy query to ensure all pending operations are complete
167
+ # This helps prevent ResourceWarnings in tests
168
+ if self.db_path.exists():
169
+ conn = None
170
+ try:
171
+ conn = sqlite3.connect(str(self.db_path))
172
+ conn.execute("SELECT 1")
173
+ conn.commit()
174
+ except Exception:
175
+ pass
176
+ finally:
177
+ if conn is not None:
178
+ conn.close()
179
+
180
+ @contextlib.contextmanager
181
+ def _connect(self) -> Iterator[sqlite3.Connection]:
182
+ """Create a SQLite connection with proper cleanup."""
183
+ conn = None
184
+ try:
185
+ conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
186
+ conn.row_factory = sqlite3.Row
187
+ conn.isolation_level = None
188
+ yield conn
189
+ finally:
190
+ if conn is not None:
191
+ try:
192
+ conn.execute("PRAGMA optimize")
193
+ except Exception:
194
+ pass
195
+ finally:
196
+ conn.close()
197
+
198
+ def _get_default_db_path(self) -> Path:
199
+ """Get the default database path in a secure location."""
200
+ import tempfile
201
+
202
+ # Use a data directory in temp for persistence across sessions
203
+ data_dir = Path(tempfile.gettempdir()) / ".souschef" / "data"
204
+ data_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
205
+
206
+ db_path = data_dir / "souschef.db"
207
+ # Ensure the path is within the allowed base directory
208
+ validated_path = _ensure_within_base_path(db_path, Path(tempfile.gettempdir()))
209
+ return validated_path
210
+
211
+ def _ensure_database_exists(self) -> None:
212
+ """Create database schema if it doesn't exist."""
213
+ with self._connect() as conn:
214
+ conn.execute(
215
+ """
216
+ CREATE TABLE IF NOT EXISTS analysis_results (
217
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
218
+ cookbook_name TEXT NOT NULL,
219
+ cookbook_path TEXT NOT NULL,
220
+ cookbook_version TEXT,
221
+ complexity TEXT,
222
+ estimated_hours REAL,
223
+ estimated_hours_with_souschef REAL,
224
+ recommendations TEXT,
225
+ ai_provider TEXT,
226
+ ai_model TEXT,
227
+ analysis_data TEXT,
228
+ cache_key TEXT UNIQUE,
229
+ cookbook_blob_key TEXT,
230
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
231
+ )
232
+ """
233
+ )
234
+
235
+ # Add cookbook_blob_key column if it doesn't exist (migration)
236
+ with contextlib.suppress(sqlite3.OperationalError):
237
+ conn.execute(
238
+ "ALTER TABLE analysis_results ADD COLUMN cookbook_blob_key TEXT"
239
+ )
240
+
241
+ # Add content_fingerprint column if it doesn't exist (migration)
242
+ with contextlib.suppress(sqlite3.OperationalError):
243
+ conn.execute(
244
+ "ALTER TABLE analysis_results ADD COLUMN content_fingerprint TEXT"
245
+ )
246
+
247
+ conn.execute(
248
+ """
249
+ CREATE TABLE IF NOT EXISTS conversion_results (
250
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
251
+ analysis_id INTEGER,
252
+ cookbook_name TEXT NOT NULL,
253
+ output_type TEXT,
254
+ status TEXT,
255
+ files_generated INTEGER,
256
+ blob_storage_key TEXT,
257
+ conversion_data TEXT,
258
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
259
+ FOREIGN KEY (analysis_id) REFERENCES analysis_results (id)
260
+ )
261
+ """
262
+ )
263
+
264
+ # Create indexes for common queries
265
+ conn.execute(
266
+ """
267
+ CREATE INDEX IF NOT EXISTS idx_analysis_cookbook
268
+ ON analysis_results(cookbook_name, created_at DESC)
269
+ """
270
+ )
271
+
272
+ conn.execute(
273
+ """
274
+ CREATE INDEX IF NOT EXISTS idx_analysis_cache
275
+ ON analysis_results(cache_key)
276
+ """
277
+ )
278
+
279
+ conn.execute(
280
+ """
281
+ CREATE INDEX IF NOT EXISTS idx_analysis_fingerprint
282
+ ON analysis_results(content_fingerprint)
283
+ """
284
+ )
285
+
286
+ conn.execute(
287
+ """
288
+ CREATE INDEX IF NOT EXISTS idx_conversion_analysis
289
+ ON conversion_results(analysis_id)
290
+ """
291
+ )
292
+ # Connections are closed by the _connect context manager; force a GC pass to release any lingering handles
293
+ gc.collect()
294
+
295
+ def generate_cache_key(
296
+ self,
297
+ cookbook_path: str,
298
+ ai_provider: str | None = None,
299
+ ai_model: str | None = None,
300
+ ) -> str:
301
+ """
302
+ Generate a cache key for analysis results.
303
+
304
+ Args:
305
+ cookbook_path: Path to the cookbook.
306
+ ai_provider: AI provider used (if any).
307
+ ai_model: AI model used (if any).
308
+
309
+ Returns:
310
+ Cache key as a hex string.
311
+
312
+ """
313
+ # Include cookbook path, AI settings, and content hash
314
+ key_parts = [
315
+ cookbook_path,
316
+ ai_provider or "none",
317
+ ai_model or "none",
318
+ ]
319
+
320
+ # Try to include a hash of the cookbook content for invalidation
321
+ try:
322
+ cookbook_dir = _normalize_path(cookbook_path)
323
+ content_hash = self._hash_directory_contents(cookbook_dir)
324
+ key_parts.append(content_hash)
325
+ except (ValueError, OSError):
326
+ # If the cookbook path is invalid or the contents cannot be read,
327
+ # fall back to a cache key that does not include a content
328
+ # fingerprint so caching still works, albeit with reduced
329
+ # granularity.
330
+ pass
331
+
332
+ combined = "|".join(key_parts)
333
+ return hashlib.sha256(combined.encode()).hexdigest()
334
+
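# Illustrative sketch (reviewer note, not part of the shipped file): the key mixes
# the cookbook path, AI settings and (when readable) a content hash, so a different
# model produces a different key. The paths and model names are made up.
sm = StorageManager(db_path="/tmp/souschef-example.db")
key_a = sm.generate_cache_key("/cookbooks/apache", ai_provider="openai", ai_model="model-a")
key_b = sm.generate_cache_key("/cookbooks/apache", ai_provider="openai", ai_model="model-b")
assert key_a != key_b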
335
+ def _hash_directory_contents(self, directory: Path) -> str:
336
+ """
337
+ Hash the contents of a directory for cache invalidation.
338
+
339
+ Args:
340
+ directory: Directory to hash.
341
+
342
+ Returns:
343
+ SHA256 hash of directory contents.
344
+
345
+ """
346
+ return _hash_directory_contents(directory)
347
+
348
+ def save_analysis(
349
+ self,
350
+ cookbook_name: str,
351
+ cookbook_path: str,
352
+ cookbook_version: str,
353
+ complexity: str,
354
+ estimated_hours: float,
355
+ estimated_hours_with_souschef: float,
356
+ recommendations: str,
357
+ analysis_data: dict[str, Any],
358
+ ai_provider: str | None = None,
359
+ ai_model: str | None = None,
360
+ cookbook_blob_key: str | None = None,
361
+ content_fingerprint: str | None = None,
362
+ ) -> int | None:
363
+ """
364
+ Save an analysis result to the database.
365
+
366
+ If content_fingerprint is provided, checks for an existing analysis with the
367
+ same fingerprint and returns its ID instead of creating a duplicate.
368
+
369
+ Args:
370
+ cookbook_name: Name of the cookbook.
371
+ cookbook_path: Path to the cookbook.
372
+ cookbook_version: Version of the cookbook.
373
+ complexity: Complexity level.
374
+ estimated_hours: Manual migration hours.
375
+ estimated_hours_with_souschef: AI-assisted hours.
376
+ recommendations: Analysis recommendations.
377
+ analysis_data: Full analysis data as dict.
378
+ ai_provider: AI provider used.
379
+ ai_model: AI model used.
380
+ cookbook_blob_key: Blob storage key for original cookbook archive.
381
+ content_fingerprint: SHA256 hash of cookbook content for deduplication.
382
+
383
+ Returns:
384
+ The ID of the saved or existing analysis result.
385
+
386
+ """
387
+ # Check for existing analysis with same fingerprint
388
+ if content_fingerprint:
389
+ existing = self.get_analysis_by_fingerprint(content_fingerprint)
390
+ if existing:
391
+ # Return existing analysis ID (deduplication)
392
+ return existing.id
393
+
394
+ cache_key = self.generate_cache_key(cookbook_path, ai_provider, ai_model)
395
+
396
+ with self._connect() as conn:
397
+ cursor = conn.execute(
398
+ """
399
+ INSERT INTO analysis_results (
400
+ cookbook_name, cookbook_path, cookbook_version,
401
+ complexity, estimated_hours, estimated_hours_with_souschef,
402
+ recommendations, ai_provider, ai_model,
403
+ analysis_data, cache_key, cookbook_blob_key,
404
+ content_fingerprint
405
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
406
+ """,
407
+ (
408
+ cookbook_name,
409
+ cookbook_path,
410
+ cookbook_version,
411
+ complexity,
412
+ estimated_hours,
413
+ estimated_hours_with_souschef,
414
+ recommendations,
415
+ ai_provider,
416
+ ai_model,
417
+ json.dumps(analysis_data),
418
+ cache_key,
419
+ cookbook_blob_key,
420
+ content_fingerprint,
421
+ ),
422
+ )
423
+ conn.commit()
424
+ return cursor.lastrowid or None
425
+
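# Illustrative sketch (reviewer note, not part of the shipped file): saving the same
# fingerprint twice yields the same row ID because of the deduplication check above.
# Every value here is made up.
sm = StorageManager(db_path="/tmp/souschef-example.db")
payload = dict(
    cookbook_name="demo",
    cookbook_path="/cookbooks/demo",
    cookbook_version="1.0.0",
    complexity="low",
    estimated_hours=8.0,
    estimated_hours_with_souschef=2.0,
    recommendations="Convert recipes to roles.",
    analysis_data={"resources": 3},
    content_fingerprint="a" * 64,
)
assert sm.save_analysis(**payload) == sm.save_analysis(**payload)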
426
+ def get_analysis_by_fingerprint(
427
+ self, content_fingerprint: str
428
+ ) -> AnalysisResult | None:
429
+ """
430
+ Retrieve existing analysis result by content fingerprint.
431
+
432
+ Used for deduplication: if a cookbook with the same content was already
433
+ uploaded, the existing analysis is returned instead of creating a duplicate.
434
+
435
+ Args:
436
+ content_fingerprint: SHA256 hash of cookbook content.
437
+
438
+ Returns:
439
+ Existing AnalysisResult or None if not found.
440
+
441
+ """
442
+ with self._connect() as conn:
443
+ cursor = conn.execute(
444
+ """
445
+ SELECT * FROM analysis_results
446
+ WHERE content_fingerprint = ?
447
+ ORDER BY created_at DESC
448
+ LIMIT 1
449
+ """,
450
+ (content_fingerprint,),
451
+ )
452
+ row = cursor.fetchone()
453
+
454
+ if row:
455
+ return _analysis_from_row(row)
456
+
457
+ return None
458
+
459
+ def get_cached_analysis(
460
+ self,
461
+ cookbook_path: str,
462
+ ai_provider: str | None = None,
463
+ ai_model: str | None = None,
464
+ ) -> AnalysisResult | None:
465
+ """
466
+ Retrieve cached analysis result if available.
467
+
468
+ Args:
469
+ cookbook_path: Path to the cookbook.
470
+ ai_provider: AI provider used.
471
+ ai_model: AI model used.
472
+
473
+ Returns:
474
+ Cached AnalysisResult or None if not found.
475
+
476
+ """
477
+ cache_key = self.generate_cache_key(cookbook_path, ai_provider, ai_model)
478
+
479
+ with self._connect() as conn:
480
+ cursor = conn.execute(
481
+ """
482
+ SELECT * FROM analysis_results
483
+ WHERE cache_key = ?
484
+ ORDER BY created_at DESC
485
+ LIMIT 1
486
+ """,
487
+ (cache_key,),
488
+ )
489
+ row = cursor.fetchone()
490
+
491
+ if row:
492
+ return _analysis_from_row(row)
507
+
508
+ return None
509
+
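# Illustrative sketch (reviewer note, not part of the shipped file): a repeat lookup
# for the same cookbook path and AI settings is answered from the cache table.
# The path is made up.
sm = StorageManager(db_path="/tmp/souschef-example.db")
cached = sm.get_cached_analysis("/cookbooks/demo")
print(cached.complexity if cached else "no cached analysis yet")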
510
+ def save_conversion(
511
+ self,
512
+ cookbook_name: str,
513
+ output_type: str,
514
+ status: str,
515
+ files_generated: int,
516
+ conversion_data: dict[str, Any],
517
+ analysis_id: int | None = None,
518
+ blob_storage_key: str | None = None,
519
+ ) -> int | None:
520
+ """
521
+ Save a conversion result to the database.
522
+
523
+ Args:
524
+ cookbook_name: Name of the cookbook.
525
+ output_type: Output type (playbook, role, collection).
526
+ status: Conversion status (success, partial, failed).
527
+ files_generated: Number of files generated.
528
+ conversion_data: Full conversion data as dict.
529
+ analysis_id: Optional ID of associated analysis.
530
+ blob_storage_key: Optional key for blob storage.
531
+
532
+ Returns:
533
+ The ID of the saved conversion result.
534
+
535
+ """
536
+ with self._connect() as conn:
537
+ cursor = conn.execute(
538
+ """
539
+ INSERT INTO conversion_results (
540
+ analysis_id, cookbook_name, output_type,
541
+ status, files_generated, blob_storage_key,
542
+ conversion_data
543
+ ) VALUES (?, ?, ?, ?, ?, ?, ?)
544
+ """,
545
+ (
546
+ analysis_id,
547
+ cookbook_name,
548
+ output_type,
549
+ status,
550
+ files_generated,
551
+ blob_storage_key,
552
+ json.dumps(conversion_data),
553
+ ),
554
+ )
555
+ conn.commit()
556
+ return cursor.lastrowid or None
557
+
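# Illustrative sketch (reviewer note, not part of the shipped file): recording a
# conversion against an earlier analysis and reading it back. The IDs and counts
# are made up.
sm = StorageManager(db_path="/tmp/souschef-example.db")
conversion_id = sm.save_conversion(
    cookbook_name="demo",
    output_type="role",
    status="success",
    files_generated=4,
    conversion_data={"tasks": 12},
    analysis_id=1,
)
print(conversion_id, len(sm.get_conversions_by_analysis_id(1)))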
558
+ def get_analysis_history(
559
+ self, cookbook_name: str | None = None, limit: int = 50
560
+ ) -> list[AnalysisResult]:
561
+ """
562
+ Get analysis history.
563
+
564
+ Args:
565
+ cookbook_name: Filter by cookbook name (optional).
566
+ limit: Maximum number of results to return.
567
+
568
+ Returns:
569
+ List of AnalysisResult objects.
570
+
571
+ """
572
+ with self._connect() as conn:
573
+ if cookbook_name:
574
+ cursor = conn.execute(
575
+ """
576
+ SELECT * FROM analysis_results
577
+ WHERE cookbook_name = ?
578
+ ORDER BY created_at DESC
579
+ LIMIT ?
580
+ """,
581
+ (cookbook_name, limit),
582
+ )
583
+ else:
584
+ cursor = conn.execute(
585
+ """
586
+ SELECT * FROM analysis_results
587
+ ORDER BY created_at DESC
588
+ LIMIT ?
589
+ """,
590
+ (limit,),
591
+ )
592
+
593
+ rows = cursor.fetchall()
594
+ return [_analysis_from_row(row) for row in rows]
595
+
596
+ def get_conversion_history(
597
+ self, cookbook_name: str | None = None, limit: int = 50
598
+ ) -> list[ConversionResult]:
599
+ """
600
+ Get conversion history.
601
+
602
+ Args:
603
+ cookbook_name: Filter by cookbook name (optional).
604
+ limit: Maximum number of results to return.
605
+
606
+ Returns:
607
+ List of ConversionResult objects.
608
+
609
+ """
610
+ with self._connect() as conn:
611
+ if cookbook_name:
612
+ cursor = conn.execute(
613
+ """
614
+ SELECT * FROM conversion_results
615
+ WHERE cookbook_name = ?
616
+ ORDER BY created_at DESC
617
+ LIMIT ?
618
+ """,
619
+ (cookbook_name, limit),
620
+ )
621
+ else:
622
+ cursor = conn.execute(
623
+ """
624
+ SELECT * FROM conversion_results
625
+ ORDER BY created_at DESC
626
+ LIMIT ?
627
+ """,
628
+ (limit,),
629
+ )
630
+
631
+ rows = cursor.fetchall()
632
+ return [_conversion_from_row(row) for row in rows]
633
+
634
+ def get_conversions_by_analysis_id(
635
+ self, analysis_id: int
636
+ ) -> list[ConversionResult]:
637
+ """
638
+ Get conversions associated with a specific analysis.
639
+
640
+ Args:
641
+ analysis_id: ID of the analysis.
642
+
643
+ Returns:
644
+ List of ConversionResult objects.
645
+
646
+ """
647
+ with self._connect() as conn:
648
+ cursor = conn.execute(
649
+ """
650
+ SELECT * FROM conversion_results
651
+ WHERE analysis_id = ?
652
+ ORDER BY created_at DESC
653
+ """,
654
+ (analysis_id,),
655
+ )
656
+ rows = cursor.fetchall()
657
+ return [_conversion_from_row(row) for row in rows]
658
+
659
+ def get_statistics(self) -> dict[str, Any]:
660
+ """
661
+ Get overall statistics.
662
+
663
+ Returns:
664
+ Dictionary with statistical data.
665
+
666
+ """
667
+ with self._connect() as conn:
668
+ stats: dict[str, Any] = {}
669
+
670
+ # Analysis statistics
671
+ cursor = conn.execute(
672
+ """
673
+ SELECT
674
+ COUNT(*) as total,
675
+ COUNT(DISTINCT cookbook_name) as unique_cookbooks,
676
+ AVG(estimated_hours) as avg_manual_hours,
677
+ AVG(estimated_hours_with_souschef) as avg_ai_hours
678
+ FROM analysis_results
679
+ """
680
+ )
681
+ row = cursor.fetchone()
682
+ stats["total_analyses"] = row[0]
683
+ stats["unique_cookbooks_analysed"] = row[1]
684
+ stats["avg_manual_hours"] = row[2] or 0
685
+ stats["avg_ai_hours"] = row[3] or 0
686
+
687
+ # Conversion statistics
688
+ cursor = conn.execute(
689
+ """
690
+ SELECT
691
+ COUNT(*) as total,
692
+ SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as successful,
693
+ SUM(files_generated) as total_files
694
+ FROM conversion_results
695
+ """
696
+ )
697
+ row = cursor.fetchone()
698
+ stats["total_conversions"] = row[0]
699
+ stats["successful_conversions"] = row[1] or 0
700
+ stats["total_files_generated"] = row[2] or 0
701
+
702
+ return stats
703
+
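# Illustrative sketch (reviewer note, not part of the shipped file): the statistics
# dictionary exposes aggregate counters; on an empty database the averages and file
# totals fall back to 0.
sm = StorageManager(db_path="/tmp/souschef-example.db")
stats = sm.get_statistics()
print(stats["total_analyses"], stats["total_conversions"], stats["avg_manual_hours"])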
704
+
705
+ class PostgresStorageManager:
706
+ """Manages persistent storage for SousChef in PostgreSQL."""
707
+
708
+ def __init__(self, dsn: str):
709
+ """
710
+ Initialise the PostgreSQL storage manager.
711
+
712
+ Args:
713
+ dsn: PostgreSQL DSN string.
714
+
715
+ """
716
+ self.dsn = dsn
717
+ self._ensure_database_exists()
718
+
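# Illustrative sketch (reviewer note, not part of the shipped file): constructing the
# PostgreSQL backend directly. This assumes the psycopg extra is installed and a
# server is reachable; the DSN below is entirely hypothetical.
pg = PostgresStorageManager("postgresql://souschef:secret@localhost:5432/souschef")
print(pg.get_statistics())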
719
+ def _get_psycopg(self):
720
+ """Import psycopg and return the module."""
721
+ try:
722
+ return importlib.import_module("psycopg")
723
+ except ImportError as exc:
724
+ raise ImportError(
725
+ "psycopg is required for PostgreSQL storage. Install with: "
726
+ "pip install psycopg[binary]"
727
+ ) from exc
728
+
729
+ def _connect(self):
730
+ """Create a PostgreSQL connection with dict row factory."""
731
+ psycopg = self._get_psycopg()
732
+ return psycopg.connect(self.dsn, row_factory=psycopg.rows.dict_row)
733
+
734
+ def _prepare_sql(self, sql: str) -> str:
735
+ """Convert SQLite-style placeholders to PostgreSQL placeholders."""
736
+ return sql.replace("?", "%s")
737
+
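# Illustrative sketch (reviewer note, not part of the shipped file): the shared SQL
# uses SQLite-style "?" placeholders, which the helper above rewrites for psycopg.
# Note that a plain replace assumes no literal "?" ever appears in the SQL text.
sqlite_style = "SELECT * FROM analysis_results WHERE cache_key = ?"
print(sqlite_style.replace("?", "%s"))  # SELECT * FROM analysis_results WHERE cache_key = %s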
738
+ def _ensure_database_exists(self) -> None:
739
+ """Create PostgreSQL schema if it doesn't exist."""
740
+ with self._connect() as conn:
741
+ conn.execute(
742
+ """
743
+ CREATE TABLE IF NOT EXISTS analysis_results (
744
+ id INTEGER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
745
+ cookbook_name TEXT NOT NULL,
746
+ cookbook_path TEXT NOT NULL,
747
+ cookbook_version TEXT,
748
+ complexity TEXT,
749
+ estimated_hours DOUBLE PRECISION,
750
+ estimated_hours_with_souschef DOUBLE PRECISION,
751
+ recommendations TEXT,
752
+ ai_provider TEXT,
753
+ ai_model TEXT,
754
+ analysis_data TEXT,
755
+ cache_key TEXT UNIQUE,
756
+ cookbook_blob_key TEXT,
757
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
758
+ )
759
+ """
760
+ )
761
+
762
+ # Add cookbook_blob_key column if it doesn't exist (migration)
763
+ try:
764
+ conn.execute(
765
+ "ALTER TABLE analysis_results ADD COLUMN cookbook_blob_key TEXT"
766
+ )
767
+ conn.commit()
768
+ except Exception:
769
+ # Column may already exist; rollback and continue
770
+ conn.rollback()
771
+
772
+ # Add content_fingerprint column if it doesn't exist (migration)
773
+ try:
774
+ conn.execute(
775
+ "ALTER TABLE analysis_results ADD COLUMN content_fingerprint TEXT"
776
+ )
777
+ conn.commit()
778
+ except Exception:
779
+ # Column may already exist; rollback and continue
780
+ conn.rollback()
781
+
782
+ conn.execute(
783
+ """
784
+ CREATE TABLE IF NOT EXISTS conversion_results (
785
+ id INTEGER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
786
+ analysis_id INTEGER,
787
+ cookbook_name TEXT NOT NULL,
788
+ output_type TEXT,
789
+ status TEXT,
790
+ files_generated INTEGER,
791
+ blob_storage_key TEXT,
792
+ conversion_data TEXT,
793
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
794
+ FOREIGN KEY (analysis_id) REFERENCES analysis_results (id)
795
+ )
796
+ """
797
+ )
798
+
799
+ conn.execute(
800
+ """
801
+ CREATE INDEX IF NOT EXISTS idx_analysis_cookbook
802
+ ON analysis_results(cookbook_name, created_at DESC)
803
+ """
804
+ )
805
+
806
+ conn.execute(
807
+ """
808
+ CREATE INDEX IF NOT EXISTS idx_analysis_cache
809
+ ON analysis_results(cache_key)
810
+ """
811
+ )
812
+
813
+ conn.execute(
814
+ """
815
+ CREATE INDEX IF NOT EXISTS idx_analysis_fingerprint
816
+ ON analysis_results(content_fingerprint)
817
+ """
818
+ )
819
+
820
+ conn.execute(
821
+ """
822
+ CREATE INDEX IF NOT EXISTS idx_conversion_analysis
823
+ ON conversion_results(analysis_id)
824
+ """
825
+ )
826
+
827
+ conn.commit()
828
+
829
+ def generate_cache_key(
830
+ self,
831
+ cookbook_path: str,
832
+ ai_provider: str | None = None,
833
+ ai_model: str | None = None,
834
+ ) -> str:
835
+ """Generate a cache key for analysis results."""
836
+ key_parts = [
837
+ cookbook_path,
838
+ ai_provider or "none",
839
+ ai_model or "none",
840
+ ]
841
+
842
+ try:
843
+ cookbook_dir = _normalize_path(cookbook_path)
844
+ content_hash = _hash_directory_contents(cookbook_dir)
845
+ key_parts.append(content_hash)
846
+ except (ValueError, OSError):
847
+ pass
848
+
849
+ combined = "|".join(key_parts)
850
+ return hashlib.sha256(combined.encode()).hexdigest()
851
+
852
+ def save_analysis(
853
+ self,
854
+ cookbook_name: str,
855
+ cookbook_path: str,
856
+ cookbook_version: str,
857
+ complexity: str,
858
+ estimated_hours: float,
859
+ estimated_hours_with_souschef: float,
860
+ recommendations: str,
861
+ analysis_data: dict[str, Any],
862
+ ai_provider: str | None = None,
863
+ ai_model: str | None = None,
864
+ cookbook_blob_key: str | None = None,
865
+ content_fingerprint: str | None = None,
866
+ ) -> int | None:
867
+ """
868
+ Save an analysis result to PostgreSQL.
869
+
870
+ If content_fingerprint is provided, checks for an existing analysis with the
871
+ same fingerprint and returns its ID instead of creating a duplicate.
872
+
873
+ Args:
874
+ cookbook_name: Name of the cookbook.
875
+ cookbook_path: Path to the cookbook.
876
+ cookbook_version: Version of the cookbook.
877
+ complexity: Complexity level.
878
+ estimated_hours: Manual migration hours.
879
+ estimated_hours_with_souschef: AI-assisted hours.
880
+ recommendations: Analysis recommendations.
881
+ analysis_data: Full analysis data as dict.
882
+ ai_provider: AI provider used.
883
+ ai_model: AI model used.
884
+ cookbook_blob_key: Blob storage key for original cookbook archive.
885
+ content_fingerprint: SHA256 hash of cookbook content for deduplication.
886
+
887
+ Returns:
888
+ The ID of the saved or existing analysis result.
889
+
890
+ """
891
+ # Check for existing analysis with same fingerprint
892
+ if content_fingerprint:
893
+ existing = self.get_analysis_by_fingerprint(content_fingerprint)
894
+ if existing:
895
+ # Return existing analysis ID (deduplication)
896
+ return existing.id
897
+
898
+ cache_key = self.generate_cache_key(cookbook_path, ai_provider, ai_model)
899
+
900
+ sql = self._prepare_sql(
901
+ """
902
+ INSERT INTO analysis_results (
903
+ cookbook_name, cookbook_path, cookbook_version,
904
+ complexity, estimated_hours, estimated_hours_with_souschef,
905
+ recommendations, ai_provider, ai_model,
906
+ analysis_data, cache_key, cookbook_blob_key,
907
+ content_fingerprint
908
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
909
+ RETURNING id
910
+ """
911
+ )
912
+
913
+ with self._connect() as conn:
914
+ cursor = conn.execute(
915
+ sql,
916
+ (
917
+ cookbook_name,
918
+ cookbook_path,
919
+ cookbook_version,
920
+ complexity,
921
+ estimated_hours,
922
+ estimated_hours_with_souschef,
923
+ recommendations,
924
+ ai_provider,
925
+ ai_model,
926
+ json.dumps(analysis_data),
927
+ cache_key,
928
+ cookbook_blob_key,
929
+ content_fingerprint,
930
+ ),
931
+ )
932
+ row = cursor.fetchone()
933
+ conn.commit()
934
+ if row:
935
+ return int(row["id"])
936
+ return None
937
+
938
+ def get_analysis_by_fingerprint(
939
+ self, content_fingerprint: str
940
+ ) -> AnalysisResult | None:
941
+ """
942
+ Retrieve existing analysis result by content fingerprint.
943
+
944
+ Used for deduplication: if a cookbook with the same content was already
945
+ uploaded, the existing analysis is returned instead of creating a duplicate.
946
+
947
+ Args:
948
+ content_fingerprint: SHA256 hash of cookbook content.
949
+
950
+ Returns:
951
+ Existing AnalysisResult or None if not found.
952
+
953
+ """
954
+ sql = self._prepare_sql(
955
+ """
956
+ SELECT * FROM analysis_results
957
+ WHERE content_fingerprint = ?
958
+ ORDER BY created_at DESC
959
+ LIMIT 1
960
+ """
961
+ )
962
+
963
+ with self._connect() as conn:
964
+ cursor = conn.execute(sql, (content_fingerprint,))
965
+ row = cursor.fetchone()
966
+ if row:
967
+ return _analysis_from_row(row)
968
+ return None
969
+
970
+ def get_cached_analysis(
971
+ self,
972
+ cookbook_path: str,
973
+ ai_provider: str | None = None,
974
+ ai_model: str | None = None,
975
+ ) -> AnalysisResult | None:
976
+ """Retrieve cached analysis result if available."""
977
+ cache_key = self.generate_cache_key(cookbook_path, ai_provider, ai_model)
978
+
979
+ sql = self._prepare_sql(
980
+ """
981
+ SELECT * FROM analysis_results
982
+ WHERE cache_key = ?
983
+ ORDER BY created_at DESC
984
+ LIMIT 1
985
+ """
986
+ )
987
+
988
+ with self._connect() as conn:
989
+ cursor = conn.execute(sql, (cache_key,))
990
+ row = cursor.fetchone()
991
+ if row:
992
+ return _analysis_from_row(row)
993
+ return None
994
+
995
+ def save_conversion(
996
+ self,
997
+ cookbook_name: str,
998
+ output_type: str,
999
+ status: str,
1000
+ files_generated: int,
1001
+ conversion_data: dict[str, Any],
1002
+ analysis_id: int | None = None,
1003
+ blob_storage_key: str | None = None,
1004
+ ) -> int | None:
1005
+ """Save a conversion result to PostgreSQL."""
1006
+ sql = self._prepare_sql(
1007
+ """
1008
+ INSERT INTO conversion_results (
1009
+ analysis_id, cookbook_name, output_type,
1010
+ status, files_generated, blob_storage_key,
1011
+ conversion_data
1012
+ ) VALUES (?, ?, ?, ?, ?, ?, ?)
1013
+ RETURNING id
1014
+ """
1015
+ )
1016
+
1017
+ with self._connect() as conn:
1018
+ cursor = conn.execute(
1019
+ sql,
1020
+ (
1021
+ analysis_id,
1022
+ cookbook_name,
1023
+ output_type,
1024
+ status,
1025
+ files_generated,
1026
+ blob_storage_key,
1027
+ json.dumps(conversion_data),
1028
+ ),
1029
+ )
1030
+ row = cursor.fetchone()
1031
+ conn.commit()
1032
+ if row:
1033
+ return int(row["id"])
1034
+ return None
1035
+
1036
+ def get_analysis_history(
1037
+ self, cookbook_name: str | None = None, limit: int = 50
1038
+ ) -> list[AnalysisResult]:
1039
+ """Get analysis history from PostgreSQL."""
1040
+ if cookbook_name:
1041
+ sql = self._prepare_sql(
1042
+ """
1043
+ SELECT * FROM analysis_results
1044
+ WHERE cookbook_name = ?
1045
+ ORDER BY created_at DESC
1046
+ LIMIT ?
1047
+ """
1048
+ )
1049
+ params: tuple[str | int, ...] = (cookbook_name, limit)
1050
+ else:
1051
+ sql = self._prepare_sql(
1052
+ """
1053
+ SELECT * FROM analysis_results
1054
+ ORDER BY created_at DESC
1055
+ LIMIT ?
1056
+ """
1057
+ )
1058
+ params = (limit,)
1059
+
1060
+ with self._connect() as conn:
1061
+ cursor = conn.execute(sql, params)
1062
+ rows = cursor.fetchall()
1063
+ return [_analysis_from_row(row) for row in rows]
1064
+
1065
+ def get_conversion_history(
1066
+ self, cookbook_name: str | None = None, limit: int = 50
1067
+ ) -> list[ConversionResult]:
1068
+ """Get conversion history from PostgreSQL."""
1069
+ if cookbook_name:
1070
+ sql = self._prepare_sql(
1071
+ """
1072
+ SELECT * FROM conversion_results
1073
+ WHERE cookbook_name = ?
1074
+ ORDER BY created_at DESC
1075
+ LIMIT ?
1076
+ """
1077
+ )
1078
+ params: tuple[str | int, ...] = (cookbook_name, limit)
1079
+ else:
1080
+ sql = self._prepare_sql(
1081
+ """
1082
+ SELECT * FROM conversion_results
1083
+ ORDER BY created_at DESC
1084
+ LIMIT ?
1085
+ """
1086
+ )
1087
+ params = (limit,)
1088
+
1089
+ with self._connect() as conn:
1090
+ cursor = conn.execute(sql, params)
1091
+ rows = cursor.fetchall()
1092
+ return [_conversion_from_row(row) for row in rows]
1093
+
1094
+ def get_conversions_by_analysis_id(
1095
+ self, analysis_id: int
1096
+ ) -> list[ConversionResult]:
1097
+ """
1098
+ Get all conversions for a specific analysis.
1099
+
1100
+ Args:
1101
+ analysis_id: The ID of the analysis.
1102
+
1103
+ Returns:
1104
+ List of conversion results for the analysis.
1105
+
1106
+ """
1107
+ sql = self._prepare_sql(
1108
+ """
1109
+ SELECT * FROM conversion_results
1110
+ WHERE analysis_id = ?
1111
+ ORDER BY created_at DESC
1112
+ """
1113
+ )
1114
+
1115
+ with self._connect() as conn:
1116
+ cursor = conn.execute(sql, (analysis_id,))
1117
+ rows = cursor.fetchall()
1118
+ return [_conversion_from_row(row) for row in rows]
1119
+
1120
+ def get_statistics(self) -> dict[str, Any]:
1121
+ """Get overall statistics from PostgreSQL."""
1122
+ with self._connect() as conn:
1123
+ stats: dict[str, Any] = {}
1124
+
1125
+ cursor = conn.execute(
1126
+ """
1127
+ SELECT
1128
+ COUNT(*) as total,
1129
+ COUNT(DISTINCT cookbook_name) as unique_cookbooks,
1130
+ AVG(estimated_hours) as avg_manual_hours,
1131
+ AVG(estimated_hours_with_souschef) as avg_ai_hours
1132
+ FROM analysis_results
1133
+ """
1134
+ )
1135
+ row = cursor.fetchone()
1136
+ stats["total_analyses"] = row["total"]
1137
+ stats["unique_cookbooks_analysed"] = row["unique_cookbooks"]
1138
+ stats["avg_manual_hours"] = row["avg_manual_hours"] or 0
1139
+ stats["avg_ai_hours"] = row["avg_ai_hours"] or 0
1140
+
1141
+ cursor = conn.execute(
1142
+ """
1143
+ SELECT
1144
+ COUNT(*) as total,
1145
+ SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as successful,
1146
+ SUM(files_generated) as total_files
1147
+ FROM conversion_results
1148
+ """
1149
+ )
1150
+ row = cursor.fetchone()
1151
+ stats["total_conversions"] = row["total"]
1152
+ stats["successful_conversions"] = row["successful"] or 0
1153
+ stats["total_files_generated"] = row["total_files"] or 0
1154
+
1155
+ return stats
1156
+
1157
+
1158
+ # Singleton instance
1159
+ _storage_manager: StorageManager | PostgresStorageManager | None = None
1160
+
1161
+
1162
+ def get_storage_manager() -> StorageManager | PostgresStorageManager:
1163
+ """
1164
+ Get or create the singleton StorageManager instance.
1165
+
1166
+ Returns:
1167
+ StorageManager instance.
1168
+
1169
+ """
1170
+ global _storage_manager
1171
+ settings = load_database_settings()
1172
+
1173
+ if settings.backend == "postgres":
1174
+ if _storage_manager is None or not isinstance(
1175
+ _storage_manager, PostgresStorageManager
1176
+ ):
1177
+ _storage_manager = PostgresStorageManager(build_postgres_dsn(settings))
1178
+ return _storage_manager
1179
+
1180
+ if _storage_manager is None or isinstance(_storage_manager, PostgresStorageManager):
1181
+ _storage_manager = StorageManager(db_path=settings.sqlite_path)
1182
+ return _storage_manager
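# Illustrative sketch (reviewer note, not part of the shipped file): callers go through
# the accessor, which picks SQLite or PostgreSQL from the loaded settings instead of
# constructing a manager themselves. The import path is assumed from the package layout.
from souschef.storage.database import get_storage_manager

manager = get_storage_manager()
print(type(manager).__name__)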