mcp-souschef 3.5.2-py3-none-any.whl → 4.0.0-py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -14,7 +14,7 @@ import zipfile
 from pathlib import Path
 from typing import Any
 
-import pandas as pd  # type: ignore[import-untyped]
+import pandas as pd
 import streamlit as st
 
 # Add the parent directory to the path so we can import souschef modules
@@ -45,6 +45,10 @@ from souschef.generators.repo import (
     generate_ansible_repository,
 )
 from souschef.parsers.metadata import parse_cookbook_metadata
+from souschef.storage import (
+    get_blob_storage,
+    get_storage_manager,
+)
 
 # AI Settings
 ANTHROPIC_PROVIDER = "Anthropic (Claude)"
@@ -251,6 +255,115 @@ def _get_ai_int_value(
     return default
 
 
+def _save_analysis_to_db(
+    result: dict,
+    ai_provider: str | None = None,
+    ai_model: str | None = None,
+) -> int | None:
+    """
+    Save analysis result to the database.
+
+    Args:
+        result: Analysis result dictionary.
+        ai_provider: AI provider used (if any).
+        ai_model: AI model used (if any).
+
+    Returns:
+        Database ID of saved analysis, or None on error.
+
+    """
+    try:
+        from souschef.storage.database import calculate_file_fingerprint
+
+        storage_manager = get_storage_manager()
+
+        # Calculate content fingerprint for deduplication
+        content_fingerprint = None
+        if hasattr(st.session_state, "archive_path") and st.session_state.archive_path:
+            archive_path = st.session_state.archive_path
+            if archive_path.exists():
+                content_fingerprint = calculate_file_fingerprint(archive_path)
+
+        # Upload cookbook archive if available in session state
+        cookbook_blob_key = None
+        if hasattr(st.session_state, "archive_path") and st.session_state.archive_path:
+            archive_path = st.session_state.archive_path
+            if archive_path.exists():
+                cookbook_blob_key = _upload_cookbook_archive(
+                    archive_path, result.get("name", "Unknown")
+                )
+
+        # Extract data from result
+        analysis_data = {
+            "complexity": result.get("complexity"),
+            "recommendations": result.get("recommendations"),
+            "dependencies": result.get("dependencies"),
+        }
+
+        analysis_id = storage_manager.save_analysis(
+            cookbook_name=result.get("name", "Unknown"),
+            cookbook_path=result.get("path", ""),
+            cookbook_version=result.get("version", "Unknown"),
+            complexity=result.get("complexity", "Unknown"),
+            estimated_hours=float(result.get("estimated_hours", 0)),
+            estimated_hours_with_souschef=float(
+                result.get("estimated_hours_with_souschef", 0)
+            ),
+            recommendations=result.get("recommendations", ""),
+            analysis_data=analysis_data,
+            ai_provider=ai_provider,
+            ai_model=ai_model,
+            cookbook_blob_key=cookbook_blob_key,
+            content_fingerprint=content_fingerprint,
+        )
+
+        return analysis_id
+    except Exception as e:
+        st.warning(f"Failed to save analysis to database: {e}")
+        return None
+
+
+def _check_analysis_cache(
+    cookbook_path: str,
+    ai_provider: str | None = None,
+    ai_model: str | None = None,
+) -> dict | None:
+    """
+    Check if analysis is cached in the database.
+
+    Args:
+        cookbook_path: Path to the cookbook.
+        ai_provider: AI provider used (if any).
+        ai_model: AI model used (if any).
+
+    Returns:
+        Cached result dictionary or None if not found.
+
+    """
+    try:
+        storage_manager = get_storage_manager()
+        cached = storage_manager.get_cached_analysis(
+            cookbook_path, ai_provider, ai_model
+        )
+
+        if cached:
+            return {
+                "name": cached.cookbook_name,
+                "path": cached.cookbook_path,
+                "version": cached.cookbook_version,
+                "complexity": cached.complexity,
+                "estimated_hours": cached.estimated_hours,
+                "estimated_hours_with_souschef": cached.estimated_hours_with_souschef,
+                "recommendations": cached.recommendations,
+                "status": ANALYSIS_STATUS_ANALYSED,
+                "cached": True,
+            }
+
+        return None
+    except Exception:
+        return None
+
+
 # Constants for repeated strings
 METADATA_STATUS_YES = "Yes"
 METADATA_STATUS_NO = "No"
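
Taken together, `_check_analysis_cache` and `_save_analysis_to_db` form a read-through cache keyed on cookbook path plus AI provider and model. A minimal sketch of the intended call pattern, mirroring how `_analyze_with_ai` uses these helpers later in this diff (`analyze_cookbook` is a hypothetical stand-in for the real assessment step):

    def analyze_or_reuse(path: str, provider: str | None, model: str | None) -> dict:
        # Reuse a prior result when one exists for this path/provider/model.
        cached = _check_analysis_cache(path, provider, model)
        if cached:
            return cached  # the helper marks it with "cached": True
        # Otherwise analyse from scratch and persist the result.
        result = analyze_cookbook(path)  # hypothetical placeholder
        analysis_id = _save_analysis_to_db(result, provider, model)
        if analysis_id:
            result["analysis_id"] = analysis_id
        return result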
@@ -278,12 +391,12 @@ BLOCKED_EXTENSIONS = {
 }
 
 
-def extract_archive(uploaded_file) -> tuple[Path, Path]:
+def extract_archive(uploaded_file) -> tuple[Path, Path, Path]:
     """
     Extract uploaded archive to a temporary directory with security checks.
 
     Returns:
-        tuple: (temp_dir_path, cookbook_root_path)
+        tuple: (temp_dir_path, cookbook_root_path, archive_path)
 
     Implements multiple security measures to prevent:
     - Zip bombs (size limits, file count limits)
@@ -319,7 +432,52 @@ def extract_archive(uploaded_file) -> tuple[Path, Path]:
     # Find the root directory (should contain cookbooks)
     cookbook_root = _determine_cookbook_root(extraction_dir)
 
-    return temp_dir, cookbook_root
+    return temp_dir, cookbook_root, archive_path
+
+
+def _upload_cookbook_archive(archive_path: Path, cookbook_name: str) -> str | None:
+    """
+    Upload the original cookbook archive to blob storage.
+
+    Implements content-based deduplication - if an archive with identical
+    content was previously uploaded, returns the existing blob key instead
+    of uploading again.
+
+    Args:
+        archive_path: Path to the cookbook archive file.
+        cookbook_name: Name of the cookbook.
+
+    Returns:
+        Blob storage key for the uploaded archive, or None on error.
+
+    """
+    try:
+        from souschef.storage.database import calculate_file_fingerprint
+
+        # Calculate content fingerprint for deduplication
+        content_fingerprint = calculate_file_fingerprint(archive_path)
+
+        # Check if this content was already uploaded
+        storage_manager = get_storage_manager()
+        existing = storage_manager.get_analysis_by_fingerprint(content_fingerprint)
+        if existing and existing.cookbook_blob_key:
+            # Reuse existing blob key (deduplication)
+            return existing.cookbook_blob_key
+
+        blob_storage = get_blob_storage()
+        if not blob_storage:
+            return None
+
+        # Generate blob key
+        blob_key = f"cookbooks/{cookbook_name}/{archive_path.name}"
+
+        # Upload archive
+        blob_storage.upload(archive_path, blob_key)
+
+        return blob_key
+    except Exception as e:
+        st.warning(f"Failed to upload cookbook archive to blob storage: {e}")
+        return None
 
 
 def _extract_archive_by_type(
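
The deduplication above hinges on `calculate_file_fingerprint`, which this diff imports from `souschef.storage.database` but does not show. A plausible sketch, assuming a streaming SHA-256 digest over the archive bytes (the hash algorithm and chunk size are assumptions, not confirmed by the package):

    import hashlib
    from pathlib import Path

    def calculate_file_fingerprint(path: Path) -> str:
        # Hash in fixed-size chunks so large archives never load fully into memory.
        # SHA-256 and the 64 KiB chunk size are assumptions for illustration.
        digest = hashlib.sha256()
        with path.open("rb") as fh:
            for chunk in iter(lambda: fh.read(65536), b""):
                digest.update(chunk)
        return digest.hexdigest()

Because identical archive bytes always yield the same fingerprint, `get_analysis_by_fingerprint` can surface the earlier record, and its blob key is reused instead of uploading a second copy.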
@@ -802,9 +960,13 @@ def _show_analysis_input() -> None:
             st.session_state.analysis_results = None
             st.session_state.holistic_assessment = None
 
-            temp_dir, cookbook_path = extract_archive(uploaded_file)
+            temp_dir, cookbook_path, archive_path = extract_archive(
+                uploaded_file
+            )
             # Store temp_dir in session state to prevent premature cleanup
             st.session_state.temp_dir = temp_dir
+            # Store archive_path for later upload to blob storage
+            st.session_state.archive_path = archive_path
             st.success("Archive extracted successfully to temporary location")
         except (OSError, zipfile.BadZipFile, tarfile.TarError) as e:
             st.error(f"Failed to extract archive: {e}")
@@ -1326,6 +1488,8 @@ def _analyze_with_ai(
    st.info(f"Detected {len(cookbook_data)} cookbook(s) with {total_recipes} recipe(s)")
 
     results = []
+    cached_count = 0
+
     for i, cb_data in enumerate(cookbook_data):
         # Count recipes in this cookbook
         recipe_count = _safe_count_recipes(cb_data["Path"])
@@ -1336,6 +1500,14 @@ def _analyze_with_ai(
         )
         progress_bar.progress((i + 1) / len(cookbook_data))
 
+        # Check cache first
+        cached_result = _check_analysis_cache(cb_data["Path"], provider, model)
+        if cached_result:
+            st.info(f"Using cached analysis for {cb_data['Name']}")
+            results.append(cached_result)
+            cached_count += 1
+            continue
+
         assessment = assess_single_cookbook_with_ai(
             cb_data["Path"],
             ai_provider=provider,
@@ -1348,8 +1520,17 @@ def _analyze_with_ai(
         )
 
         result = _build_cookbook_result(cb_data, assessment, ANALYSIS_STATUS_ANALYSED)
+
+        # Save to database
+        analysis_id = _save_analysis_to_db(result, provider, model)
+        if analysis_id:
+            result["analysis_id"] = analysis_id
+
         results.append(result)
 
+    if cached_count > 0:
+        st.success(f"Retrieved {cached_count} cached analysis result(s)")
+
     return results
 
 
@@ -1378,6 +1559,11 @@ def _analyze_rule_based(
         return [], {}
 
     results = _process_cookbook_assessments(assessment_result, cookbook_data)
+
+    # Save results to database
+    for result in results:
+        _save_analysis_to_db(result, ai_provider=None, ai_model="rule-based")
+
     return results, assessment_result
 
 
@@ -1586,6 +1772,14 @@ def _convert_all_cookbooks_holistically(cookbook_path: str):
             "output_path": str(output_dir),
         }
 
+        # Save conversion to storage with tar archives
+        _save_conversion_to_storage(
+            cookbook_name=Path(cookbook_path).name,
+            output_path=output_dir,
+            conversion_result=conversion_result,
+            output_type="role",
+        )
+
         progress_bar.progress(1.0)
         status_text.text("Holistic conversion completed!")
         st.success("Holistically converted all cookbooks to Ansible roles!")
@@ -1607,6 +1801,89 @@ def _convert_all_cookbooks_holistically(cookbook_path: str):
         status_text.empty()
 
 
+def _save_conversion_to_storage(
+    cookbook_name: str,
+    output_path: Path,
+    conversion_result: str,
+    output_type: str,
+) -> None:
+    """
+    Save conversion artefacts to blob storage and database.
+
+    Creates tar archives of roles and repository (if exists), uploads them
+    to blob storage, and saves conversion record to database.
+
+    Args:
+        cookbook_name: Name of the cookbook that was converted.
+        output_path: Path to the directory containing converted roles.
+        conversion_result: Text result from conversion for parsing.
+        output_type: Type of output ('role', 'playbook', 'collection').
+
+    """
+    try:
+        from datetime import datetime
+
+        storage_manager = get_storage_manager()
+        blob_storage = get_blob_storage()
+
+        # Parse conversion result to get metrics
+        parsed_result = _parse_conversion_result_text(conversion_result)
+        files_generated = parsed_result.get("summary", {}).get(
+            "total_converted_files", 0
+        )
+        status = "success" if files_generated > 0 else "failed"
+
+        # Create timestamp for unique storage keys
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Upload roles directory to blob storage
+        roles_storage_key = f"conversions/{cookbook_name}/roles_{timestamp}"
+        blob_key_roles = blob_storage.upload(output_path, roles_storage_key)
+
+        # Check if repository exists in session state and upload it too
+        repo_storage_key = None
+        if "generated_repo" in st.session_state:
+            repo_result = st.session_state.generated_repo
+            repo_path = Path(repo_result["temp_path"])
+            if repo_path.exists():
+                repo_storage_key = f"conversions/{cookbook_name}/repo_{timestamp}"
+                blob_storage.upload(repo_path, repo_storage_key)
+
+        # Prepare conversion data
+        conversion_data = {
+            "parsed_result": parsed_result,
+            "roles_blob_key": blob_key_roles,
+            "repo_blob_key": repo_storage_key,
+            "timestamp": timestamp,
+        }
+
+        # Get analysis_id if available from session state
+        analysis_id = None
+        if "holistic_assessment" in st.session_state:
+            assessment = st.session_state.holistic_assessment
+            if isinstance(assessment, dict) and "analysis_id" in assessment:
+                analysis_id = assessment["analysis_id"]
+
+        # Save conversion to database
+        conversion_id = storage_manager.save_conversion(
+            cookbook_name=cookbook_name,
+            output_type=output_type,
+            status=status,
+            files_generated=files_generated,
+            conversion_data=conversion_data,
+            analysis_id=analysis_id,
+            blob_storage_key=blob_key_roles,
+        )
+
+        if conversion_id:
+            # Store conversion_id in session for reference
+            st.session_state.last_conversion_id = conversion_id
+
+    except Exception as e:
+        # Non-fatal: log but don't fail the conversion display
+        st.warning(f"Failed to save conversion to storage: {e}")
+
+
 def _parse_conversion_result_text(result_text: str) -> dict:
     """Parse the conversion result text to extract structured data."""
     structured: dict[str, Any] = {
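
The storage keys generated by `_save_conversion_to_storage` follow a `conversions/<cookbook>/<artefact>_<timestamp>` layout, so repeated conversions of the same cookbook never collide. An illustration with a hypothetical cookbook name:

    from datetime import datetime

    cookbook_name = "apache2"  # illustrative value only
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    roles_key = f"conversions/{cookbook_name}/roles_{timestamp}"
    repo_key = f"conversions/{cookbook_name}/repo_{timestamp}"
    # e.g. "conversions/apache2/roles_20250501_153012"
    #      "conversions/apache2/repo_20250501_153012"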
@@ -2122,6 +2399,70 @@ def _display_conversion_download_options(conversion_result: dict):
         st.warning("Output directory not found for download")
 
 
+def _upload_repository_to_storage(repo_result: dict, roles_path: Path) -> None:
+    """
+    Upload generated repository to blob storage and update conversion record.
+
+    Args:
+        repo_result: Repository generation result dictionary.
+        roles_path: Path to the roles directory that was used to create the repository.
+
+    """
+    try:
+        from datetime import datetime
+
+        # Only proceed if we have a saved conversion to update
+        if "last_conversion_id" not in st.session_state:
+            return
+
+        storage_manager = get_storage_manager()
+        blob_storage = get_blob_storage()
+
+        # Upload repository to blob storage
+        repo_path = Path(repo_result["temp_path"])
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        cookbook_name = roles_path.name or "cookbook"
+        repo_storage_key = f"conversions/{cookbook_name}/repo_{timestamp}"
+
+        blob_key_repo = blob_storage.upload(repo_path, repo_storage_key)
+
+        # Update conversion record with repository blob key
+        conversion_id = st.session_state.last_conversion_id
+
+        # Get the existing conversion to update its data
+        conversions = storage_manager.get_conversion_history(limit=100)
+        existing = next((c for c in conversions if c.id == conversion_id), None)
+
+        if existing:
+            # Parse and update conversion data
+            try:
+                conversion_data = json.loads(existing.conversion_data)
+                conversion_data["repo_blob_key"] = blob_key_repo
+                conversion_data["repo_timestamp"] = timestamp
+
+                # Update the conversion record (re-save with same ID)
+                # Note: This creates a new record. For true update, we'd
+                # need an update method. For now, store in session state
+                # for the download.
+                st.session_state.repo_blob_key = blob_key_repo
+
+                st.success("✅ Repository uploaded to storage for future retrieval")
+            except json.JSONDecodeError:
+                # If existing conversion data is corrupt or unparsable, still
+                # expose the repository for this session but warn that it
+                # could not be associated with the saved conversion record.
+                st.session_state.repo_blob_key = blob_key_repo
+                st.warning(
+                    "Repository uploaded, but existing conversion data could not "
+                    "be parsed. The repository download link will only be "
+                    "available for this session."
+                )
+
+    except Exception as e:
+        # Non-fatal: just log warning
+        st.warning(f"Could not upload repository to storage: {e}")
+
+
 def _create_repo_callback(safe_output_path: Path) -> None:
     """Handle repository creation callback."""
     try:
@@ -2143,6 +2484,9 @@ def _create_repo_callback(safe_output_path: Path) -> None:
             st.session_state.generated_repo = repo_result
             st.session_state.repo_created_successfully = True
             st.session_state.repo_creation_error = None
+
+            # Upload repository to blob storage if conversion was saved
+            _upload_repository_to_storage(repo_result, safe_output_path)
         else:
             _handle_repo_creation_failure(repo_result.get("error", "Unknown error"))
     except Exception as e:
@@ -2334,9 +2678,11 @@ def _handle_dashboard_upload():
    # Process the file
    try:
        with st.spinner("Extracting archive..."):
-            temp_dir, cookbook_path = extract_archive(mock_file)
+            temp_dir, cookbook_path, archive_path = extract_archive(mock_file)
            # Store temp_dir in session state to prevent premature cleanup
            st.session_state.temp_dir = temp_dir
+            # Store archive_path for later upload to blob storage
+            st.session_state.archive_path = archive_path
            st.success("Archive extracted successfully!")
 
            # Validate and list cookbooks