mcp-souschef 3.0.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
souschef/assessment.py CHANGED
@@ -6,11 +6,17 @@ generating migration plans, analyzing dependencies, and validating conversions.
  """

  import json
+ import os
  import re
  from pathlib import Path
  from typing import Any

- from souschef.core import METADATA_FILENAME, _normalize_path, _safe_join
+ from souschef.core import (
+     METADATA_FILENAME,
+     _ensure_within_base_path,
+     _normalize_path,
+     _safe_join,
+ )
  from souschef.core.errors import format_error_with_context
  from souschef.core.metrics import (
      ComplexityLevel,
@@ -18,6 +24,8 @@ from souschef.core.metrics import (
      categorize_complexity,
      estimate_effort_for_complexity,
  )
+ from souschef.core.path_utils import _validated_candidate, safe_glob
+ from souschef.core.url_validation import validate_user_provided_url
  from souschef.core.validation import (
      ValidationEngine,
      ValidationLevel,
@@ -27,18 +35,36 @@ from souschef.core.validation import (

  # Optional AI provider imports
  try:
-     import requests  # type: ignore[import-untyped]
+     import requests
  except ImportError:
-     requests = None
+     requests = None  # type: ignore[assignment]

  try:
-     from ibm_watsonx_ai import (  # type: ignore[import-not-found]
-         APIClient,
-     )
+     from ibm_watsonx_ai import APIClient  # type: ignore[import-not-found]
  except ImportError:
      APIClient = None


+ def _normalize_cookbook_root(cookbook_path: Path | str) -> Path:
+     """
+     Normalise cookbook paths.
+
+     Args:
+         cookbook_path: User-provided cookbook path.
+
+     Returns:
+         A resolved Path.
+
+     Raises:
+         ValueError: If the path cannot be normalised.
+
+     """
+     # Normalise the path (resolves symlinks, expands ~, etc.)
+     # Safety for accessing files within this cookbook is enforced per-operation
+     # using _ensure_within_base_path with the cookbook dir as the base
+     return _normalize_path(cookbook_path)
+
+
  # Optimised patterns to avoid catastrophic backtracking in resource parsing
  RESOURCE_BLOCK_PATTERN = re.compile(r"\w{1,100}\s+['\"]([^'\"\r\n]{0,200})['\"]\s+do")

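Neither `_normalize_path` nor `_ensure_within_base_path` is defined in this file; both come from `souschef.core`, so only their call sites appear in the diff. For orientation, a containment check of the kind the new comments describe is usually resolve-then-compare, roughly as follows (an illustrative sketch, not the package's actual code):

    from pathlib import Path

    def ensure_within_base_path(candidate: Path, base: Path) -> Path:
        # Resolve symlinks and ".." segments *before* comparing; otherwise
        # "cookbook/../../etc" would slip past a naive prefix check.
        resolved = candidate.resolve()
        resolved.relative_to(base.resolve())  # raises ValueError if outside
        return resolved

Resolving first is the point: the comparison then runs on the real filesystem location rather than on the caller-controlled spelling of the path.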
@@ -227,7 +253,6 @@ def _parse_and_assess_cookbooks(cookbook_paths: str) -> tuple[list, str | None]:

      cookbook_assessments = []
      for cookbook_path in valid_paths:
-         # deepcode ignore PT: path normalized via _normalize_path
          assessment = _assess_single_cookbook(cookbook_path)
          cookbook_assessments.append(assessment)

@@ -330,7 +355,8 @@ def analyse_cookbook_dependencies(
          dependency_depth: Analysis depth (direct, transitive, full)

      Returns:
-         Dependency analysis with migration order recommendations
+         Dependency analysis with migration order recommendations.
+

      """
      try:
@@ -342,15 +368,21 @@
              f"Suggestion: Use one of {', '.join(valid_depths)}"
          )

-         cookbook_path_obj = _normalize_path(cookbook_path)
-         if not cookbook_path_obj.exists():
+         # Validate and normalise user-provided path
+         # Containment is enforced at filesystem operation level
+         try:
+             normalized_input: Path = _normalize_path(cookbook_path)
+         except (ValueError, OSError) as e:
+             return f"Error: Invalid cookbook path '{cookbook_path}': {e}"
+
+         if not normalized_input.exists():
              return (
                  f"Error: Cookbook path not found: {cookbook_path}\n\n"
                  "Suggestion: Check that the path exists and points to a cookbook directory"
              )

-         # Analyze dependencies
-         dependency_analysis = _analyse_cookbook_dependencies_detailed(cookbook_path_obj)
+         # Analyze dependencies using normalized path
+         dependency_analysis = _analyse_cookbook_dependencies_detailed(normalized_input)

          # Determine migration order
          migration_order = _determine_migration_order(dependency_analysis)
@@ -359,7 +391,7 @@
          circular_deps = _identify_circular_dependencies(dependency_analysis)

          return f"""# Cookbook Dependency Analysis
- # Cookbook: {cookbook_path_obj.name}
+ # Cookbook: {normalized_input.name}
  # Analysis Depth: {dependency_depth}

  ## Dependency Overview:
@@ -559,7 +591,9 @@ def _parse_cookbook_paths(cookbook_paths: str) -> list[Any]:
          List of valid Path objects (may be empty)

      """
-     paths = [_normalize_path(path.strip()) for path in cookbook_paths.split(",")]
+     paths = [
+         _normalize_cookbook_root(path.strip()) for path in cookbook_paths.split(",")
+     ]
      valid_paths = [p for p in paths if p.exists()]
      return valid_paths

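A quick usage note on the function above: it splits on commas, normalises each entry through the new `_normalize_cookbook_root`, and silently drops entries that do not exist on disk. A hypothetical call (directory names are illustrative):

    # Only entries that exist on disk survive the filter.
    valid = _parse_cookbook_paths("cookbooks/apache2, cookbooks/nginx, typo/dir")
    # -> [PosixPath('.../cookbooks/apache2'), PosixPath('.../cookbooks/nginx')]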
@@ -587,7 +621,6 @@ def _analyse_cookbook_metrics(
      }

      for cookbook_path in valid_paths:
-         # deepcode ignore PT: path normalized via _normalize_path
          assessment = _assess_single_cookbook(cookbook_path)
          cookbook_assessments.append(assessment)

@@ -657,35 +690,70 @@ def _format_assessment_report(
  """


- def _count_cookbook_artifacts(cookbook_path: Path) -> dict[str, int]:
+ def _count_cookbook_artifacts(cookbook_path: Path) -> dict[str, int]:  # noqa: C901
      """Count comprehensive cookbook artifacts including all Chef components."""
-     # deepcode ignore PT: path normalized via _normalize_path in caller
-     cookbook_path = (
-         Path(cookbook_path) if not isinstance(cookbook_path, Path) else cookbook_path
-     )
+     # Note: cookbook_path is expected to be pre-validated
+     base = cookbook_path
+
+     # Helper function to safely glob within a directory
+     def _glob_safe(directory: Path, pattern: str) -> int:
+         """Count files matching a glob pattern within a directory."""
+         if not directory.exists() or not directory.is_dir():
+             return 0
+         try:
+             return len(list(directory.glob(pattern)))
+         except (OSError, ValueError):
+             return 0

-     # Basic directory counts
-     # cookbook_path already normalized by caller
-     recipes_dir = (
-         cookbook_path / "recipes"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     recipe_count = len(list(recipes_dir.glob("*.rb"))) if recipes_dir.exists() else 0
-
-     templates_dir = (
-         cookbook_path / "templates"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     template_count = (
-         len(list(templates_dir.glob("**/*.erb"))) if templates_dir.exists() else 0
-     )
+     # Helper function to check existence safely
+     def _exists_safe(path: Path) -> bool:
+         """Check if a path exists."""
+         try:
+             return path.exists()
+         except (OSError, ValueError):
+             return False

-     files_dir = cookbook_path / "files"
-     file_count = len(list(files_dir.glob("**/*"))) if files_dir.exists() else 0
+     # All paths are safe-joined to the validated base
+     recipes_dir: Path = _safe_join(base, "recipes")
+     recipe_count: int = _glob_safe(recipes_dir, "*.rb")

-     # Additional Chef components
-     attributes_dir = cookbook_path / "attributes"
-     attributes_count = (
-         len(list(attributes_dir.glob("*.rb"))) if attributes_dir.exists() else 0
-     )
+     templates_dir: Path = _safe_join(base, "templates")
+     template_count: int = _glob_safe(templates_dir, "**/*.erb")
+
+     files_dir: Path = _safe_join(base, "files")
+     file_count: int = _glob_safe(files_dir, "**/*")
+
+     attributes_dir: Path = _safe_join(base, "attributes")
+     attributes_count: int = _glob_safe(attributes_dir, "*.rb")
+
+     libraries_dir: Path = _safe_join(base, "libraries")
+     libraries_count: int = _glob_safe(libraries_dir, "*.rb")
+
+     definitions_dir: Path = _safe_join(base, "definitions")
+     definitions_count: int = _glob_safe(definitions_dir, "*.rb")
+
+     resources_dir: Path = _safe_join(base, "resources")
+     resources_count: int = _glob_safe(resources_dir, "*.rb")
+
+     providers_dir: Path = _safe_join(base, "providers")
+     providers_count: int = _glob_safe(providers_dir, "*.rb")
+
+     berksfile: Path = _safe_join(base, "Berksfile")
+     has_berksfile: bool = _exists_safe(berksfile)
+
+     chefignore: Path = _safe_join(base, "chefignore")
+     has_chefignore: bool = _exists_safe(chefignore)
+
+     thorfile: Path = _safe_join(base, "Thorfile")
+     has_thorfile: bool = _exists_safe(thorfile)
+
+     kitchen_yml: Path = _safe_join(base, ".kitchen.yml")
+     kitchen_yaml: Path = _safe_join(base, "kitchen.yml")
+     has_kitchen_yml: bool = _exists_safe(kitchen_yml) or _exists_safe(kitchen_yaml)
+
+     test_dir: Path = _safe_join(base, "test")
+     spec_dir: Path = _safe_join(base, "spec")
+     has_test_dir: bool = _exists_safe(test_dir) or _exists_safe(spec_dir)

      libraries_dir = cookbook_path / "libraries"
      libraries_count = (
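`_safe_join` is likewise imported from `souschef.core` with its body outside this diff. Judging by the call sites above — every child path is derived from the already-validated base — it presumably joins and then rejects anything that escapes the base. A hedged usage sketch of that assumed behaviour (paths and outcomes are illustrative):

    # Assumed contract of the imported helper, not verified from source:
    base = _normalize_cookbook_root("cookbooks/apache2")
    _safe_join(base, "recipes")       # -> <base>/recipes
    _safe_join(base, "../other_dir")  # -> expected to raise (path traversal)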
@@ -737,10 +805,8 @@ def _count_cookbook_artifacts(cookbook_path: Path) -> dict[str, int]:

  def _analyse_recipe_complexity(cookbook_path: Path) -> dict[str, int]:
      """Analyse recipe files and other cookbook components for resource counts, Ruby blocks, and custom resources."""
-     # deepcode ignore PT: path normalized via _normalize_path in caller
-     cookbook_path = (
-         Path(cookbook_path) if not isinstance(cookbook_path, Path) else cookbook_path
-     )
+     # Note: cookbook_path is expected to be pre-validated at function entry points
+     # Do not call _normalize_cookbook_root here as it's already a validated Path

      resource_count = 0
      custom_resources = 0
@@ -774,56 +840,75 @@ def _analyze_recipes(cookbook_path: Path) -> tuple[int, int, int]:
      ruby_blocks = 0
      custom_resources = 0

-     # cookbook_path already normalized by caller
-     recipes_dir = (
-         cookbook_path / "recipes"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     if recipes_dir.exists():
-         for recipe_file in recipes_dir.glob("*.rb"):
-             try:
-                 content = recipe_file.read_text(encoding="utf-8", errors="ignore")
-                 # Count Chef resources
-                 resources = len(RESOURCE_BLOCK_PATTERN.findall(content))
-                 ruby_blocks += len(
-                     re.findall(
-                         r"ruby_block|execute|bash|script", content, re.IGNORECASE
-                     )
-                 )
-                 custom_resources += len(
-                     re.findall(
-                         r"custom_resource|provides|use_inline_resources|lwrp_resource",
-                         content,
+     # Note: cookbook_path is expected to be pre-validated
+     # Use it directly with _safe_join to access recipes directory
+     recipes_dir: Path = _safe_join(cookbook_path, "recipes")
+     try:
+         recipe_files: list[Path] = (
+             list(recipes_dir.glob("*.rb")) if recipes_dir.exists() else []
+         )
+     except (OSError, ValueError):
+         recipe_files = []
+
+     for recipe_file in recipe_files:
+         try:
+             # Validate each glob result
+             validated_file: Path = _validated_candidate(recipe_file, cookbook_path)
+         except ValueError:
+             continue
+         try:
+             content = validated_file.read_text(encoding="utf-8", errors="ignore")
+             resources = len(RESOURCE_BLOCK_PATTERN.findall(content))
+             ruby_blocks += len(
+                 re.findall(r"ruby_block|execute|bash|script", content, re.IGNORECASE)
+             )
+             custom_resources += len(
+                 re.findall(
+                     r"custom_resource|provides|use_inline_resources|lwrp_resource",
+                     content,
                  )
-                 resource_count += resources
-             except Exception:
-                 continue
+             )
+             resource_count += resources
+         except Exception:
+             continue

      return resource_count, ruby_blocks, custom_resources


  def _analyze_attributes(cookbook_path: Path) -> int:
-     """Analyze attribute files for complexity."""
+     """Analyse attribute files for complexity."""
      attribute_complexity = 0

-     attributes_dir = (
-         cookbook_path / "attributes"
-     )  # deepcode ignore PT: path normalized via _normalize_path
-     if attributes_dir.exists():
-         for attr_file in attributes_dir.glob("*.rb"):
-             try:
-                 content = attr_file.read_text(encoding="utf-8", errors="ignore")
-                 # Count attribute assignments and complex expressions
-                 # Use simpler regex patterns to avoid ReDoS vulnerabilities
-                 assignments = len(
-                     re.findall(r"^\s*\w+\s*(?:\[\w*\])?\s*=", content, re.MULTILINE)
-                 )
-                 complex_expressions = len(
-                     re.findall(r"(?:node|default|override)\[", content)
+     # Note: cookbook_path is expected to be pre-validated
+     attributes_dir: Path = _safe_join(cookbook_path, "attributes")
+     try:
+         attr_files: list[Path] = (
+             list(attributes_dir.glob("*.rb")) if attributes_dir.exists() else []
+         )
+     except (OSError, ValueError):
+         attr_files = []
+
+     for attr_file in attr_files:
+         try:
+             # Validate each glob result
+             validated_file: Path = _validated_candidate(attr_file, cookbook_path)
+         except ValueError:
+             continue
+         try:
+             content = validated_file.read_text(encoding="utf-8", errors="ignore")
+             assignments = len(
+                 re.findall(
+                     r"^[ \t]{0,20}\w+[ \t]{0,10}(?:\[\w*\])?[ \t]{0,10}=",
+                     content,
+                     re.MULTILINE,
                  )
-                 attribute_complexity += assignments + complex_expressions
-             except Exception:
-                 continue
+             )
+             complex_expressions = len(
+                 re.findall(r"(?:node|default|override)\[", content)
+             )
+             attribute_complexity += assignments + complex_expressions
+         except Exception:
+             continue

      return attribute_complexity

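The per-result `_validated_candidate(path, base)` call from `souschef.core.path_utils` is what guards the reads above against symlinked files that resolve outside the cookbook; its body is not in this diff. The scenario it defends against, written out as a hedged excerpt (the symlink and variable names are illustrative):

    # cookbooks/app/recipes/default.rb -> /etc/passwd   (hostile symlink)
    # recipes_dir.glob("*.rb") happily yields that symlink, so every read
    # is gated on where the file *really* lives:
    try:
        validated = _validated_candidate(recipe_file, cookbook_path)
        content = validated.read_text(encoding="utf-8", errors="ignore")
    except ValueError:
        content = None  # skipped: resolves outside the cookbook root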
@@ -832,77 +917,91 @@ def _analyze_templates(cookbook_path: Path) -> int:
      """Analyze template files for ERB complexity."""
      erb_templates = 0

-     # cookbook_path already normalized by caller
-     templates_dir = (
-         cookbook_path / "templates"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     if templates_dir.exists():
-         for template_file in templates_dir.glob("**/*.erb"):
-             try:
-                 content = template_file.read_text(encoding="utf-8", errors="ignore")
-                 # Count ERB expressions and complex logic
-                 erb_expressions = len(re.findall(r"<%.*?%>", content))
-                 erb_templates += erb_expressions
-             except Exception:
-                 continue
+     # Note: cookbook_path is expected to be pre-validated
+     templates_dir: Path = _safe_join(cookbook_path, "templates")
+     try:
+         template_files: list[Path] = (
+             list(templates_dir.glob("**/*.erb")) if templates_dir.exists() else []
+         )
+     except (OSError, ValueError):
+         template_files = []
+
+     for template_file in template_files:
+         try:
+             # Validate each glob result
+             validated_file: Path = _validated_candidate(template_file, cookbook_path)
+         except ValueError:
+             continue
+
+         try:
+             content = validated_file.read_text(encoding="utf-8", errors="ignore")
+             erb_expressions = len(re.findall(r"<%.*?%>", content))
+             erb_templates += erb_expressions
+         except Exception:
+             continue

      return erb_templates


  def _analyze_libraries(cookbook_path: Path) -> int:
-     """Analyze library files for complexity."""
+     """Analyse library files for complexity."""
      library_complexity = 0

-     # cookbook_path already normalized by caller
-     libraries_dir = (
-         cookbook_path / "libraries"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     if libraries_dir.exists():
-         for lib_file in libraries_dir.glob("*.rb"):
-             try:
-                 content = lib_file.read_text(encoding="utf-8", errors="ignore")
-                 # Count class definitions, methods, and complex Ruby constructs
-                 classes = len(re.findall(r"class\s+\w+", content))
-                 methods = len(re.findall(r"def\s+\w+", content))
-                 library_complexity += classes * 2 + methods
-             except Exception:
-                 continue
+     # Note: cookbook_path is expected to be pre-validated
+     libraries_dir: Path = _safe_join(cookbook_path, "libraries")
+     try:
+         lib_files: list[Path] = (
+             safe_glob(libraries_dir, "*.rb", cookbook_path)
+             if libraries_dir.exists()
+             else []
+         )
+     except (OSError, ValueError):
+         lib_files = []
+
+     for lib_file in lib_files:
+         try:
+             # lib_file is already validated by safe_glob
+             content = lib_file.read_text(encoding="utf-8", errors="ignore")
+             classes = len(re.findall(r"class\s+\w+", content))
+             methods = len(re.findall(r"def\s+\w+", content))
+             library_complexity += classes * 2 + methods
+         except Exception:
+             continue

      return library_complexity


  def _count_definitions(cookbook_path: Path) -> int:
      """Count definition files."""
-     # cookbook_path already normalized by caller
-     definitions_dir = (
-         cookbook_path / "definitions"
-     )  # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected
-     if definitions_dir.exists():
-         return len(list(definitions_dir.glob("*.rb")))
-     return 0
+     # Note: cookbook_path is expected to be pre-validated
+     definitions_dir: Path = _safe_join(cookbook_path, "definitions")
+     try:
+         def_files: list[Path] = (
+             safe_glob(definitions_dir, "*.rb", cookbook_path)
+             if definitions_dir.exists()
+             else []
+         )
+     except (OSError, ValueError):
+         def_files = []
+     return len(def_files)


  def _parse_berksfile(cookbook_path: Path) -> dict[str, Any]:
      """Parse Berksfile for dependency information."""
-     # deepcode ignore PT: path normalized via _normalize_path in caller
-     cookbook_path = (
-         Path(cookbook_path) if not isinstance(cookbook_path, Path) else cookbook_path
-     )
-     berksfile = cookbook_path / "Berksfile"
+     base = _normalize_cookbook_root(cookbook_path)
+     berksfile_path = _safe_join(base, "Berksfile")

-     if not berksfile.exists():
+     if not berksfile_path.exists():
          return {"dependencies": [], "external_cookbooks": [], "complexity": 0}

      try:
-         content = berksfile.read_text(encoding="utf-8", errors="ignore")
+         content = berksfile_path.read_text(encoding="utf-8", errors="ignore")

-         # Parse cookbook dependencies
          cookbook_deps = re.findall(r'cookbook\s+[\'"]([^\'"]+)[\'"]', content)
          external_deps = re.findall(
              r'cookbook\s+[\'"]([^\'"]+)[\'"]\s*,\s*[\'"]([^\'"]+)[\'"]', content
          )

-         # Count complex dependency specifications (with version constraints, git sources, etc.)
          complex_deps = len(re.findall(r'cookbook\s+[\'"]([^\'"]+)[\'"]\s*,', content))
          git_sources = len(re.findall(r"git:", content))
          path_sources = len(re.findall(r"path:", content))
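`safe_glob(directory, pattern, base)` appears in the libraries/definitions rewrites as a one-call replacement for the glob-then-validate loop used elsewhere in this file. Assuming it simply composes those two steps, a minimal sketch (illustrative only; `Path.is_relative_to` requires Python 3.9+):

    from pathlib import Path

    def safe_glob(directory: Path, pattern: str, base: Path) -> list[Path]:
        # Glob, then keep only matches whose resolved location
        # stays inside the cookbook root.
        base = base.resolve()
        return [
            p.resolve()
            for p in directory.glob(pattern)
            if p.resolve().is_relative_to(base)
        ]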
@@ -920,21 +1019,20 @@ def _parse_berksfile(cookbook_path: Path) -> dict[str, Any]:

  def _parse_chefignore(cookbook_path) -> dict[str, Any]:
      """Parse chefignore file for ignore patterns."""
-     cookbook_path = Path(cookbook_path)
-     chefignore = cookbook_path / "chefignore"
+     base = _normalize_cookbook_root(cookbook_path)
+     chefignore_path = _ensure_within_base_path(_safe_join(base, "chefignore"), base)

-     if not chefignore.exists():
+     if not chefignore_path.exists():
          return {"patterns": [], "complexity": 0}

      try:
-         content = chefignore.read_text(encoding="utf-8", errors="ignore")
+         content = chefignore_path.read_text(encoding="utf-8", errors="ignore")
          lines = [
              line.strip()
              for line in content.split("\n")
              if line.strip() and not line.startswith("#")
          ]

-         # Count complex patterns (wildcards, directories, etc.)
          wildcard_patterns = len([p for p in lines if "*" in p or "?" in p])
          directory_patterns = len([p for p in lines if p.endswith("/") or "/" in p])

@@ -950,16 +1048,15 @@

  def _parse_thorfile(cookbook_path) -> dict[str, Any]:
      """Parse Thorfile for Thor tasks."""
-     cookbook_path = Path(cookbook_path)
-     thorfile = cookbook_path / "Thorfile"
+     base = _normalize_cookbook_root(cookbook_path)
+     thorfile_path = _ensure_within_base_path(_safe_join(base, "Thorfile"), base)

-     if not thorfile.exists():
+     if not thorfile_path.exists():
          return {"tasks": [], "complexity": 0}

      try:
-         content = thorfile.read_text(encoding="utf-8", errors="ignore")
+         content = thorfile_path.read_text(encoding="utf-8", errors="ignore")

-         # Count Thor tasks and methods
          tasks = len(re.findall(r'desc\s+[\'"]([^\'"]+)[\'"]', content))
          methods = len(re.findall(r"def\s+\w+", content))

@@ -975,10 +1072,10 @@ def _parse_thorfile(cookbook_path) -> dict[str, Any]:

  def _parse_metadata_file(cookbook_path) -> dict[str, Any]:
      """Parse metadata.rb for cookbook information."""
-     cookbook_path = Path(cookbook_path)
-     metadata_file = cookbook_path / "metadata.rb"
+     base = _normalize_cookbook_root(cookbook_path)
+     metadata_path = _ensure_within_base_path(_safe_join(base, "metadata.rb"), base)

-     if not metadata_file.exists():
+     if not metadata_path.exists():
          return {
              "name": "",
              "version": "",
@@ -988,9 +1085,8 @@ def _parse_metadata_file(cookbook_path) -> dict[str, Any]:
          }

      try:
-         content = metadata_file.read_text(encoding="utf-8", errors="ignore")
+         content = metadata_path.read_text(encoding="utf-8", errors="ignore")

-         # Extract basic metadata
          name_match = re.search(r'name\s+[\'"]([^\'"]+)[\'"]', content)
          version_match = re.search(r'version\s+[\'"]([^\'"]+)[\'"]', content)

@@ -1074,8 +1170,7 @@ def _determine_migration_priority(complexity_score: int) -> str:

  def _assess_single_cookbook(cookbook_path: Path) -> dict:
      """Assess complexity of a single cookbook."""
-     # cookbook_path is already normalized to a Path object
-     cookbook = cookbook_path
+     cookbook = _normalize_cookbook_root(cookbook_path)

      # Collect metrics
      artifact_counts = _count_cookbook_artifacts(cookbook)
@@ -1106,17 +1201,28 @@ def _assess_single_cookbook(cookbook_path: Path) -> dict:


  def _format_overall_metrics(metrics: dict) -> str:
-     """Format overall migration metrics."""
+     """Format overall migration metrics with manual and SousChef-assisted estimates."""
+     effort_metrics = EffortMetrics(metrics["estimated_effort_days"])
+
      return f"""• Total Cookbooks: {metrics["total_cookbooks"]}
  • Total Recipes: {metrics["total_recipes"]}
  • Total Resources: {metrics["total_resources"]}
  • Average Complexity: {metrics.get("avg_complexity", 0):.1f}/100
- • Estimated Total Effort: {metrics["estimated_effort_days"]:.1f} person-days
- • Estimated Duration: {EffortMetrics(metrics["estimated_effort_days"]).estimated_weeks_range}"""
+
+ ## Manual Migration Effort (Without SousChef):
+ • Estimated Effort: {metrics["estimated_effort_days"]:.1f} person-days ({effort_metrics.estimated_hours:.0f} hours)
+ • Estimated Duration: {effort_metrics.estimated_weeks_range}
+ • Team Required: {max(2, int(metrics["estimated_effort_days"] / 15))} developers
+
+ ## AI-Assisted Migration (With SousChef):
+ • Estimated Effort: {effort_metrics.estimated_days_with_souschef:.1f} person-days ({effort_metrics.estimated_hours_with_souschef:.0f} hours)
+ • Estimated Duration: {effort_metrics.estimated_weeks_range_with_souschef}
+ • Team Required: {max(1, int(effort_metrics.estimated_days_with_souschef / 15))} developers
+ • **Time Saved: {effort_metrics.time_saved:.1f} days ({effort_metrics.efficiency_gain_percent}% faster)**"""


  def _format_cookbook_assessments(assessments: list) -> str:
-     """Format individual cookbook assessments."""
+     """Format individual cookbook assessments with manual and AI-assisted estimates."""
      if not assessments:
          return "No cookbooks assessed."

@@ -1132,13 +1238,14 @@ def _format_cookbook_assessments(assessments: list) -> str:
      formatted = []
      for assessment in assessments:
          priority_icon = _get_priority_icon(assessment["migration_priority"])
+         effort_metrics = EffortMetrics(assessment["estimated_effort_days"])
+
          formatted.append(f"""### {assessment["cookbook_name"]} {priority_icon}
  • Complexity Score: {assessment["complexity_score"]:.1f}/100
- • Estimated Effort: {assessment["estimated_effort_days"]} days
- • Recipes: {assessment["metrics"]["recipe_count"]}
- • Resources: {assessment["metrics"]["resource_count"]}
- • Custom Resources: {assessment["metrics"]["custom_resources"]}
- • Challenges: {len(assessment["challenges"])}""")
+ • Recipes: {assessment["metrics"]["recipe_count"]} | Resources: {assessment["metrics"]["resource_count"]} | Custom Resources: {assessment["metrics"]["custom_resources"]}
+ • Manual Effort: {assessment["estimated_effort_days"]:.1f} days ({effort_metrics.estimated_weeks_range})
+ • With SousChef: {effort_metrics.estimated_days_with_souschef:.1f} days ({effort_metrics.estimated_weeks_range_with_souschef}) - Save {effort_metrics.time_saved:.1f} days
+ • Migration Challenges: {len(assessment["challenges"])}""")

      return "\n\n".join(formatted)

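The new report lines lean on `EffortMetrics` properties (`estimated_days_with_souschef`, `time_saved`, `efficiency_gain_percent`, and the `*_weeks_range` pair) from `souschef.core.metrics`, whose definitions are outside this diff. A rough sketch of the shape such a class might take, assuming a fixed efficiency factor (0.65, loosely matching the "60-70% of work" claim later in this file) and 8-hour days — both assumptions, and the weeks-range properties are omitted:

    from dataclasses import dataclass

    # Hypothetical reconstruction; the real factors live in souschef.core.metrics.
    SOUSCHEF_EFFICIENCY = 0.65  # assumed fraction of work automated
    HOURS_PER_DAY = 8           # assumed working hours per person-day

    @dataclass
    class EffortMetrics:
        estimated_days: float

        @property
        def estimated_hours(self) -> float:
            return self.estimated_days * HOURS_PER_DAY

        @property
        def estimated_days_with_souschef(self) -> float:
            return self.estimated_days * (1 - SOUSCHEF_EFFICIENCY)

        @property
        def estimated_hours_with_souschef(self) -> float:
            return self.estimated_days_with_souschef * HOURS_PER_DAY

        @property
        def time_saved(self) -> float:
            return self.estimated_days - self.estimated_days_with_souschef

        @property
        def efficiency_gain_percent(self) -> int:
            return round(SOUSCHEF_EFFICIENCY * 100)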
@@ -1351,60 +1458,144 @@ def _assess_migration_risks(assessments: list, target_platform: str) -> str:


  def _estimate_resource_requirements(metrics: dict, target_platform: str) -> str:
-     """Estimate resource requirements for migration."""
+     """Estimate resource requirements for migration with and without SousChef."""
      total_effort = metrics["estimated_effort_days"]
+     effort_metrics = EffortMetrics(total_effort)

-     # Team size recommendations
+     # Manual migration requirements
      if total_effort < 20:
-         team_size = "1 developer + 1 reviewer"
-         timeline = "4-6 weeks"
+         manual_team = "1 developer + 1 reviewer"
+         manual_timeline = "4-6 weeks"
      elif total_effort < 50:
-         team_size = "2 developers + 1 senior reviewer"
-         timeline = "6-10 weeks"
+         manual_team = "2 developers + 1 senior reviewer"
+         manual_timeline = "6-10 weeks"
      else:
-         team_size = "3-4 developers + 1 tech lead + 1 architect"
-         timeline = "10-16 weeks"
+         manual_team = "3-4 developers + 1 tech lead + 1 architect"
+         manual_timeline = "10-16 weeks"
+
+     # SousChef-assisted requirements
+     souschef_effort = effort_metrics.estimated_days_with_souschef
+     if souschef_effort < 20:
+         souschef_team = "1 developer (with SousChef AI assistance)"
+         souschef_timeline = "2-3 weeks"
+     elif souschef_effort < 50:
+         souschef_team = "1-2 developers + 1 reviewer (with SousChef)"
+         souschef_timeline = "3-5 weeks"
+     else:
+         souschef_team = "2-3 developers + 1 tech lead (with SousChef)"
+         souschef_timeline = "5-8 weeks"

-     return f"""• **Team Size:** {team_size}
- • **Estimated Timeline:** {timeline}
- • **Total Effort:** {total_effort:.1f} person-days
+     return f"""## Manual Migration (Without AI Assistance):
+ • **Team Size:** {manual_team}
+ • **Estimated Timeline:** {manual_timeline}
+ • **Total Effort:** {total_effort:.1f} person-days ({effort_metrics.estimated_hours:.0f} hours)
+ • **Infrastructure:** {target_platform.replace("_", "/").upper()} environment
+ • **Testing:** Dedicated test environment required
+ • **Training:** 3-5 days Ansible/AWX training for team
+
+ ## AI-Assisted Migration (With SousChef):
+ • **Team Size:** {souschef_team}
+ • **Estimated Timeline:** {souschef_timeline}
+ • **Total Effort:** {souschef_effort:.1f} person-days ({effort_metrics.estimated_hours_with_souschef:.0f} hours)
+ • **Time Savings:** {effort_metrics.time_saved:.1f} days ({effort_metrics.efficiency_gain_percent}% reduction)
  • **Infrastructure:** {target_platform.replace("_", "/").upper()} environment
- • **Testing:** Dedicated test environment recommended
- • **Training:** 2-3 days Ansible/AWX training for team"""
+ • **Testing:** Automated validation + human review
+ • **Training:** 1-2 days SousChef usage + Ansible basics
+
+ **SousChef Benefits:**
+ • Automated boilerplate conversion (60-70% of work)
+ • Built-in best practices validation
+ • Faster iteration cycles
+ • Consistent output quality
+ • Reduced human error"""
+

+ def _analyse_cookbook_dependencies_detailed(cookbook_path: Path | str) -> dict:
+     """
+     Analyse cookbook dependencies in detail.
+
+     Args:
+         cookbook_path: Path to the cookbook (may be string or Path).
+
+     Returns:
+         Dictionary with dependency information.
+
+     Raises:
+         ValueError: If the path is invalid.
+
+     """
+     # Normalize the input path
+     base_path: Path = _normalize_path(cookbook_path)
+
+     # Validate basic accessibility
+     if not base_path.exists():
+         msg = f"Cookbook path does not exist: {cookbook_path}"
+         raise ValueError(msg)
+     if not base_path.is_dir():
+         msg = f"Cookbook path is not a directory: {cookbook_path}"
+         raise ValueError(msg)
+
+     # Collect dependencies from metadata and Berksfile
+     direct_dependencies = _collect_metadata_dependencies(base_path)
+     external_dependencies = _collect_berks_dependencies(base_path)
+     community_cookbooks = _identify_community_cookbooks_from_list(
+         direct_dependencies + external_dependencies
+     )

- def _analyse_cookbook_dependencies_detailed(cookbook_path) -> dict:
-     """Analyze cookbook dependencies in detail."""
-     analysis = {
-         "cookbook_name": cookbook_path.name,
-         "direct_dependencies": [],
+     return {
+         "cookbook_name": base_path.name,
+         "direct_dependencies": direct_dependencies,
          "transitive_dependencies": [],
-         "external_dependencies": [],
-         "community_cookbooks": [],
+         "external_dependencies": external_dependencies,
+         "community_cookbooks": community_cookbooks,
          "circular_dependencies": [],
      }

-     # Read metadata.rb for dependencies
-     metadata_file = _safe_join(cookbook_path, METADATA_FILENAME)
-     if metadata_file.exists():
-         with metadata_file.open("r", encoding="utf-8", errors="ignore") as f:
-             content = f.read()

-         # Parse dependencies
+ def _collect_metadata_dependencies(base_path: Path) -> list[str]:
+     """Collect dependency declarations from metadata.rb with containment checks."""
+     # Build metadata path safely within the cookbook
+     metadata_path: Path = _safe_join(base_path, METADATA_FILENAME)

-         depends_matches = re.findall(r'depends\s+[\'"]([^\'"]+)[\'"]', content)
-         analysis["direct_dependencies"] = depends_matches
+     if not metadata_path.is_file():
+         return []
+
+     try:
+         # Validate metadata_path is within base_path
+         _validated_candidate(metadata_path, base_path)
+     except ValueError:
+         # metadata.rb is outside cookbook root
+         return []

-     # Read Berksfile for additional dependencies
-     berksfile = _safe_join(cookbook_path, "Berksfile")
-     if berksfile.exists():
-         with berksfile.open("r", encoding="utf-8", errors="ignore") as f:
-             content = f.read()
+     with metadata_path.open(encoding="utf-8", errors="ignore") as f:
+         content = f.read()

-         cookbook_matches = re.findall(r'cookbook\s+[\'"]([^\'"]+)[\'"]', content)
-         analysis["external_dependencies"].extend(cookbook_matches)
+     return re.findall(r'depends\s+[\'"]([^\'"]+)[\'"]', content)

-     # Identify community cookbooks (common ones)
+
+ def _collect_berks_dependencies(base_path: Path) -> list[str]:
+     """Collect dependency declarations from Berksfile with containment checks."""
+     # Build Berksfile path safely within the cookbook
+     berksfile_path: Path = _safe_join(base_path, "Berksfile")
+
+     if not berksfile_path.is_file():
+         return []
+
+     try:
+         # Validate berksfile_path is within base_path
+         _validated_candidate(berksfile_path, base_path)
+     except ValueError:
+         # Berksfile is outside cookbook root
+         return []
+
+     with berksfile_path.open(encoding="utf-8", errors="ignore") as f:
+         content = f.read()
+
+     return re.findall(r'cookbook\s+[\'"]([^\'"]+)[\'"]', content)
+
+
+ def _identify_community_cookbooks_from_list(dependencies: list[str]) -> list[str]:
+     """Return dependencies considered community cookbooks based on patterns."""
      community_cookbook_patterns = [
          "apache2",
          "nginx",
@@ -1421,12 +1612,11 @@ def _analyse_cookbook_dependencies_detailed(cookbook_path) -> dict:
          "users",
      ]

-     all_deps = analysis["direct_dependencies"] + analysis["external_dependencies"]
-     for dep in all_deps:
-         if any(pattern in dep.lower() for pattern in community_cookbook_patterns):
-             analysis["community_cookbooks"].append(dep)
-
-     return analysis
+     return [
+         dep
+         for dep in dependencies
+         if any(pattern in dep.lower() for pattern in community_cookbook_patterns)
+     ]


  def _determine_migration_order(dependency_analysis: dict) -> list:
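To make the 3.5.1 refactor concrete: the mutate-a-shared-dict flow is now three pure helpers whose results are assembled into the return value. A hypothetical result for a cookbook depending on nginx plus an internal cookbook (all values illustrative):

    _analyse_cookbook_dependencies_detailed("cookbooks/my_app")
    # {
    #     "cookbook_name": "my_app",
    #     "direct_dependencies": ["nginx", "my_app_base"],
    #     "transitive_dependencies": [],
    #     "external_dependencies": ["nginx"],
    #     "community_cookbooks": ["nginx"],
    #     "circular_dependencies": [],
    # }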
@@ -1933,7 +2123,7 @@ def assess_single_cookbook_with_ai(
      """
      try:
          cookbook_path_obj = _normalize_path(cookbook_path)
-         if not cookbook_path_obj.exists():
+         if not cookbook_path_obj.exists():  # Read-only check on normalized path
              return {"error": f"Cookbook path not found: {cookbook_path}"}

          # Check if AI is available
@@ -2203,8 +2393,7 @@ def _assess_single_cookbook_with_ai(
      base_url: str = "",
  ) -> dict:
      """Assess complexity of a single cookbook using AI analysis."""
-     # cookbook_path is already normalized to a Path object
-     cookbook = cookbook_path
+     cookbook = _normalize_cookbook_root(cookbook_path)

      # Collect basic metrics (same as rule-based)
      artifact_counts = _count_cookbook_artifacts(cookbook)
@@ -2331,10 +2520,15 @@ Provide your analysis in JSON format with keys: complexity_score, estimated_effo

  def _get_recipe_content_sample(cookbook_path: Path) -> str:
      """Get a sample of ALL recipe content for AI analysis."""
-     recipes_dir = _safe_join(cookbook_path, "recipes")
-     if not recipes_dir.exists():
+     # Inline guard directly adjacent to sink
+     base = os.path.realpath(str(cookbook_path))  # noqa: PTH111
+     recipes_dir_str = os.path.realpath(os.path.join(base, "recipes"))  # noqa: PTH111, PTH118
+     if os.path.commonpath([base, recipes_dir_str]) != base:
+         raise RuntimeError("Path traversal")
+     if not os.path.exists(recipes_dir_str):  # noqa: PTH110
          return "No recipes directory found"

+     recipes_dir = Path(recipes_dir_str)
      recipe_files = list(recipes_dir.glob("*.rb"))
      if not recipe_files:
          return "No recipe files found"
@@ -2372,12 +2566,16 @@ def _get_recipe_content_sample(cookbook_path: Path) -> str:

  def _get_metadata_content(cookbook_path: Path) -> str:
      """Get metadata content for AI analysis."""
-     metadata_file = _safe_join(cookbook_path, METADATA_FILENAME)
-     if not metadata_file.exists():
+     # Inline guard directly adjacent to sink
+     base = os.path.realpath(str(cookbook_path))  # noqa: PTH111
+     metadata_file_str = os.path.realpath(os.path.join(base, METADATA_FILENAME))  # noqa: PTH111, PTH118
+     if os.path.commonpath([base, metadata_file_str]) != base:
+         raise RuntimeError("Path traversal")
+     if not os.path.exists(metadata_file_str):  # noqa: PTH110
          return "No metadata.rb found"

      try:
-         return metadata_file.read_text(encoding="utf-8", errors="ignore")
+         return Path(metadata_file_str).read_text(encoding="utf-8", errors="ignore")
      except Exception:
          return "Could not read metadata"

@@ -2399,8 +2597,21 @@ def _call_ai_api(
      elif ai_provider == "openai":
          return _call_openai_api(prompt, api_key, model, temperature, max_tokens)
      elif ai_provider == "watson":
+         validated_url = None
+         if base_url:
+             try:
+                 validated_url = validate_user_provided_url(base_url)
+             except ValueError:
+                 return None
+
          return _call_watson_api(
-             prompt, api_key, model, temperature, max_tokens, project_id, base_url
+             prompt,
+             api_key,
+             model,
+             temperature,
+             max_tokens,
+             project_id,
+             validated_url,
          )
      else:
          return None
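`validate_user_provided_url` (from `souschef.core.url_validation`) is only imported here, so its actual rules are not part of this diff. A minimal sketch of the SSRF-style checks such a validator typically performs — HTTPS-only, a non-empty host, and no embedded credentials are all assumptions on my part:

    from urllib.parse import urlparse

    def validate_user_provided_url(url: str) -> str:
        parsed = urlparse(url)
        # Reject non-HTTPS schemes, missing hosts, and embedded credentials.
        if parsed.scheme != "https":
            raise ValueError(f"Unsupported scheme: {parsed.scheme!r}")
        if not parsed.hostname:
            raise ValueError("URL has no host")
        if parsed.username or parsed.password:
            raise ValueError("Credentials in URL are not allowed")
        return url

Whatever the real rules are, the call-site behaviour above is clear from the diff: a `ValueError` from the validator makes `_call_ai_api` return `None` instead of forwarding the raw `base_url` to the Watson client.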