anyscale 0.26.47__py3-none-any.whl → 0.26.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. anyscale/__init__.py +0 -7
  2. anyscale/_private/anyscale_client/anyscale_client.py +1 -208
  3. anyscale/_private/anyscale_client/common.py +0 -55
  4. anyscale/_private/anyscale_client/fake_anyscale_client.py +19 -46
  5. anyscale/_private/docgen/__main__.py +24 -45
  6. anyscale/_private/docgen/generator.py +32 -16
  7. anyscale/_private/docgen/generator_legacy.py +58 -6
  8. anyscale/_private/docgen/models.md +3 -2
  9. anyscale/_private/workload/workload_config.py +16 -8
  10. anyscale/_private/workload/workload_sdk.py +22 -5
  11. anyscale/client/README.md +4 -1
  12. anyscale/client/openapi_client/__init__.py +2 -1
  13. anyscale/client/openapi_client/api/default_api.py +253 -4
  14. anyscale/client/openapi_client/models/__init__.py +2 -1
  15. anyscale/client/openapi_client/models/{alert_type.py → alert_issue_type.py} +8 -20
  16. anyscale/client/openapi_client/models/baseimagesenum.py +1 -2
  17. anyscale/client/openapi_client/models/cloud.py +31 -3
  18. anyscale/client/openapi_client/models/cloud_deployment.py +30 -3
  19. anyscale/client/openapi_client/models/cloud_with_cloud_resource.py +29 -1
  20. anyscale/client/openapi_client/models/cloud_with_cloud_resource_gcp.py +29 -1
  21. anyscale/client/openapi_client/models/dataset_metrics.py +6 -6
  22. anyscale/client/openapi_client/models/dataset_state.py +2 -1
  23. anyscale/client/openapi_client/models/describe_system_workload_response.py +32 -6
  24. anyscale/client/openapi_client/models/experimental_workspace.py +29 -1
  25. anyscale/client/openapi_client/models/experimental_workspaces_sort_field.py +2 -1
  26. anyscale/client/openapi_client/models/operator_metrics.py +8 -9
  27. anyscale/client/openapi_client/models/operator_status.py +102 -0
  28. anyscale/client/openapi_client/models/organization_usage_alert.py +20 -20
  29. anyscale/client/openapi_client/models/supportedbaseimagesenum.py +1 -2
  30. anyscale/cloud/models.py +330 -0
  31. anyscale/commands/cloud_commands.py +132 -43
  32. anyscale/commands/command_examples.py +54 -134
  33. anyscale/commands/compute_config_commands.py +7 -11
  34. anyscale/compute_config/__init__.py +2 -16
  35. anyscale/compute_config/_private/compute_config_sdk.py +27 -17
  36. anyscale/compute_config/commands.py +14 -44
  37. anyscale/compute_config/models.py +49 -26
  38. anyscale/controllers/cloud_controller.py +289 -171
  39. anyscale/controllers/cloud_file_storage_utils.py +204 -0
  40. anyscale/controllers/kubernetes_verifier.py +1567 -0
  41. anyscale/job/_private/job_sdk.py +17 -8
  42. anyscale/job/models.py +1 -1
  43. anyscale/scripts.py +0 -2
  44. anyscale/sdk/anyscale_client/models/baseimagesenum.py +1 -2
  45. anyscale/sdk/anyscale_client/models/cloud.py +31 -3
  46. anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +1 -2
  47. anyscale/shared_anyscale_utils/utils/id_gen.py +1 -0
  48. anyscale/version.py +1 -1
  49. anyscale/workspace/models.py +14 -7
  50. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/METADATA +1 -1
  51. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/RECORD +56 -70
  52. anyscale/commands/llm/dataset_commands.py +0 -269
  53. anyscale/commands/llm/group.py +0 -15
  54. anyscale/commands/llm/models_commands.py +0 -123
  55. anyscale/controllers/llm/__init__.py +0 -0
  56. anyscale/controllers/llm/models_controller.py +0 -144
  57. anyscale/llm/__init__.py +0 -2
  58. anyscale/llm/dataset/__init__.py +0 -2
  59. anyscale/llm/dataset/_private/__init__.py +0 -0
  60. anyscale/llm/dataset/_private/docs.py +0 -63
  61. anyscale/llm/dataset/_private/models.py +0 -71
  62. anyscale/llm/dataset/_private/sdk.py +0 -147
  63. anyscale/llm/model/__init__.py +0 -2
  64. anyscale/llm/model/_private/models_sdk.py +0 -62
  65. anyscale/llm/model/commands.py +0 -93
  66. anyscale/llm/model/models.py +0 -171
  67. anyscale/llm/model/sdk.py +0 -62
  68. anyscale/llm/sdk.py +0 -27
  69. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/WHEEL +0 -0
  70. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/entry_points.txt +0 -0
  71. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/licenses/LICENSE +0 -0
  72. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/licenses/NOTICE +0 -0
  73. {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/top_level.txt +0 -0
anyscale/controllers/cloud_controller.py
@@ -80,6 +80,7 @@ from anyscale.controllers.cloud_functional_verification_controller import (
     CloudFunctionalVerificationController,
     CloudFunctionalVerificationType,
 )
+from anyscale.controllers.kubernetes_verifier import KubernetesCloudDeploymentVerifier
 from anyscale.formatters import clouds_formatter
 from anyscale.job._private.job_sdk import (
     HA_JOB_STATE_TO_JOB_STATE,
@@ -197,10 +198,11 @@ class CloudController(BaseController):
                 ).result
             ]
         else:
-            clouds = self.api_client.list_clouds_api_v2_clouds_get().results
-            clouds_output = clouds[:max_items]
+            clouds = self.api_client.list_clouds_api_v2_clouds_get(
+                count=max_items
+            ).results
         output = clouds_formatter.format_clouds_output(
-            clouds=clouds_output, json_format=False
+            clouds=clouds[:max_items], json_format=False
         )
         return str(output)
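
In the old code the client fetched every cloud and truncated locally; the new call passes count=max_items so at most that many records come back from the API. A rough sketch of the difference (FakeCloudsApi below is a made-up stand-in for the generated client, not part of the package):

    # Hypothetical stand-in for the generated API client.
    class FakeCloudsApi:
        def __init__(self, clouds):
            self._clouds = clouds

        def list_clouds_api_v2_clouds_get(self, count=None):
            # Server-side limiting: only `count` records are returned (and transferred).
            results = self._clouds if count is None else self._clouds[:count]
            return type("Resp", (), {"results": results})()

    api = FakeCloudsApi(clouds=[f"cloud-{i}" for i in range(100)])
    # Old pattern: fetch all 100 clouds, then slice locally.
    old = api.list_clouds_api_v2_clouds_get().results[:10]
    # New pattern: ask for at most 10 up front.
    new = api.list_clouds_api_v2_clouds_get(count=10).results
    assert old == new
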
 
@@ -1422,6 +1424,30 @@ class CloudController(BaseController):
             cloud_id, CloudProviders.AWS, functions_to_verify, yes,
         )
 
+    def get_cloud_deployment(
+        self, cloud_id: str, cloud_deployment_id: str
+    ) -> CloudDeployment:
+        try:
+            return self.api_client.get_cloud_deployment_api_v2_clouds_cloud_id_deployment_get(
+                cloud_id=cloud_id, cloud_deployment_id=cloud_deployment_id,
+            ).result
+        except Exception as e:  # noqa: BLE001
+            raise ClickException(
+                f"Failed to get cloud deployment {cloud_deployment_id} for cloud {cloud_id}. Error: {e}"
+            )
+
+    # Avoid displaying fields with empty values (since the values for optional fields default to None).
+    def _remove_empty_values(self, d):
+        if isinstance(d, dict):
+            return {
+                k: self._remove_empty_values(v)
+                for k, v in d.items()
+                if self._remove_empty_values(v)
+            }
+        if isinstance(d, list):
+            return [self._remove_empty_values(v) for v in d]
+        return d
+
     def get_cloud_deployments(self, cloud_id: str) -> Dict[str, Any]:
         cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(
             cloud_id=cloud_id,
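
The _remove_empty_values helper (previously a local function, now a method that also recurses into lists) simply prunes falsy leaves before a deployment dict is displayed or diffed. A standalone sketch with an invented sample dict:

    # Standalone copy of the helper for illustration; the sample deployment dict is made up.
    def remove_empty_values(d):
        if isinstance(d, dict):
            return {k: remove_empty_values(v) for k, v in d.items() if remove_empty_values(v)}
        if isinstance(d, list):
            return [remove_empty_values(v) for v in d]
        return d

    deployment = {
        "cloud_deployment_id": "cldrsrc_fake123",  # hypothetical ID
        "name": None,                              # optional fields default to None
        "aws_config": {"vpc_id": "vpc-abc", "memorydb_cluster_name": None},
        "mount_targets": [],
    }
    print(remove_empty_values(deployment))
    # {'cloud_deployment_id': 'cldrsrc_fake123', 'aws_config': {'vpc_id': 'vpc-abc'}}
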
@@ -1441,24 +1467,38 @@ class CloudController(BaseController):
                 f"Failed to get cloud deployments for cloud {cloud.name} ({cloud_id}). Error: {e}"
             )
 
-        # Avoid displaying fields with empty values (since the values for optional fields default to None).
-        def remove_empty_values(d):
-            if isinstance(d, dict):
-                return {
-                    k: remove_empty_values(v)
-                    for k, v in d.items()
-                    if remove_empty_values(v)
-                }
-            return d
-
         return {
             "id": cloud_id,
             "name": cloud.name,
             "deployments": [
-                remove_empty_values(deployment.to_dict()) for deployment in deployments
+                self._remove_empty_values(deployment.to_dict())
+                for deployment in deployments
             ],
         }
 
+    def get_cloud_deployment_dict_by_name(
+        self, cloud_name: str, cloud_deployment_name: Optional[str]
+    ) -> Dict[str, Any]:
+        cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
+
+        result = self.get_cloud_deployments(cloud_id)
+        deployments = result.get("deployments", [])
+        if len(deployments) == 0:
+            raise ClickException(f"Cloud {cloud_name} has no cloud deployments.")
+
+        if cloud_deployment_name is None:
+            if len(deployments) > 1:
+                self.log.warning(
+                    f"Cloud {cloud_name} has {len(deployments)} deployments, only the primary deployment will be returned."
+                )
+            return deployments[0]
+
+        for deployment in deployments:
+            if deployment.get("name") == cloud_deployment_name:
+                return deployment
+
+        raise ClickException(f"Cloud deployment {cloud_deployment_name} not found.")
+
     def update_aws_anyscale_iam_role(
         self,
         cloud_id: str,
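
The selection rules in get_cloud_deployment_dict_by_name are easy to exercise in isolation: no name means "return the first (primary) deployment, warning if there are several", and an explicit name must match exactly or the lookup fails. A small sketch of that logic with fabricated deployment dicts:

    # Illustration of the lookup rules only; the deployment dicts here are fabricated.
    def pick_deployment(deployments, name=None):
        if not deployments:
            raise ValueError("no cloud deployments")
        if name is None:
            if len(deployments) > 1:
                print(f"warning: {len(deployments)} deployments, returning the primary one")
            return deployments[0]
        for d in deployments:
            if d.get("name") == name:
                return d
        raise ValueError(f"cloud deployment {name} not found")

    deployments = [{"name": "primary", "provider": "AWS"}, {"name": "k8s-edge", "provider": "AWS"}]
    assert pick_deployment(deployments)["name"] == "primary"        # no name -> primary
    assert pick_deployment(deployments, "k8s-edge")["name"] == "k8s-edge"
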
@@ -1559,9 +1599,11 @@ class CloudController(BaseController):
             )
 
         # Get EFS mount target IP.
-        if deployment.file_storage:
+        if (
+            deployment.file_storage
+            and FileStorage(**deployment.file_storage).file_storage_id
+        ):
             file_storage = FileStorage(**deployment.file_storage)
-            assert file_storage.file_storage_id
 
             try:
                 boto3_session = boto3.Session(region_name=deployment.region)
@@ -1668,7 +1710,7 @@ class CloudController(BaseController):
 
         deployment.gcp_config = gcp_config
 
-    def add_cloud_deployment(
+    def create_cloud_deployment(
         self,
         cloud_name: str,
         spec_file: str,
@@ -1730,11 +1772,15 @@ class CloudController(BaseController):
             raise ClickException(f"Failed to add cloud deployment: {e}")
 
         self.log.info(
-            f"Successfully added deployment{' ' + new_deployment.name if new_deployment.name else ''} to cloud {existing_spec['name']}!"
+            f"Successfully created cloud deployment{' ' + new_deployment.name if new_deployment.name else ''} in cloud {existing_spec['name']}!"
         )
 
-    def update_cloud_deployments(  # noqa: PLR0912, C901
-        self, spec_file: str, skip_verification: bool = False, yes: bool = False,
+    def update_cloud_deployment(  # noqa: PLR0912
+        self,
+        cloud: str,
+        spec_file: str,
+        skip_verification: bool = False,
+        yes: bool = False,
     ):
         # Read the spec file.
         path = pathlib.Path(spec_file)
@@ -1744,76 +1790,70 @@ class CloudController(BaseController):
             raise ClickException(f"{spec_file} is not a file.")
 
         spec = yaml.safe_load(path.read_text())
-        if not all(k in spec for k in ["id", "name", "deployments"]):
+        try:
+            updated_deployment = CloudDeployment(**spec)
+        except Exception as e:  # noqa: BLE001
+            raise ClickException(f"Failed to parse cloud deployment: {e}")
+
+        if not updated_deployment.cloud_deployment_id:
             raise ClickException(
-                "Cloud ID, name, and deployments must be specified in the spec file."
+                "The cloud deployment must include a cloud_deployment_id."
             )
 
-        # Get the existing spec.
-        existing_spec = self.get_cloud_deployments(cloud_id=spec["id"],)
-        if existing_spec["name"] != spec["name"]:
-            raise ClickException("Changing the name of a cloud is not supported.")
+        # Get the existing cloud deployment.
+        cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud)
+        existing_deployment = self.get_cloud_deployment(
+            cloud_id=cloud_id,
+            cloud_deployment_id=updated_deployment.cloud_deployment_id,
+        )
+        if (
+            updated_deployment.provider == CloudProviders.PCP
+            or existing_deployment.provider == CloudProviders.PCP
+        ):
+            raise ClickException(
+                "Please use the `anyscale machine-pool` CLI to update machine pools."
+            )
 
-        # Diff the existing and new specs
-        diff = self._generate_diff(existing_spec["deployments"], spec["deployments"])
+        # Diff the existing and new cloud deployments.
+        diff = self._generate_diff(
+            self._remove_empty_values(existing_deployment.to_dict()),
+            self._remove_empty_values(updated_deployment.to_dict()),
+        )
         if not diff:
             self.log.info("No changes detected.")
             return
 
-        existing_deployments = {
-            deployment["cloud_deployment_id"]: CloudDeployment(**deployment)
-            for deployment in existing_spec["deployments"]
-        }
-
-        updated_deployments: List[CloudDeployment] = []
-        for d in spec["deployments"]:
-            try:
-                deployment = CloudDeployment(**d)
-            except Exception as e:  # noqa: BLE001
-                raise ClickException(f"Failed to parse deployment: {e}")
-
-            if not deployment.cloud_deployment_id:
-                raise ClickException(
-                    "All cloud deployments must include a cloud_deployment_id."
-                )
-            if deployment.cloud_deployment_id not in existing_deployments:
-                raise ClickException(
-                    f"Cloud deployment {deployment.cloud_deployment_id} not found."
-                )
-            if deployment.provider == CloudProviders.PCP:
-                raise ClickException(
-                    "Please use the `anyscale machine-pool` CLI to update machine pools."
-                )
-            if deployment != existing_deployments[deployment.cloud_deployment_id]:
-                updated_deployments.append(deployment)
-
-        # Preprocess the deployments if necessary.
-        for deployment in updated_deployments:
-            if deployment.provider == CloudProviders.AWS:
-                self._preprocess_aws(cloud_id=spec["id"], deployment=deployment)
-            elif deployment.provider == CloudProviders.GCP:
-                self._preprocess_gcp(deployment=deployment)
-            if not skip_verification and not self.verify_cloud_deployment(
-                cloud_id=spec["id"], cloud_deployment=deployment
-            ):
-                raise ClickException(
-                    f"Verification failed for cloud deployment {deployment.name}."
-                )
+        # Preprocess the deployment if necessary.
+        if updated_deployment.provider == CloudProviders.AWS:
+            self._preprocess_aws(cloud_id=cloud_id, deployment=updated_deployment)
+        elif updated_deployment.provider == CloudProviders.GCP:
+            self._preprocess_gcp(deployment=updated_deployment)
+        # Skip verification for Kubernetes stacks or if explicitly requested
+        if updated_deployment.compute_stack == ComputeStack.K8S:
+            self.log.info("Skipping verification for Kubernetes compute stack.")
+        elif not skip_verification and not self.verify_cloud_deployment(
+            cloud_id=cloud_id, cloud_deployment=updated_deployment
+        ):
+            raise ClickException(
+                f"Verification failed for cloud deployment {updated_deployment.name}."
+            )
 
         # Log the diff and confirm.
         self.log.info(f"Detected the following changes:\n{diff}")
 
-        confirm("Would you like to proceed with updating this cloud?", yes)
+        confirm("Would you like to proceed with updating this cloud deployment?", yes)
 
-        # Update the deployments.
+        # Update the deployment.
         try:
-            self.api_client.update_cloud_deployments_api_v2_clouds_cloud_id_deployments_put(
-                cloud_id=spec["id"], cloud_deployment=updated_deployments,
+            self.api_client.update_cloud_deployment_api_v2_clouds_cloud_id_update_deployment_put(
+                cloud_id=cloud_id, cloud_deployment=updated_deployment,
             )
         except Exception as e:  # noqa: BLE001
-            raise ClickException(f"Failed to update cloud deployments: {e}")
+            raise ClickException(f"Failed to update cloud deployment: {e}")
 
-        self.log.info(f"Successfully updated cloud {spec['name']}!")
+        self.log.info(
+            f"Successfully updated cloud deployment {updated_deployment.name or updated_deployment.cloud_deployment_id} in cloud {cloud}."
+        )
 
     def remove_cloud_deployment(
         self, cloud_name: str, deployment_name: str, yes: bool,
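
Note that the spec file passed to the new update_cloud_deployment describes a single deployment (parsed directly into CloudDeployment) rather than a whole cloud with a deployments list. A rough sketch of the new validation order, with an invented spec and a plain dict standing in for the generated CloudDeployment model:

    import yaml

    # Sketch only: the spec below is invented and enum values are shown as strings.
    SPEC = """
    cloud_deployment_id: cldrsrc_fake123
    name: primary
    provider: AWS
    compute_stack: VM
    region: us-west-2
    """

    def validate_spec(text):
        spec = yaml.safe_load(text)
        if not spec.get("cloud_deployment_id"):
            raise ValueError("The cloud deployment must include a cloud_deployment_id.")
        if spec.get("provider") == "PCP":
            raise ValueError("Use the `anyscale machine-pool` CLI to update machine pools.")
        # Kubernetes deployments skip resource verification entirely.
        needs_verification = spec.get("compute_stack") != "K8S"
        return spec, needs_verification

    spec, needs_verification = validate_spec(SPEC)
    print(spec["name"], needs_verification)  # primary True
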
@@ -1864,6 +1904,9 @@ class CloudController(BaseController):
         spec_file: Optional[str] = None,
     ):
         """Update a cloud's configuration."""
+        if enable_log_ingestion is None and spec_file is None:
+            return
+
         cloud_id, cloud_name = get_cloud_id_and_name(
             self.api_client, cloud_id, cloud_name
         )
@@ -1913,6 +1956,33 @@ class CloudController(BaseController):
 
         self.log.info(f"Updated default cloud to {cloud_name}")
 
+    def update_system_cluster_config(
+        self,
+        cloud_name: Optional[str],
+        cloud_id: Optional[str],
+        system_cluster_enabled: Optional[bool],
+    ) -> None:
+        """Update system cluster configuration for a cloud."""
+        if system_cluster_enabled is None:
+            return
+
+        cloud_id, cloud_name = get_cloud_id_and_name(
+            self.api_client, cloud_id, cloud_name
+        )
+
+        self.api_client.update_system_cluster_config_api_v2_clouds_cloud_id_update_system_cluster_config_put(
+            cloud_id=cloud_id, is_enabled=system_cluster_enabled,
+        )
+        if system_cluster_enabled:
+            self.log.info(f"Successfully enabled system cluster for cloud {cloud_id}")
+        else:
+            self.log.info(
+                f"Successfully disabled system cluster for cloud {cloud_id}\n"
+                "Note: if the system cluster is currently running, it will continue to run until it is terminated.\n"
+                "To terminate the system cluster, use the CLI command: "
+                f"anyscale cloud terminate-system-cluster --cloud-id {cloud_id} --wait"
+            )
+
     def _passed_or_failed_str_from_bool(self, is_passing: bool) -> str:
         return "PASSED" if is_passing else "FAILED"
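
Both new guards (the early return in edit_cloud above and this one) follow the same tri-state convention: an Optional[bool] where None means "leave the setting untouched" and only an explicit True/False triggers an API call. A minimal sketch of that pattern with a stubbed client (the stub is illustrative, not the real generated client):

    from typing import Optional

    # Illustrative stub for the generated update_system_cluster_config_* endpoint.
    class StubClient:
        def update_system_cluster_config(self, cloud_id: str, is_enabled: bool) -> None:
            print(f"PUT system_cluster_config cloud={cloud_id} is_enabled={is_enabled}")

    def update_system_cluster_config(client: StubClient, cloud_id: str, enabled: Optional[bool]) -> None:
        if enabled is None:  # flag not passed -> no-op
            return
        client.update_system_cluster_config(cloud_id, enabled)

    client = StubClient()
    update_system_cluster_config(client, "cld_fake123", None)   # prints nothing
    update_system_cluster_config(client, "cld_fake123", False)  # issues the update
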
 
@@ -1956,7 +2026,7 @@ class CloudController(BaseController):
         yes: bool = False,
     ) -> bool:
         """
-        Verifies a cloud by name or id.
+        Verifies a cloud by name or id, including all cloud deployments.
 
         Note: If your changes involve operations that may require additional permissions
         (for example, `boto3_session.client("efs").describe_backup_policy`), it's important
@@ -1971,13 +2041,9 @@ class CloudController(BaseController):
             self.api_client, cloud_id, cloud_name
         )
 
-        cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(cloud_id).result
+        assert cloud_id is not None
 
-        if cloud.compute_stack == ComputeStack.K8S:
-            self.log.error(
-                "The cloud verify command is not supported for Kubernetes clouds."
-            )
-            return False
+        cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(cloud_id).result
 
         if cloud.state in (CloudState.DELETING, CloudState.DELETED):
             self.log.info(
@@ -1985,13 +2051,16 @@ class CloudController(BaseController):
             )
             return False
 
-        cloud_resource = get_cloud_resource_by_cloud_id(
-            cloud_id, cloud.provider, self.api_client
-        )
-        if cloud_resource is None:
-            self.log.error(
-                f"This cloud {cloud_name}({cloud_id}) does not contain resource records. Please delete this cloud and create a new one."
-            )
+        try:
+            deployments = self.api_client.get_cloud_deployments_api_v2_clouds_cloud_id_deployments_get(
+                cloud_id=cloud_id,
+            ).results
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to retrieve cloud deployments: {e}")
+            return False
+
+        if not deployments:
+            self.log.error("No cloud deployments found for this cloud")
             return False
 
         self.cloud_event_producer.init_trace_context(
@@ -2003,99 +2072,100 @@ class CloudController(BaseController):
             CloudAnalyticsEventName.COMMAND_START, succeeded=True
         )
 
-        if cloud.provider == "AWS":
-            if boto3_session is None:
-                boto3_session = boto3.Session(region_name=cloud.region)
-            if not self.verify_aws_cloud_resources_for_create_cloud_resource(
-                cloud_resource=cloud_resource,
-                boto3_session=boto3_session,
-                region=cloud.region,
-                cloud_id=cloud_id,  # type: ignore
-                is_bring_your_own_resource=cloud.is_bring_your_own_resource,
-                is_private_network=cloud.is_private_cloud
-                if cloud.is_private_cloud
-                else False,
-                strict=strict,
-                _use_strict_iam_permissions=_use_strict_iam_permissions,
-            ):
-                self.cloud_event_producer.produce(
-                    CloudAnalyticsEventName.RESOURCES_VERIFIED,
-                    succeeded=False,
-                    logger=self.log,
+        deployment_results = []
+        for deployment in deployments:
+            try:
+                deployment_name = deployment.name or deployment.cloud_deployment_id
+
+                self.log.info(f"Verifying deployment: {deployment_name}")
+                result = self.verify_cloud_deployment(
+                    cloud_id,
+                    deployment,
+                    strict=strict,
+                    _use_strict_iam_permissions=_use_strict_iam_permissions,
+                    boto3_session=boto3_session,
                 )
-                return False
-        elif cloud.provider == "GCP":
-            credentials_dict = json.loads(cloud.credentials)
-            project_id = credentials_dict["project_id"]
-            host_project_id = credentials_dict.get("host_project_id")
-            if not self.verify_gcp_cloud_resources_from_create_cloud_resource(
-                cloud_resource=cloud_resource,
-                project_id=project_id,
-                host_project_id=host_project_id,
-                region=cloud.region,
-                cloud_id=cloud_id,  # type: ignore
-                yes=False,
-                strict=strict,
-                is_private_service_cloud=cloud.is_private_service_cloud,
-            ):
-                self.cloud_event_producer.produce(
-                    CloudAnalyticsEventName.RESOURCES_VERIFIED,
-                    succeeded=False,
-                    logger=self.log,
+                deployment_results.append((deployment_name, result))
+
+            except (ValueError, TypeError, KeyError, AttributeError, RuntimeError) as e:
+                deployment_name = getattr(deployment, "name", None) or getattr(
+                    deployment, "cloud_deployment_id", "unknown"
                 )
-                return False
-        else:
-            self.log.error(
-                f"This cloud {cloud_name}({cloud_id}) does not have a valid cloud provider."
-            )
-            self.cloud_event_producer.produce(
-                CloudAnalyticsEventName.RESOURCES_VERIFIED,
-                succeeded=False,
-                internal_error="invalid cloud provider",
-            )
-            return False
+                self.log.error(f"Failed to verify deployment {deployment_name}: {e}")
+                deployment_results.append((deployment_name, False))
+
+        self._print_deployment_verification_results(deployment_results)
+
+        overall_success = all(result for _, result in deployment_results)
 
         self.cloud_event_producer.produce(
-            CloudAnalyticsEventName.RESOURCES_VERIFIED, succeeded=True
+            CloudAnalyticsEventName.RESOURCES_VERIFIED, succeeded=overall_success,
         )
 
-        if len(functions_to_verify) == 0:
-            return True
+        if not overall_success:
+            return False
+
+        if len(functions_to_verify) > 0:
+            return CloudFunctionalVerificationController(
+                self.cloud_event_producer, self.log
+            ).start_verification(cloud_id, cloud.provider, functions_to_verify, yes=yes)
 
-        return CloudFunctionalVerificationController(
-            self.cloud_event_producer, self.log
-        ).start_verification(cloud_id, cloud.provider, functions_to_verify, yes=yes)
+        return True
 
     def verify_cloud_deployment(
-        self, cloud_id: str, cloud_deployment: CloudDeployment
+        self,
+        cloud_id: str,
+        cloud_deployment: CloudDeployment,
+        strict: bool = False,
+        _use_strict_iam_permissions: bool = False,  # This should only be used in testing.
+        boto3_session: Optional[boto3.Session] = None,
     ) -> bool:
-        if cloud_deployment.compute_stack != ComputeStack.VM:
-            # Verification is only supported for VM stack.
-            return True
-
-        if cloud_deployment.provider == CloudProviders.AWS:
-            return self.verify_aws_cloud_resources_for_cloud_deployment(
-                cloud_id=cloud_id, cloud_deployment=cloud_deployment,
-            )
-        elif cloud_deployment.provider == CloudProviders.GCP:
-            return self.verify_gcp_cloud_resources_from_cloud_deployment(
-                cloud_id=cloud_id, cloud_deployment=cloud_deployment,
+        if cloud_deployment.compute_stack == ComputeStack.VM:
+            if cloud_deployment.provider == CloudProviders.AWS:
+                return self.verify_aws_cloud_resources_for_cloud_deployment(
+                    cloud_id=cloud_id,
+                    cloud_deployment=cloud_deployment,
+                    strict=strict,
+                    _use_strict_iam_permissions=_use_strict_iam_permissions,
+                    boto3_session=boto3_session,
+                )
+            elif cloud_deployment.provider == CloudProviders.GCP:
+                return self.verify_gcp_cloud_resources_from_cloud_deployment(
+                    cloud_id=cloud_id, cloud_deployment=cloud_deployment, strict=strict,
+                )
+            else:
+                raise ValueError(
+                    f"Unsupported cloud provider: {cloud_deployment.provider}"
+                )
+        elif cloud_deployment.compute_stack == ComputeStack.K8S:
+            return KubernetesCloudDeploymentVerifier(self.log, self.api_client).verify(
+                cloud_deployment
             )
         else:
-            raise ValueError(f"Unsupported cloud provider: {cloud_deployment.provider}")
+            raise ValueError(
+                f"Unsupported compute stack: {cloud_deployment.compute_stack}"
+            )
 
     def verify_aws_cloud_resources_for_cloud_deployment(
-        self, cloud_id: str, cloud_deployment: CloudDeployment,
+        self,
+        cloud_id: str,
+        cloud_deployment: CloudDeployment,
+        strict: bool = False,
+        _use_strict_iam_permissions: bool = False,  # This should only be used in testing.
+        boto3_session: Optional[boto3.Session] = None,
     ) -> bool:
         assert cloud_deployment.region
         assert cloud_deployment.aws_config
         aws_config = cloud_deployment.aws_config
         file_storage = cloud_deployment.file_storage
-        object_storage = (
-            ObjectStorage(**cloud_deployment.object_storage)
-            if cloud_deployment.object_storage
-            else None
-        )
+        object_storage = cloud_deployment.object_storage
+
+        # Convert dict to ObjectStorage object if needed
+        if object_storage is not None and isinstance(object_storage, dict):
+            object_storage = ObjectStorage(**object_storage)
+
+        if boto3_session is None:
+            boto3_session = boto3.Session(region_name=cloud_deployment.region)
 
         return self.verify_aws_cloud_resources(
             aws_vpc_id=aws_config.vpc_id,
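
Under the new dispatch, the compute stack decides which verifier runs: VM stacks go through the provider-specific AWS/GCP resource checks, K8S stacks are handed to the new KubernetesCloudDeploymentVerifier, and anything else is rejected. A simplified sketch of that routing, with plain strings standing in for the real enums and verifier objects:

    # Simplified routing sketch; "VM"/"K8S"/"AWS"/"GCP" stand in for the real enum values.
    def route_verification(compute_stack: str, provider: str) -> str:
        if compute_stack == "VM":
            if provider == "AWS":
                return "verify_aws_cloud_resources_for_cloud_deployment"
            if provider == "GCP":
                return "verify_gcp_cloud_resources_from_cloud_deployment"
            raise ValueError(f"Unsupported cloud provider: {provider}")
        if compute_stack == "K8S":
            return "KubernetesCloudDeploymentVerifier.verify"
        raise ValueError(f"Unsupported compute stack: {compute_stack}")

    print(route_verification("VM", "GCP"))   # verify_gcp_cloud_resources_from_cloud_deployment
    print(route_verification("K8S", "AWS"))  # KubernetesCloudDeploymentVerifier.verify
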
@@ -2111,18 +2181,44 @@ class CloudController(BaseController):
             if file_storage and file_storage.mount_targets
             else None,
             aws_cloudformation_stack_id=None,
-            memorydb_cluster_config=AWSMemoryDBClusterConfig(
-                id=aws_config.memorydb_cluster_name,
-                endpoint=aws_config.memorydb_cluster_endpoint,
+            memorydb_cluster_config=self._get_memorydb_config_for_verification(
+                aws_config, cloud_deployment.region
             ),
-            boto3_session=boto3.Session(region_name=cloud_deployment.region),
+            boto3_session=boto3_session,
             region=cloud_deployment.region,
             cloud_id=cloud_id,
             is_bring_your_own_resource=True,
             is_private_network=cloud_deployment.networking_mode
             == NetworkingMode.PRIVATE,
+            strict=strict,
+            _use_strict_iam_permissions=_use_strict_iam_permissions,
         )
 
+    def _get_memorydb_config_for_verification(
+        self, aws_config, region: str
+    ) -> Optional[AWSMemoryDBClusterConfig]:
+        """Get MemoryDB cluster config for verification, fetching endpoint from AWS if needed."""
+        if not aws_config.memorydb_cluster_name:
+            return None
+
+        # If we already have the endpoint, use it
+        if aws_config.memorydb_cluster_endpoint:
+            return AWSMemoryDBClusterConfig(
+                id=aws_config.memorydb_cluster_name,
+                endpoint=aws_config.memorydb_cluster_endpoint,
+            )
+
+        # Otherwise, fetch it from AWS
+        try:
+            return _get_memorydb_cluster_config(
+                aws_config.memorydb_cluster_name, region, self.log,
+            )
+        except Exception as e:  # noqa: BLE001
+            self.log.warning(
+                f"Could not fetch MemoryDB cluster config for {aws_config.memorydb_cluster_name}: {e}"
+            )
+            return None
+
     def verify_aws_cloud_resources_for_create_cloud_resource(  # noqa: PLR0913
         self,
         cloud_resource: CreateCloudResource,
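
The new helper turns the MemoryDB settings into a three-way decision: no cluster name means no config, a stored endpoint is reused as-is, and otherwise the endpoint is looked up, returning None on failure rather than aborting verification. A compact sketch of that decision tree with a stubbed lookup (the fake endpoint format is invented):

    from typing import Optional, Tuple

    # Stubbed lookup; the real code calls _get_memorydb_cluster_config against AWS.
    def lookup_endpoint(cluster_name: str, region: str) -> str:
        return f"{cluster_name}.fake.{region}.example:6379"  # made-up endpoint

    def memorydb_config(name: Optional[str], endpoint: Optional[str], region: str) -> Optional[Tuple[str, str]]:
        if not name:
            return None                    # no MemoryDB cluster configured
        if endpoint:
            return (name, endpoint)        # endpoint already known
        try:
            return (name, lookup_endpoint(name, region))  # best-effort fetch
        except Exception:
            return None                    # verification proceeds without MemoryDB

    print(memorydb_config(None, None, "us-west-2"))        # None
    print(memorydb_config("mdb-demo", None, "us-west-2"))  # ('mdb-demo', 'mdb-demo.fake.us-west-2.example:6379')
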
@@ -2385,6 +2481,28 @@ class CloudController(BaseController):
                 f"{quota_error_str}\n\nFor instructions on how to increase quotas, visit this link: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html#request-increase"
             )
 
+    def _print_deployment_verification_results(
+        self, deployment_results: List[Tuple[str, bool]]
+    ) -> None:
+        """Print verification results for multiple deployments"""
+        self.log.info("=" * 60)
+        self.log.info("DEPLOYMENT VERIFICATION RESULTS:")
+        self.log.info("=" * 60)
+
+        for deployment_name, success in deployment_results:
+            status = "PASSED" if success else "FAILED"
+            self.log.info(f"{deployment_name}: {status}")
+
+        self.log.info("=" * 60)
+
+        passed_count = sum(1 for _, success in deployment_results if success)
+        total_count = len(deployment_results)
+
+        if passed_count == total_count:
+            self.log.info(
+                f"Overall Result: ALL {total_count} deployments verified successfully"
+            )
+
     def register_azure_or_generic_cloud(  # noqa: PLR0913
         self,
         name: str,
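
_print_deployment_verification_results only prints the "ALL ... verified successfully" summary when every deployment passed; the caller separately folds the same (name, passed) list into overall_success. A quick sketch with invented results:

    # Invented results; in the controller the list is built by the verify_cloud loop.
    deployment_results = [("primary-vm", True), ("gke-cell", False)]

    for name, ok in deployment_results:
        print(f"{name}: {'PASSED' if ok else 'FAILED'}")

    passed = sum(1 for _, ok in deployment_results if ok)
    if passed == len(deployment_results):
        print(f"Overall Result: ALL {len(deployment_results)} deployments verified successfully")

    overall_success = all(ok for _, ok in deployment_results)
    print("overall:", overall_success)  # overall: False
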
@@ -2828,17 +2946,16 @@ class CloudController(BaseController):
         ).start_verification(cloud_id, CloudProviders.AWS, functions_to_verify, yes)
 
     def verify_gcp_cloud_resources_from_cloud_deployment(
-        self, cloud_id: str, cloud_deployment: CloudDeployment,
+        self, cloud_id: str, cloud_deployment: CloudDeployment, strict: bool = False
     ) -> bool:
         assert cloud_deployment.region
         assert cloud_deployment.gcp_config
         gcp_config = cloud_deployment.gcp_config
         file_storage = cloud_deployment.file_storage
-        object_storage = (
-            ObjectStorage(**cloud_deployment.object_storage)
-            if cloud_deployment.object_storage
-            else None
-        )
+        object_storage = cloud_deployment.object_storage
+
+        if object_storage is not None and isinstance(object_storage, dict):
+            object_storage = ObjectStorage(**object_storage)
         return self.verify_gcp_cloud_resources(
             project_id=gcp_config.project_id,
             vpc_id=gcp_config.vpc_name,
@@ -2860,6 +2977,7 @@ class CloudController(BaseController):
             region=cloud_deployment.region,
             cloud_id=cloud_id,
             host_project_id=gcp_config.host_project_id,
+            strict=strict,
         )
 
     def verify_gcp_cloud_resources_from_create_cloud_resource(