anyscale 0.26.51__py3-none-any.whl → 0.26.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. anyscale/_private/anyscale_client/README.md +1 -1
  2. anyscale/_private/anyscale_client/anyscale_client.py +178 -46
  3. anyscale/_private/anyscale_client/common.py +61 -2
  4. anyscale/_private/anyscale_client/fake_anyscale_client.py +145 -8
  5. anyscale/_private/docgen/__main__.py +42 -31
  6. anyscale/_private/docgen/generator.py +63 -28
  7. anyscale/_private/docgen/models.md +4 -2
  8. anyscale/_private/sdk/__init__.py +124 -1
  9. anyscale/_private/workload/workload_config.py +4 -6
  10. anyscale/_private/workload/workload_sdk.py +105 -12
  11. anyscale/client/README.md +13 -11
  12. anyscale/client/openapi_client/__init__.py +3 -3
  13. anyscale/client/openapi_client/api/default_api.py +512 -316
  14. anyscale/client/openapi_client/models/__init__.py +3 -3
  15. anyscale/client/openapi_client/models/aws_config.py +2 -2
  16. anyscale/client/openapi_client/models/baseimagesenum.py +158 -1
  17. anyscale/client/openapi_client/models/cloud_data_bucket_presigned_url_request.py +31 -3
  18. anyscale/client/openapi_client/models/cloud_deployment.py +37 -36
  19. anyscale/client/openapi_client/models/cloud_resource.py +59 -3
  20. anyscale/client/openapi_client/models/cloud_resource_gcp.py +59 -3
  21. anyscale/client/openapi_client/models/create_cloud_resource.py +59 -3
  22. anyscale/client/openapi_client/models/create_cloud_resource_gcp.py +59 -3
  23. anyscale/client/openapi_client/models/create_resource_notification.py +31 -3
  24. anyscale/client/openapi_client/models/{decorated_cloud_deployment.py → decorated_cloud_resource.py} +124 -96
  25. anyscale/client/openapi_client/models/{clouddeployment_list_response.py → decoratedcloudresource_list_response.py} +15 -15
  26. anyscale/client/openapi_client/models/{decoratedclouddeployment_response.py → decoratedcloudresource_response.py} +11 -11
  27. anyscale/client/openapi_client/models/file_storage.py +4 -4
  28. anyscale/client/openapi_client/models/gcp_config.py +2 -2
  29. anyscale/client/openapi_client/models/ha_job_error_types.py +9 -2
  30. anyscale/client/openapi_client/models/object_storage.py +4 -4
  31. anyscale/client/openapi_client/models/ray_runtime_env_config.py +57 -1
  32. anyscale/client/openapi_client/models/resource_alert_event_type.py +2 -1
  33. anyscale/client/openapi_client/models/resource_notification.py +29 -1
  34. anyscale/client/openapi_client/models/supportedbaseimagesenum.py +155 -1
  35. anyscale/client/openapi_client/models/workload_info.py +31 -3
  36. anyscale/client/openapi_client/models/workload_state_info.py +29 -1
  37. anyscale/cloud/models.py +40 -43
  38. anyscale/commands/cloud_commands.py +93 -88
  39. anyscale/commands/command_examples.py +37 -49
  40. anyscale/commands/exec_commands.py +12 -1
  41. anyscale/commands/list_commands.py +42 -12
  42. anyscale/commands/project_commands.py +399 -115
  43. anyscale/commands/schedule_commands.py +22 -11
  44. anyscale/commands/service_commands.py +11 -6
  45. anyscale/commands/util.py +94 -1
  46. anyscale/commands/workspace_commands.py +92 -38
  47. anyscale/compute_config/__init__.py +1 -1
  48. anyscale/compute_config/_private/compute_config_sdk.py +8 -11
  49. anyscale/compute_config/commands.py +3 -3
  50. anyscale/compute_config/models.py +30 -30
  51. anyscale/controllers/cloud_controller.py +361 -360
  52. anyscale/controllers/kubernetes_verifier.py +1 -1
  53. anyscale/job/_private/job_sdk.py +41 -23
  54. anyscale/job/models.py +1 -1
  55. anyscale/project/__init__.py +101 -1
  56. anyscale/project/_private/project_sdk.py +90 -2
  57. anyscale/project/commands.py +188 -1
  58. anyscale/project/models.py +198 -2
  59. anyscale/sdk/anyscale_client/models/baseimagesenum.py +158 -1
  60. anyscale/sdk/anyscale_client/models/ray_runtime_env_config.py +57 -1
  61. anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +155 -1
  62. anyscale/service/_private/service_sdk.py +2 -1
  63. anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
  64. anyscale/util.py +3 -0
  65. anyscale/utils/runtime_env.py +3 -1
  66. anyscale/version.py +1 -1
  67. anyscale/workspace/commands.py +114 -23
  68. anyscale/workspace/models.py +3 -5
  69. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/METADATA +1 -1
  70. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/RECORD +75 -75
  71. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/WHEEL +0 -0
  72. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/entry_points.txt +0 -0
  73. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/licenses/LICENSE +0 -0
  74. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/licenses/NOTICE +0 -0
  75. {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,13 @@ from anyscale.client.openapi_client.models import (
42
42
  ComputeStack,
43
43
  CreateCloudResource,
44
44
  CreateCloudResourceGCP,
45
+ DecoratedCloudResource,
45
46
  EditableCloudResource,
46
47
  EditableCloudResourceGCP,
47
48
  FileStorage,
48
49
  GCPConfig,
49
50
  GCPFileStoreConfig,
51
+ KubernetesConfig,
50
52
  NetworkingMode,
51
53
  NFSMountTarget,
52
54
  ObjectStorage,
@@ -1425,18 +1427,6 @@ class CloudController(BaseController):
1425
1427
  cloud_id, CloudProviders.AWS, functions_to_verify, yes,
1426
1428
  )
1427
1429
 
1428
- def get_cloud_deployment(
1429
- self, cloud_id: str, cloud_deployment_id: str
1430
- ) -> CloudDeployment:
1431
- try:
1432
- return self.api_client.get_cloud_deployment_api_v2_clouds_cloud_id_deployment_get(
1433
- cloud_id=cloud_id, cloud_deployment_id=cloud_deployment_id,
1434
- ).result
1435
- except Exception as e: # noqa: BLE001
1436
- raise ClickException(
1437
- f"Failed to get cloud deployment {cloud_deployment_id} for cloud {cloud_id}. Error: {e}"
1438
- )
1439
-
1440
1430
  # Avoid displaying fields with empty values (since the values for optional fields default to None).
1441
1431
  def _remove_empty_values(self, d):
1442
1432
  if isinstance(d, dict):
@@ -1449,56 +1439,52 @@ class CloudController(BaseController):
1449
1439
  return [self._remove_empty_values(v) for v in d]
1450
1440
  return d
1451
1441
 
1452
- def get_cloud_deployments(self, cloud_id: str) -> Dict[str, Any]:
1442
+ def get_decorated_cloud_resources(
1443
+ self, cloud_id: str
1444
+ ) -> List[DecoratedCloudResource]:
1453
1445
  cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(
1454
1446
  cloud_id=cloud_id,
1455
1447
  ).result
1456
1448
 
1457
1449
  if cloud.is_aioa:
1458
1450
  raise ValueError(
1459
- "Listing cloud deployments is only supported for customer-hosted clouds."
1451
+ "Listing cloud resources is only supported for customer-hosted clouds."
1460
1452
  )
1461
1453
 
1462
1454
  try:
1463
- deployments = self.api_client.get_cloud_deployments_api_v2_clouds_cloud_id_deployments_get(
1455
+ return self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
1464
1456
  cloud_id=cloud_id,
1465
1457
  ).results
1466
1458
  except Exception as e: # noqa: BLE001
1467
1459
  raise ClickException(
1468
- f"Failed to get cloud deployments for cloud {cloud.name} ({cloud_id}). Error: {e}"
1460
+ f"Failed to get cloud resources for cloud {cloud.name} ({cloud_id}). Error: {e}"
1469
1461
  )
1470
1462
 
1471
- return {
1472
- "id": cloud_id,
1473
- "name": cloud.name,
1474
- "deployments": [
1475
- self._remove_empty_values(deployment.to_dict())
1476
- for deployment in deployments
1477
- ],
1478
- }
1479
-
1480
- def get_cloud_deployment_dict_by_name(
1481
- self, cloud_name: str, cloud_deployment_name: Optional[str]
1482
- ) -> Dict[str, Any]:
1483
- cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
1484
-
1485
- result = self.get_cloud_deployments(cloud_id)
1486
- deployments = result.get("deployments", [])
1487
- if len(deployments) == 0:
1488
- raise ClickException(f"Cloud {cloud_name} has no cloud deployments.")
1489
-
1490
- if cloud_deployment_name is None:
1491
- if len(deployments) > 1:
1492
- self.log.warning(
1493
- f"Cloud {cloud_name} has {len(deployments)} deployments, only the primary deployment will be returned."
1494
- )
1495
- return deployments[0]
1496
-
1497
- for deployment in deployments:
1498
- if deployment.get("name") == cloud_deployment_name:
1499
- return deployment
1500
-
1501
- raise ClickException(f"Cloud deployment {cloud_deployment_name} not found.")
1463
+ def get_formatted_cloud_resources(self, cloud_id: str) -> List[Any]:
1464
+ cloud_resources = self.get_decorated_cloud_resources(cloud_id)
1465
+ formatted_cloud_resources = [
1466
+ self._remove_empty_values(cloud_resource.to_dict())
1467
+ for cloud_resource in cloud_resources
1468
+ ]
1469
+ # Remove the deprecated cloud_deployment_id field.
1470
+ for d in formatted_cloud_resources:
1471
+ d.pop("cloud_deployment_id", None)
1472
+ return formatted_cloud_resources
1473
+
1474
+ def get_cloud_resources(self, cloud_id: str) -> List[CloudDeployment]:
1475
+ decorated_cloud_resources = self.get_decorated_cloud_resources(cloud_id)
1476
+
1477
+ # DecoratedCloudResource has extra fields that are not in CloudDeployment.
1478
+ allowed_keys = set(CloudDeployment.attribute_map.keys())
1479
+ allowed_keys.remove(
1480
+ "cloud_deployment_id"
1481
+ ) # Remove deprecated cloud_deployment_id field.
1482
+ return [
1483
+ CloudDeployment(
1484
+ **{k: v for k, v in resource.to_dict().items() if k in allowed_keys}
1485
+ )
1486
+ for resource in decorated_cloud_resources
1487
+ ]
1502
1488
 
1503
1489
  def update_aws_anyscale_iam_role(
1504
1490
  self,
@@ -1564,7 +1550,7 @@ class CloudController(BaseController):
1564
1550
 
1565
1551
  return role, iam_role_original_policy
1566
1552
 
1567
- def _generate_diff(self, existing: Dict[str, Any], new: Dict[str, Any]) -> str:
1553
+ def _generate_diff(self, existing: List[Any], new: List[Any]) -> str:
1568
1554
  """
1569
1555
  Generates a diff between the existing and new dicts.
1570
1556
  """
@@ -1590,48 +1576,59 @@ class CloudController(BaseController):
1590
1576
 
1591
1577
  return formatted_diff.strip()
1592
1578
 
1593
- def _preprocess_aws(self, cloud_id: str, deployment: CloudDeployment,) -> None:
1579
+ # Returns the role and original IAM policy, so that we can revert it if creating the cloud resource fails.
1580
+ def _preprocess_aws( # noqa: PLR0912
1581
+ self, cloud_id: str, deployment: CloudDeployment
1582
+ ) -> Tuple[Optional[Boto3Resource], Optional[str]]:
1594
1583
  if not deployment.aws_config and not deployment.file_storage:
1595
- return
1584
+ return None, None
1596
1585
 
1597
1586
  if not validate_aws_credentials(self.log):
1598
1587
  raise ClickException(
1599
- "Updating cloud deployments requires valid AWS credentials to be set locally. Learn more: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html"
1588
+ "Updating cloud resources requires valid AWS credentials to be set locally. Learn more: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html"
1600
1589
  )
1601
1590
 
1591
+ role, iam_role_original_policy = None, None
1592
+
1602
1593
  # Get EFS mount target IP.
1603
- if (
1604
- deployment.file_storage
1605
- and FileStorage(**deployment.file_storage).file_storage_id
1606
- ):
1607
- file_storage = FileStorage(**deployment.file_storage)
1594
+ file_storage = None
1595
+ if deployment.file_storage:
1596
+ if isinstance(deployment.file_storage, dict):
1597
+ file_storage = FileStorage(**deployment.file_storage)
1598
+ else:
1599
+ file_storage = deployment.file_storage
1608
1600
 
1609
- try:
1610
- boto3_session = boto3.Session(region_name=deployment.region)
1611
- efs_mount_target_ip = _get_aws_efs_mount_target_ip(
1612
- boto3_session, file_storage.file_storage_id,
1613
- )
1614
- if not efs_mount_target_ip:
1615
- raise ClickException(
1616
- f"EFS mount target IP not found for {file_storage.file_storage_id}."
1601
+ if file_storage.file_storage_id:
1602
+ try:
1603
+ boto3_session = boto3.Session(region_name=deployment.region)
1604
+ efs_mount_target_ip = _get_aws_efs_mount_target_ip(
1605
+ boto3_session, file_storage.file_storage_id,
1617
1606
  )
1618
- file_storage.mount_targets = [
1619
- NFSMountTarget(address=efs_mount_target_ip)
1620
- ]
1621
- except ClientError as e:
1622
- self.log.log_resource_exception(
1623
- CloudAnalyticsEventCloudResource.AWS_EFS, e
1624
- )
1625
- raise e
1607
+ if not efs_mount_target_ip:
1608
+ raise ClickException(
1609
+ f"EFS mount target IP not found for {file_storage.file_storage_id}."
1610
+ )
1611
+ file_storage.mount_targets = [
1612
+ NFSMountTarget(address=efs_mount_target_ip)
1613
+ ]
1614
+ except ClientError as e:
1615
+ self.log.log_resource_exception(
1616
+ CloudAnalyticsEventCloudResource.AWS_EFS, e
1617
+ )
1618
+ raise e
1626
1619
 
1627
1620
  deployment.file_storage = file_storage
1628
1621
 
1629
1622
  if deployment.aws_config:
1630
- aws_config = AWSConfig(**deployment.aws_config)
1623
+ if isinstance(deployment.aws_config, dict):
1624
+ aws_config = AWSConfig(**deployment.aws_config)
1625
+ else:
1626
+ aws_config = deployment.aws_config
1627
+
1631
1628
  assert deployment.region
1632
1629
 
1633
1630
  # Update Anyscale IAM role's assume policy to include the cloud ID as the external ID.
1634
- self.update_aws_anyscale_iam_role(
1631
+ role, iam_role_original_policy = self.update_aws_anyscale_iam_role(
1635
1632
  cloud_id,
1636
1633
  deployment.region,
1637
1634
  aws_config.anyscale_iam_role_id,
@@ -1658,13 +1655,19 @@ class CloudController(BaseController):
1658
1655
 
1659
1656
  deployment.aws_config = aws_config
1660
1657
 
1658
+ return role, iam_role_original_policy
1659
+
1661
1660
  def _preprocess_gcp(
1662
1661
  self, deployment: CloudDeployment,
1663
1662
  ):
1664
1663
  if not deployment.gcp_config:
1665
1664
  return
1666
1665
 
1667
- gcp_config = GCPConfig(**deployment.gcp_config)
1666
+ if isinstance(deployment.gcp_config, dict):
1667
+ gcp_config = GCPConfig(**deployment.gcp_config)
1668
+ else:
1669
+ gcp_config = deployment.gcp_config
1670
+
1668
1671
  deployment.gcp_config = gcp_config
1669
1672
  if not deployment.file_storage and not gcp_config.memorystore_instance_name:
1670
1673
  return
@@ -1681,7 +1684,11 @@ class CloudController(BaseController):
1681
1684
 
1682
1685
  # Get Filestore mount target IP and root dir.
1683
1686
  if deployment.file_storage:
1684
- fs = FileStorage(**deployment.file_storage)
1687
+ if isinstance(deployment.file_storage, dict):
1688
+ fs = FileStorage(**deployment.file_storage)
1689
+ else:
1690
+ fs = deployment.file_storage
1691
+
1685
1692
  if fs.file_storage_id:
1686
1693
  if not gcp_config.vpc_name:
1687
1694
  raise ClickException(
@@ -1699,7 +1706,7 @@ class CloudController(BaseController):
1699
1706
  NFSMountTarget(address=filestore_config.mount_target_ip)
1700
1707
  ]
1701
1708
 
1702
- deployment.file_storage = fs
1709
+ deployment.file_storage = fs
1703
1710
 
1704
1711
  # Get Memorystore config.
1705
1712
  if gcp_config.memorystore_instance_name:
@@ -1711,7 +1718,7 @@ class CloudController(BaseController):
1711
1718
 
1712
1719
  deployment.gcp_config = gcp_config
1713
1720
 
1714
- def create_cloud_deployment(
1721
+ def create_cloud_resource(
1715
1722
  self,
1716
1723
  cloud_name: str,
1717
1724
  spec_file: str,
@@ -1731,7 +1738,7 @@ class CloudController(BaseController):
1731
1738
  try:
1732
1739
  new_deployment = CloudDeployment(**spec)
1733
1740
  except Exception as e: # noqa: BLE001
1734
- raise ClickException(f"Failed to parse deployment: {e}")
1741
+ raise ClickException(f"Failed to parse cloud resource: {e}")
1735
1742
 
1736
1743
  if new_deployment.provider == CloudProviders.AWS:
1737
1744
  self._preprocess_aws(cloud_id=cloud_id, deployment=new_deployment)
@@ -1741,17 +1748,16 @@ class CloudController(BaseController):
1741
1748
  if not skip_verification and not self.verify_cloud_deployment(
1742
1749
  cloud_id=cloud_id, cloud_deployment=new_deployment
1743
1750
  ):
1744
- raise ClickException("Cloud deployment verification failed.")
1751
+ raise ClickException("Cloud resource verification failed.")
1745
1752
 
1746
1753
  # Log an additional warning if a new deployment is being added but a deployment with the same AWS/GCP region already exists.
1747
- existing_spec = self.get_cloud_deployments(cloud_id)
1748
- existing_deployments = {
1749
- deployment["cloud_deployment_id"]: CloudDeployment(**deployment)
1750
- for deployment in existing_spec["deployments"]
1754
+ existing_resources = {
1755
+ resource.cloud_resource_id: resource
1756
+ for resource in self.get_cloud_resources(cloud_id)
1751
1757
  }
1752
1758
  existing_stack_provider_regions = {
1753
1759
  (d.compute_stack, d.provider, d.region)
1754
- for d in existing_deployments.values()
1760
+ for d in existing_resources.values()
1755
1761
  if d.provider in (CloudProviders.AWS, CloudProviders.GCP)
1756
1762
  }
1757
1763
  if (
@@ -1760,125 +1766,144 @@ class CloudController(BaseController):
1760
1766
  new_deployment.region,
1761
1767
  ) in existing_stack_provider_regions:
1762
1768
  self.log.warning(
1763
- f"A {new_deployment.provider} {new_deployment.compute_stack} deployment in region {new_deployment.region} already exists."
1769
+ f"A {new_deployment.provider} {new_deployment.compute_stack} resource in region {new_deployment.region} already exists."
1764
1770
  )
1765
- confirm("Would you like to proceed with adding this deployment?", yes)
1771
+ confirm("Would you like to proceed with adding this cloud resource?", yes)
1766
1772
 
1767
- # Add the deployment.
1773
+ # Add the resource.
1768
1774
  try:
1769
- self.api_client.add_cloud_deployment_api_v2_clouds_cloud_id_add_deployment_put(
1775
+ self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
1770
1776
  cloud_id=cloud_id, cloud_deployment=new_deployment,
1771
1777
  )
1772
1778
  except Exception as e: # noqa: BLE001
1773
- raise ClickException(f"Failed to add cloud deployment: {e}")
1779
+ raise ClickException(f"Failed to add cloud resource: {e}")
1774
1780
 
1775
1781
  self.log.info(
1776
- f"Successfully created cloud deployment{' ' + new_deployment.name if new_deployment.name else ''} in cloud {existing_spec['name']}!"
1782
+ f"Successfully created cloud resource{' ' + new_deployment.name if new_deployment.name else ''} in cloud {cloud_name}!"
1777
1783
  )
1778
1784
 
1779
- def update_cloud_deployment( # noqa: PLR0912
1785
+ def update_cloud_resources( # noqa: PLR0912, C901
1780
1786
  self,
1781
- cloud: str,
1782
- spec_file: str,
1787
+ cloud_name: Optional[str],
1788
+ cloud_id: Optional[str],
1789
+ resources_file: str,
1783
1790
  skip_verification: bool = False,
1784
1791
  yes: bool = False,
1785
1792
  ):
1793
+ if not cloud_id:
1794
+ cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
1795
+ assert cloud_id
1796
+
1786
1797
  # Read the spec file.
1787
- path = pathlib.Path(spec_file)
1798
+ path = pathlib.Path(resources_file)
1788
1799
  if not path.exists():
1789
- raise ClickException(f"{spec_file} does not exist.")
1800
+ raise ClickException(f"{resources_file} does not exist.")
1790
1801
  if not path.is_file():
1791
- raise ClickException(f"{spec_file} is not a file.")
1802
+ raise ClickException(f"{resources_file} is not a file.")
1792
1803
 
1793
1804
  spec = yaml.safe_load(path.read_text())
1794
- try:
1795
- updated_deployment = CloudDeployment(**spec)
1796
- except Exception as e: # noqa: BLE001
1797
- raise ClickException(f"Failed to parse cloud deployment: {e}")
1798
1805
 
1799
- if not updated_deployment.cloud_deployment_id:
1806
+ # Get the existing spec.
1807
+ existing_resources = self.get_cloud_resources(cloud_id=cloud_id)
1808
+
1809
+ if len(existing_resources) > len(spec):
1800
1810
  raise ClickException(
1801
- "The cloud deployment must include a cloud_deployment_id."
1811
+ "Please use `anyscale cloud resource delete` to remove cloud resources."
1802
1812
  )
1803
-
1804
- # Get the existing cloud deployment.
1805
- cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud)
1806
- existing_deployment = self.get_cloud_deployment(
1807
- cloud_id=cloud_id,
1808
- cloud_deployment_id=updated_deployment.cloud_deployment_id,
1809
- )
1810
- if (
1811
- updated_deployment.provider == CloudProviders.PCP
1812
- or existing_deployment.provider == CloudProviders.PCP
1813
- ):
1813
+ if len(existing_resources) < len(spec):
1814
1814
  raise ClickException(
1815
- "Please use the `anyscale machine-pool` CLI to update machine pools."
1815
+ "Please use `anyscale cloud resource create` to add cloud resources."
1816
1816
  )
1817
1817
 
1818
- # Diff the existing and new cloud deployments.
1818
+ # Diff the existing and new specs
1819
1819
  diff = self._generate_diff(
1820
- self._remove_empty_values(existing_deployment.to_dict()),
1821
- self._remove_empty_values(updated_deployment.to_dict()),
1820
+ [self._remove_empty_values(r.to_dict()) for r in existing_resources], spec
1822
1821
  )
1823
1822
  if not diff:
1824
1823
  self.log.info("No changes detected.")
1825
1824
  return
1826
1825
 
1827
- # Preprocess the deployment if necessary.
1828
- if updated_deployment.provider == CloudProviders.AWS:
1829
- self._preprocess_aws(cloud_id=cloud_id, deployment=updated_deployment)
1830
- elif updated_deployment.provider == CloudProviders.GCP:
1831
- self._preprocess_gcp(deployment=updated_deployment)
1832
- # Skip verification for Kubernetes stacks or if explicitly requested
1833
- if updated_deployment.compute_stack == ComputeStack.K8S:
1834
- self.log.info("Skipping verification for Kubernetes compute stack.")
1835
- elif not skip_verification and not self.verify_cloud_deployment(
1836
- cloud_id=cloud_id, cloud_deployment=updated_deployment
1837
- ):
1838
- raise ClickException(
1839
- f"Verification failed for cloud deployment {updated_deployment.name}."
1840
- )
1826
+ existing_resources_dict = {
1827
+ resource.cloud_resource_id: resource for resource in existing_resources
1828
+ }
1829
+
1830
+ updated_deployments: List[CloudDeployment] = []
1831
+ for d in spec:
1832
+ try:
1833
+ deployment = CloudDeployment(**d)
1834
+ except Exception as e: # noqa: BLE001
1835
+ raise ClickException(f"Failed to parse cloud resource: {e}")
1836
+
1837
+ if not deployment.cloud_resource_id:
1838
+ raise ClickException(
1839
+ "All cloud resources must include a cloud_resource_id."
1840
+ )
1841
+ if deployment.cloud_resource_id not in existing_resources_dict:
1842
+ raise ClickException(
1843
+ f"Cloud resource {deployment.cloud_resource_id} not found."
1844
+ )
1845
+ if deployment.provider == CloudProviders.PCP:
1846
+ raise ClickException(
1847
+ "Please use the `anyscale machine-pool` CLI to update machine pools."
1848
+ )
1849
+ if deployment != existing_resources_dict[deployment.cloud_resource_id]:
1850
+ updated_deployments.append(deployment)
1841
1851
 
1842
1852
  # Log the diff and confirm.
1843
1853
  self.log.info(f"Detected the following changes:\n{diff}")
1844
1854
 
1845
- confirm("Would you like to proceed with updating this cloud deployment?", yes)
1855
+ confirm("Would you like to proceed with updating this cloud?", yes)
1856
+
1857
+ # Preprocess the deployments if necessary.
1858
+ for deployment in updated_deployments:
1859
+ if deployment.provider == CloudProviders.AWS:
1860
+ self._preprocess_aws(cloud_id=cloud_id, deployment=deployment)
1861
+ elif deployment.provider == CloudProviders.GCP:
1862
+ self._preprocess_gcp(deployment=deployment)
1863
+
1864
+ # Skip verification for Kubernetes stacks or if explicitly requested
1865
+ if deployment.compute_stack == ComputeStack.K8S:
1866
+ self.log.info("Skipping verification for Kubernetes compute stack.")
1867
+ elif not skip_verification and not self.verify_cloud_deployment(
1868
+ cloud_id=cloud_id, cloud_deployment=deployment
1869
+ ):
1870
+ raise ClickException(
1871
+ f"Verification failed for cloud resource {deployment.name or deployment.cloud_resource_id}."
1872
+ )
1846
1873
 
1847
- # Update the deployment.
1874
+ # Update the cloud resources.
1848
1875
  try:
1849
- self.api_client.update_cloud_deployment_api_v2_clouds_cloud_id_update_deployment_put(
1850
- cloud_id=cloud_id, cloud_deployment=updated_deployment,
1876
+ self.api_client.update_cloud_resources_api_v2_clouds_cloud_id_resources_put(
1877
+ cloud_id=cloud_id, cloud_deployment=updated_deployments,
1851
1878
  )
1852
1879
  except Exception as e: # noqa: BLE001
1853
- raise ClickException(f"Failed to update cloud deployment: {e}")
1880
+ raise ClickException(f"Failed to update cloud resources: {e}")
1854
1881
 
1855
- self.log.info(
1856
- f"Successfully updated cloud deployment {updated_deployment.name or updated_deployment.cloud_deployment_id} in cloud {cloud}."
1857
- )
1882
+ self.log.info(f"Successfully updated cloud {cloud_name or cloud_id}.")
1858
1883
 
1859
- def remove_cloud_deployment(
1860
- self, cloud_name: str, deployment_name: str, yes: bool,
1884
+ def remove_cloud_resource(
1885
+ self, cloud_name: str, resource_name: str, yes: bool,
1861
1886
  ):
1862
1887
  confirm(
1863
- f"Please confirm that you would like to remove deployment {deployment_name} from cloud {cloud_name}.",
1888
+ f"Please confirm that you would like to remove resource {resource_name} from cloud {cloud_name}.",
1864
1889
  yes,
1865
1890
  )
1866
1891
 
1867
1892
  cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
1868
1893
  try:
1869
- with self.log.spinner("Removing cloud deployment..."):
1870
- self.api_client.remove_cloud_deployment_api_v2_clouds_cloud_id_remove_deployment_delete(
1871
- cloud_id=cloud_id, cloud_deployment_name=deployment_name,
1894
+ with self.log.spinner("Removing cloud resource..."):
1895
+ self.api_client.remove_cloud_resource_api_v2_clouds_cloud_id_remove_resource_delete(
1896
+ cloud_id=cloud_id, cloud_resource_name=resource_name,
1872
1897
  )
1873
1898
  except Exception as e: # noqa: BLE001
1874
- raise ClickException(f"Failed to remove cloud deployment: {e}")
1899
+ raise ClickException(f"Failed to remove cloud resource: {e}")
1875
1900
 
1876
1901
  self.log.warning(
1877
1902
  "The trust policy or service account that provides access to Anyscale's control plane needs to be deleted manually if you no longer wish for Anyscale to have access."
1878
1903
  )
1879
1904
 
1880
1905
  self.log.info(
1881
- f"Successfully removed deployment {deployment_name} from cloud {cloud_name}!"
1906
+ f"Successfully removed resource {resource_name} from cloud {cloud_name}!"
1882
1907
  )
1883
1908
 
1884
1909
  def get_cloud_config(
@@ -2027,7 +2052,7 @@ class CloudController(BaseController):
2027
2052
  yes: bool = False,
2028
2053
  ) -> bool:
2029
2054
  """
2030
- Verifies a cloud by name or id, including all cloud deployments.
2055
+ Verifies a cloud by name or id, including all cloud resources.
2031
2056
 
2032
2057
  Note: If your changes involve operations that may require additional permissions
2033
2058
  (for example, `boto3_session.client("efs").describe_backup_policy`), it's important
@@ -2053,15 +2078,15 @@ class CloudController(BaseController):
2053
2078
  return False
2054
2079
 
2055
2080
  try:
2056
- deployments = self.api_client.get_cloud_deployments_api_v2_clouds_cloud_id_deployments_get(
2081
+ cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
2057
2082
  cloud_id=cloud_id,
2058
2083
  ).results
2059
2084
  except Exception as e: # noqa: BLE001
2060
- self.log.error(f"Failed to retrieve cloud deployments: {e}")
2085
+ self.log.error(f"Failed to retrieve cloud resources: {e}")
2061
2086
  return False
2062
2087
 
2063
- if not deployments:
2064
- self.log.error("No cloud deployments found for this cloud")
2088
+ if not cloud_resources:
2089
+ self.log.error("No cloud resources found for this cloud")
2065
2090
  return False
2066
2091
 
2067
2092
  self.cloud_event_producer.init_trace_context(
@@ -2073,31 +2098,35 @@ class CloudController(BaseController):
2073
2098
  CloudAnalyticsEventName.COMMAND_START, succeeded=True
2074
2099
  )
2075
2100
 
2076
- deployment_results = []
2077
- for deployment in deployments:
2101
+ cloud_resource_results = []
2102
+ for cloud_resource in cloud_resources:
2078
2103
  try:
2079
- deployment_name = deployment.name or deployment.cloud_deployment_id
2104
+ cloud_resource_name = (
2105
+ cloud_resource.name or cloud_resource.cloud_resource_id
2106
+ )
2080
2107
 
2081
- self.log.info(f"Verifying deployment: {deployment_name}")
2108
+ self.log.info(f"Verifying cloud resource: {cloud_resource_name}")
2082
2109
  result = self.verify_cloud_deployment(
2083
2110
  cloud_id,
2084
- deployment,
2111
+ cloud_resource,
2085
2112
  strict=strict,
2086
2113
  _use_strict_iam_permissions=_use_strict_iam_permissions,
2087
2114
  boto3_session=boto3_session,
2088
2115
  )
2089
- deployment_results.append((deployment_name, result))
2116
+ cloud_resource_results.append((cloud_resource_name, result))
2090
2117
 
2091
2118
  except (ValueError, TypeError, KeyError, AttributeError, RuntimeError) as e:
2092
- deployment_name = getattr(deployment, "name", None) or getattr(
2093
- deployment, "cloud_deployment_id", "unknown"
2119
+ cloud_resource_name = getattr(cloud_resource, "name", None) or getattr(
2120
+ cloud_resource, "cloud_resource_id", "unknown"
2121
+ )
2122
+ self.log.error(
2123
+ f"Failed to verify cloud resource {cloud_resource_name}: {e}"
2094
2124
  )
2095
- self.log.error(f"Failed to verify deployment {deployment_name}: {e}")
2096
- deployment_results.append((deployment_name, False))
2125
+ cloud_resource_results.append((cloud_resource_name, False))
2097
2126
 
2098
- self._print_deployment_verification_results(deployment_results)
2127
+ self._print_cloud_resource_verification_results(cloud_resource_results)
2099
2128
 
2100
- overall_success = all(result for _, result in deployment_results)
2129
+ overall_success = all(result for _, result in cloud_resource_results)
2101
2130
 
2102
2131
  self.cloud_event_producer.produce(
2103
2132
  CloudAnalyticsEventName.RESOURCES_VERIFIED, succeeded=overall_success,
@@ -2154,6 +2183,7 @@ class CloudController(BaseController):
2154
2183
  strict: bool = False,
2155
2184
  _use_strict_iam_permissions: bool = False, # This should only be used in testing.
2156
2185
  boto3_session: Optional[boto3.Session] = None,
2186
+ logger: CloudSetupLogger = None,
2157
2187
  ) -> bool:
2158
2188
  assert cloud_deployment.region
2159
2189
  assert cloud_deployment.aws_config
@@ -2193,6 +2223,7 @@ class CloudController(BaseController):
2193
2223
  == NetworkingMode.PRIVATE,
2194
2224
  strict=strict,
2195
2225
  _use_strict_iam_permissions=_use_strict_iam_permissions,
2226
+ logger=logger,
2196
2227
  )
2197
2228
 
2198
2229
  def _get_memorydb_config_for_verification(
@@ -2482,26 +2513,26 @@ class CloudController(BaseController):
2482
2513
  f"{quota_error_str}\n\nFor instructions on how to increase quotas, visit this link: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html#request-increase"
2483
2514
  )
2484
2515
 
2485
- def _print_deployment_verification_results(
2486
- self, deployment_results: List[Tuple[str, bool]]
2516
+ def _print_cloud_resource_verification_results(
2517
+ self, cloud_resource_results: List[Tuple[str, bool]]
2487
2518
  ) -> None:
2488
- """Print verification results for multiple deployments"""
2519
+ """Print verification results for multiple cloud resources"""
2489
2520
  self.log.info("=" * 60)
2490
- self.log.info("DEPLOYMENT VERIFICATION RESULTS:")
2521
+ self.log.info("CLOUD RESOURCE VERIFICATION RESULTS:")
2491
2522
  self.log.info("=" * 60)
2492
2523
 
2493
- for deployment_name, success in deployment_results:
2524
+ for cloud_resource_name, success in cloud_resource_results:
2494
2525
  status = "PASSED" if success else "FAILED"
2495
- self.log.info(f"{deployment_name}: {status}")
2526
+ self.log.info(f"{cloud_resource_name}: {status}")
2496
2527
 
2497
2528
  self.log.info("=" * 60)
2498
2529
 
2499
- passed_count = sum(1 for _, success in deployment_results if success)
2500
- total_count = len(deployment_results)
2530
+ passed_count = sum(1 for _, success in cloud_resource_results if success)
2531
+ total_count = len(cloud_resource_results)
2501
2532
 
2502
2533
  if passed_count == total_count:
2503
2534
  self.log.info(
2504
- f"Overall Result: ALL {total_count} deployments verified successfully"
2535
+ f"Overall Result: ALL {total_count} cloud resources verified successfully"
2505
2536
  )
2506
2537
 
2507
2538
  def register_azure_or_generic_cloud( # noqa: PLR0913
@@ -2517,6 +2548,8 @@ class CloudController(BaseController):
2517
2548
  cloud_storage_bucket_region: Optional[str] = None,
2518
2549
  nfs_mount_targets: Optional[List[str]] = None,
2519
2550
  nfs_mount_path: Optional[str] = None,
2551
+ persistent_volume_claim: Optional[str] = None,
2552
+ csi_ephemeral_volume_driver: Optional[str] = None,
2520
2553
  kubernetes_zones: Optional[List[str]] = None,
2521
2554
  anyscale_operator_iam_identity: Optional[str] = None,
2522
2555
  ) -> None:
@@ -2576,19 +2609,30 @@ class CloudController(BaseController):
2576
2609
  # Attempt to create the cloud resource.
2577
2610
  try:
2578
2611
  with self.log.spinner("Registering Anyscale cloud resources..."):
2579
- cloud_resource = self.api_client.update_cloud_with_cloud_resource_api_v2_clouds_with_cloud_resource_router_cloud_id_put(
2612
+ self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
2580
2613
  cloud_id=cloud_id,
2581
- update_cloud_with_cloud_resource=UpdateCloudWithCloudResource(
2582
- cloud_resource_to_update=CreateCloudResource(
2583
- compute_stack=ComputeStack.K8S,
2584
- kubernetes_zones=kubernetes_zones,
2585
- cloud_storage_bucket_name=cloud_storage_bucket_name,
2586
- cloud_storage_bucket_endpoint=cloud_storage_bucket_endpoint,
2587
- cloud_storage_bucket_region=cloud_storage_bucket_region
2588
- or region,
2589
- nfs_mount_targets=mount_targets,
2590
- nfs_mount_path=nfs_mount_path,
2591
- ),
2614
+ cloud_deployment=CloudDeployment(
2615
+ compute_stack=ComputeStack.K8S,
2616
+ provider=cloud_provider,
2617
+ region=region,
2618
+ object_storage=ObjectStorage(
2619
+ bucket_name=cloud_storage_bucket_name,
2620
+ region=cloud_storage_bucket_region or region,
2621
+ endpoint=cloud_storage_bucket_endpoint,
2622
+ )
2623
+ if cloud_storage_bucket_name
2624
+ else None,
2625
+ file_storage=FileStorage(
2626
+ mount_targets=mount_targets,
2627
+ mount_path=nfs_mount_path,
2628
+ persistent_volume_claim=persistent_volume_claim,
2629
+ csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
2630
+ )
2631
+ if mount_targets
2632
+ or persistent_volume_claim
2633
+ or csi_ephemeral_volume_driver
2634
+ else None,
2635
+ kubernetes_config=KubernetesConfig(zones=kubernetes_zones,),
2592
2636
  ),
2593
2637
  )
2594
2638
 
@@ -2611,26 +2655,27 @@ class CloudController(BaseController):
2611
2655
  raise ClickException(f"Cloud registration failed! {e}")
2612
2656
 
2613
2657
  # TODO (shomilj): Fetch & optionally run the Helm installation here.
2614
- cloud_resource_id = cloud_resource.result.cloud_resource.id
2615
- # For Azure and generic providers, add CLI token to helm command
2616
- if provider in ["azure", "generic"]:
2617
- helm_command = self._generate_helm_upgrade_command(
2618
- provider=provider,
2619
- cloud_deployment_id=cloud_resource_id,
2620
- region=region if provider != "generic" else None,
2621
- kubernetes_zones=kubernetes_zones,
2622
- operator_iam_identity=anyscale_operator_iam_identity
2623
- if provider == "azure"
2624
- else None,
2625
- anyscale_cli_token=None, # TODO: use $ANYSCALE_CLI_TOKEN placeholder
2626
- )
2627
- else:
2628
- helm_command = self._generate_helm_upgrade_command(
2629
- provider=provider,
2630
- cloud_deployment_id=cloud_resource_id,
2631
- region=region,
2632
- kubernetes_zones=kubernetes_zones,
2633
- )
2658
+
2659
+ # Get the cloud resource ID to pass to the helm command.
2660
+ cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
2661
+ cloud_id=cloud_id,
2662
+ ).results
2663
+ assert (
2664
+ len(cloud_resources) == 1
2665
+ ), f"Expected 1 cloud resource, got {len(cloud_resources)}"
2666
+ cloud_resource_id = cloud_resources[0].cloud_resource_id
2667
+
2668
  + # Add the CLI token to the helm command.
2669
+ helm_command = self._generate_helm_upgrade_command(
2670
+ provider=provider,
2671
+ cloud_deployment_id=cloud_resource_id,
2672
+ region=region if cloud_provider == CloudProviders.AZURE else None,
2673
+ kubernetes_zones=kubernetes_zones,
2674
+ operator_iam_identity=anyscale_operator_iam_identity
2675
+ if cloud_provider == CloudProviders.AZURE
2676
+ else None,
2677
+ anyscale_cli_token=None, # TODO: use $ANYSCALE_CLI_TOKEN placeholder
2678
+ )
2634
2679
 
2635
2680
  self.log.info(
2636
2681
  f"Cloud registration complete! To install the Anyscale operator, run:\n\n{helm_command}"
@@ -2661,6 +2706,8 @@ class CloudController(BaseController):
2661
2706
  compute_stack: ComputeStack = ComputeStack.VM,
2662
2707
  kubernetes_zones: Optional[List[str]] = None,
2663
2708
  anyscale_operator_iam_identity: Optional[str] = None,
2709
+ persistent_volume_claim: Optional[str] = None,
2710
+ csi_ephemeral_volume_driver: Optional[str] = None,
2664
2711
  ):
2665
2712
  functions_to_verify = self._validate_functional_verification_args(
2666
2713
  functional_verify
@@ -2721,55 +2768,40 @@ class CloudController(BaseController):
2721
2768
  raise
2722
2769
 
2723
2770
  try:
2724
- # The Anyscale IAM role is optional for the K8s stack.
2725
- has_anyscale_iam_role = compute_stack == ComputeStack.VM or (
2726
- compute_stack == ComputeStack.K8S and anyscale_iam_role_id
2727
- )
2728
-
2729
- iam_role_original_policy = None
2730
- if has_anyscale_iam_role:
2731
- # Update anyscale IAM role's assume policy to include the cloud id as the external ID
2732
- role, iam_role_original_policy = self.update_aws_anyscale_iam_role(
2733
- cloud_id=cloud_id,
2734
- region=region,
2771
+ cloud_resource = CloudDeployment(
2772
+ compute_stack=compute_stack,
2773
+ provider=CloudProviders.AWS,
2774
+ region=region,
2775
+ networking_mode=NetworkingMode.PRIVATE
2776
+ if private_network
2777
+ else NetworkingMode.PUBLIC,
2778
+ object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
2779
+ file_storage=FileStorage(
2780
+ file_storage_id=efs_id,
2781
+ persistent_volume_claim=persistent_volume_claim,
2782
+ csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
2783
+ )
2784
+ if efs_id or persistent_volume_claim or csi_ephemeral_volume_driver
2785
+ else None,
2786
+ aws_config=AWSConfig(
2787
+ vpc_id=vpc_id,
2788
+ subnet_ids=subnet_ids,
2789
+ security_group_ids=security_group_ids,
2735
2790
  anyscale_iam_role_id=anyscale_iam_role_id,
2736
2791
  external_id=external_id,
2792
+ cluster_iam_role_id=instance_iam_role_id,
2793
+ memorydb_cluster_name=memorydb_cluster_id,
2794
+ ),
2795
+ kubernetes_config=KubernetesConfig(
2796
+ anyscale_operator_iam_identity=anyscale_operator_iam_identity,
2797
+ zones=kubernetes_zones,
2737
2798
  )
2738
-
2739
- # When running on the VM compute stack, validate and retrieve the EFS mount target IP.
2740
- # When running on the K8S compute stack, EFS is optional; if efs_id is provided, then
2741
- # validate and retrieve the EFS mount target IP.
2742
- if efs_id:
2743
- try:
2744
- boto3_session = boto3.Session(region_name=region)
2745
- aws_efs_mount_target_ip = _get_aws_efs_mount_target_ip(
2746
- boto3_session, efs_id
2747
- )
2748
- except ClientError as e:
2749
- self.log.log_resource_exception(
2750
- CloudAnalyticsEventCloudResource.AWS_EFS, e
2751
- )
2752
- raise e
2753
- else:
2754
- boto3_session = None
2755
- aws_efs_mount_target_ip = None
2756
-
2757
- # When running on the VM compute stack, associate the AWS subnets with their availability zones.
2758
- if compute_stack == ComputeStack.VM:
2759
- aws_subnet_ids_with_availability_zones = associate_aws_subnets_with_azs(
2760
- subnet_ids, region, self.log
2761
- )
2762
- else:
2763
- aws_subnet_ids_with_availability_zones = None
2764
-
2765
- # If memorydb cluster is provided, get the memorydb cluster config.
2766
- if memorydb_cluster_id is not None:
2767
- memorydb_cluster_config = _get_memorydb_cluster_config(
2768
- memorydb_cluster_id, region, self.log
2769
- )
2770
- else:
2771
- memorydb_cluster_config = None
2772
-
2799
+ if compute_stack == ComputeStack.K8S
2800
+ else None,
2801
+ )
2802
+ role, iam_role_original_policy = self._preprocess_aws(
2803
+ cloud_id=cloud_id, deployment=cloud_resource
2804
+ )
2773
2805
  self.cloud_event_producer.produce(
2774
2806
  CloudAnalyticsEventName.PREPROCESS_COMPLETE, succeeded=True
2775
2807
  )
@@ -2803,44 +2835,17 @@ class CloudController(BaseController):
2803
2835
  )
2804
2836
  raise ClickException(f"Cloud registration failed! {error}")
2805
2837
 
2806
- aws_iam_role_arns = None
2807
- if compute_stack == ComputeStack.VM:
2808
- aws_iam_role_arns = [anyscale_iam_role_id, instance_iam_role_id]
2809
- elif compute_stack == ComputeStack.K8S and anyscale_iam_role_id:
2810
- aws_iam_role_arns = [anyscale_iam_role_id]
2811
-
2812
2838
  try:
2813
2839
  # Verify cloud resources meet our requirement
2814
- create_cloud_resource = CreateCloudResource(
2815
- aws_vpc_id=vpc_id,
2816
- aws_subnet_ids_with_availability_zones=aws_subnet_ids_with_availability_zones,
2817
- aws_iam_role_arns=aws_iam_role_arns,
2818
- aws_security_groups=security_group_ids,
2819
- aws_s3_id=cloud_storage_bucket_name[len(S3_STORAGE_PREFIX) :],
2820
- aws_efs_id=efs_id,
2821
- aws_efs_mount_target_ip=aws_efs_mount_target_ip,
2822
- memorydb_cluster_config=memorydb_cluster_config,
2823
- compute_stack=compute_stack,
2824
- kubernetes_zones=kubernetes_zones,
2825
- kubernetes_dataplane_identity=anyscale_operator_iam_identity,
2826
- cloud_storage_bucket_name=cloud_storage_bucket_name,
2827
- )
2828
-
2829
2840
  # Verification is only performed for VM compute stack.
2830
2841
  # TODO (shomilj): Add verification to the K8S compute stack as well.
2831
2842
  if compute_stack == ComputeStack.VM:
2832
2843
  with self.log.spinner("Verifying cloud resources...") as spinner:
2833
- if boto3_session is None:
2834
- boto3_session = boto3.Session(region_name=region)
2835
2844
  if (
2836
2845
  not skip_verifications
2837
- and not self.verify_aws_cloud_resources_for_create_cloud_resource(
2838
- cloud_resource=create_cloud_resource,
2839
- boto3_session=boto3_session,
2840
- region=region,
2841
- is_bring_your_own_resource=True,
2842
- is_private_network=private_network,
2846
+ and not self.verify_aws_cloud_resources_for_cloud_deployment(
2843
2847
  cloud_id=cloud_id,
2848
+ cloud_deployment=cloud_resource,
2844
2849
  logger=CloudSetupLogger(spinner_manager=spinner),
2845
2850
  )
2846
2851
  ):
@@ -2888,12 +2893,9 @@ class CloudController(BaseController):
2888
2893
  with self.log.spinner(
2889
2894
  "Updating Anyscale cloud with cloud resource..."
2890
2895
  ) as spinner:
2891
- # update cloud with verified cloud resources
2892
- cloud_resource = self.api_client.update_cloud_with_cloud_resource_api_v2_clouds_with_cloud_resource_router_cloud_id_put(
2893
- cloud_id=cloud_id,
2894
- update_cloud_with_cloud_resource=UpdateCloudWithCloudResource(
2895
- cloud_resource_to_update=create_cloud_resource,
2896
- ),
2896
+ # Update cloud with verified cloud resources.
2897
+ self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
2898
+ cloud_id=cloud_id, cloud_deployment=cloud_resource,
2897
2899
  )
2898
2900
  # For now, only wait for the cloud to be active if the compute stack is VM.
2899
2901
  # TODO (shomilj): support this fully for Kubernetes after provider metadata
@@ -2901,7 +2903,15 @@ class CloudController(BaseController):
2901
2903
  if compute_stack == ComputeStack.VM:
2902
2904
  self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.AWS)
2903
2905
  if compute_stack == ComputeStack.K8S:
2904
- cloud_resource_id = cloud_resource.result.cloud_resource.id
2906
+ # Get the cloud resource ID to pass to the helm command.
2907
+ cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
2908
+ cloud_id=cloud_id,
2909
+ ).results
2910
+ assert (
2911
+ len(cloud_resources) == 1
2912
+ ), f"Expected 1 cloud resource, got {len(cloud_resources)}"
2913
+ cloud_resource_id = cloud_resources[0].cloud_resource_id
2914
+
2905
2915
  helm_command = self._generate_helm_upgrade_command(
2906
2916
  provider="aws",
2907
2917
  cloud_deployment_id=cloud_resource_id,
@@ -2947,7 +2957,12 @@ class CloudController(BaseController):
2947
2957
  ).start_verification(cloud_id, CloudProviders.AWS, functions_to_verify, yes)
2948
2958
 
2949
2959
  def verify_gcp_cloud_resources_from_cloud_deployment(
2950
- self, cloud_id: str, cloud_deployment: CloudDeployment, strict: bool = False
2960
+ self,
2961
+ cloud_id: str,
2962
+ cloud_deployment: CloudDeployment,
2963
+ strict: bool = False,
2964
+ yes: bool = False,
2965
+ is_private_service_cloud: bool = False,
2951
2966
  ) -> bool:
2952
2967
  assert cloud_deployment.region
2953
2968
  assert cloud_deployment.gcp_config
@@ -2979,6 +2994,8 @@ class CloudController(BaseController):
2979
2994
  cloud_id=cloud_id,
2980
2995
  host_project_id=gcp_config.host_project_id,
2981
2996
  strict=strict,
2997
+ yes=yes,
2998
+ is_private_service_cloud=is_private_service_cloud,
2982
2999
  )
2983
3000
 
2984
3001
  def verify_gcp_cloud_resources_from_create_cloud_resource(
@@ -3187,11 +3204,12 @@ class CloudController(BaseController):
3187
3204
  compute_stack: ComputeStack = ComputeStack.VM,
3188
3205
  kubernetes_zones: Optional[List[str]] = None,
3189
3206
  anyscale_operator_iam_identity: Optional[str] = None,
3207
+ persistent_volume_claim: Optional[str] = None,
3208
+ csi_ephemeral_volume_driver: Optional[str] = None,
3190
3209
  ):
3191
3210
  functions_to_verify = self._validate_functional_verification_args(
3192
3211
  functional_verify
3193
3212
  )
3194
- gcp_utils = try_import_gcp_utils()
3195
3213
 
3196
3214
  # Create a cloud without cloud resources first
3197
3215
  # Provider ID is optional for K8s clouds.
@@ -3267,11 +3285,6 @@ class CloudController(BaseController):
3267
3285
  raise
3268
3286
 
3269
3287
  try:
3270
- # Set defaults for Kubernetes clouds.
3271
- if compute_stack == ComputeStack.K8S:
3272
- instance_service_account_email = ""
3273
- subnet_names = []
3274
-
3275
3288
  enable_filestore = filestore_location and filestore_instance_id
3276
3289
 
3277
3290
  # Normally, for Kubernetes clouds, we don't need a VPC name, since networking is managed by Kubernetes.
@@ -3285,72 +3298,62 @@ class CloudController(BaseController):
3285
3298
  if (enable_filestore or memorystore_instance_name) and not project_id:
3286
3299
  raise ClickException("Please provide a project ID.")
3287
3300
 
3288
- if project_id:
3289
- factory = gcp_utils.get_google_cloud_client_factory(
3290
- self.log, project_id
3291
- )
3292
-
3293
- if enable_filestore:
3294
- filestore_config = gcp_utils.get_gcp_filestore_config(
3295
- factory,
3296
- project_id,
3297
- vpc_name,
3298
- filestore_location,
3299
- filestore_instance_id,
3300
- self.log,
3301
- )
3302
- else:
3303
- filestore_config = GCPFileStoreConfig(
3304
- instance_name="", mount_target_ip="", root_dir=""
3305
- )
3306
- if compute_stack == ComputeStack.K8S:
3307
- # Set vpc_name to empty string for Kubernetes clouds
3308
- vpc_name = ""
3309
-
3310
- if memorystore_instance_name:
3311
- memorystore_instance_config = gcp_utils.get_gcp_memorystore_config(
3312
- factory, memorystore_instance_name
3313
- )
3314
- else:
3315
- memorystore_instance_config = None
3316
-
3317
3301
  if not cloud_storage_bucket_name.startswith(GCS_STORAGE_PREFIX):
3318
3302
  cloud_storage_bucket_name = (
3319
3303
  GCS_STORAGE_PREFIX + cloud_storage_bucket_name
3320
3304
  )
3321
3305
 
3322
- # Verify cloud resources meet our requirement
3323
- create_cloud_resource_gcp = CreateCloudResourceGCP(
3324
- gcp_vpc_id=vpc_name,
3325
- gcp_subnet_ids=subnet_names,
3326
- gcp_cluster_node_service_account_email=instance_service_account_email,
3327
- gcp_anyscale_iam_service_account_email=anyscale_service_account_email
3328
- or "",
3329
- gcp_filestore_config=filestore_config,
3330
- gcp_firewall_policy_ids=firewall_policy_names,
3331
- gcp_cloud_storage_bucket_id=cloud_storage_bucket_name[
3332
- len(GCS_STORAGE_PREFIX) :
3333
- ],
3334
- memorystore_instance_config=memorystore_instance_config,
3306
+ cloud_resource = CloudDeployment(
3335
3307
  compute_stack=compute_stack,
3336
- kubernetes_zones=kubernetes_zones,
3337
- kubernetes_dataplane_identity=anyscale_operator_iam_identity,
3338
- cloud_storage_bucket_name=cloud_storage_bucket_name,
3308
+ provider=CloudProviders.GCP,
3309
+ region=region,
3310
+ networking_mode=NetworkingMode.PRIVATE
3311
+ if private_network
3312
+ else NetworkingMode.PUBLIC,
3313
+ object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
3314
+ file_storage=FileStorage(
3315
+ file_storage_id="projects/{}/locations/{}/instances/{}".format(
3316
+ project_id, filestore_location, filestore_instance_id
3317
+ )
3318
+ if filestore_instance_id
3319
+ else None,
3320
+ persistent_volume_claim=persistent_volume_claim,
3321
+ csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
3322
+ )
3323
+ if filestore_instance_id
3324
+ or persistent_volume_claim
3325
+ or csi_ephemeral_volume_driver
3326
+ else None,
3327
+ gcp_config=GCPConfig(
3328
+ project_id=project_id,
3329
+ host_project_id=host_project_id,
3330
+ provider_name=provider_id,
3331
+ vpc_name=vpc_name,
3332
+ subnet_names=subnet_names,
3333
+ firewall_policy_names=firewall_policy_names,
3334
+ anyscale_service_account_email=anyscale_service_account_email,
3335
+ cluster_service_account_email=instance_service_account_email,
3336
+ memorystore_instance_name=memorystore_instance_name,
3337
+ ),
3338
+ kubernetes_config=KubernetesConfig(
3339
+ anyscale_operator_iam_identity=anyscale_operator_iam_identity,
3340
+ zones=kubernetes_zones,
3341
+ )
3342
+ if compute_stack == ComputeStack.K8S
3343
+ else None,
3339
3344
  )
3340
3345
 
3346
+ self._preprocess_gcp(cloud_resource)
3347
+
3341
3348
  # Verification is only performed for VM compute stack.
3342
3349
  # TODO (shomilj): Add verification to the K8S compute stack as well.
3343
3350
  if compute_stack == ComputeStack.VM:
3344
3351
  if (
3345
3352
  not skip_verifications
3346
- and not self.verify_gcp_cloud_resources_from_create_cloud_resource(
3347
- cloud_resource=create_cloud_resource_gcp,
3348
- project_id=project_id,
3349
- host_project_id=host_project_id,
3350
- region=region,
3353
+ and not self.verify_gcp_cloud_resources_from_cloud_deployment(
3351
3354
  cloud_id=cloud_id,
3355
+ cloud_deployment=cloud_resource,
3352
3356
  yes=yes,
3353
- factory=factory,
3354
3357
  is_private_service_cloud=is_private_service_cloud,
3355
3358
  )
3356
3359
  ):
@@ -3384,13 +3387,10 @@ class CloudController(BaseController):
3384
3387
  raise ClickException(f"Cloud registration failed! {e}")
3385
3388
 
3386
3389
  try:
3387
- # update cloud with verified cloud resources
3388
3390
  with self.log.spinner("Updating Anyscale cloud with cloud resources..."):
3389
- cloud_resource = self.api_client.update_cloud_with_cloud_resource_api_v2_clouds_with_cloud_resource_gcp_router_cloud_id_put(
3390
- cloud_id=cloud_id,
3391
- update_cloud_with_cloud_resource_gcp=UpdateCloudWithCloudResourceGCP(
3392
- cloud_resource_to_update=create_cloud_resource_gcp,
3393
- ),
3391
+ # Update cloud with verified cloud resources.
3392
+ self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
3393
+ cloud_id=cloud_id, cloud_deployment=cloud_resource,
3394
3394
  )
3395
3395
  # For now, only wait for the cloud to be active if the compute stack is VM.
3396
3396
  # TODO (shomilj): support this fully for Kubernetes after provider metadata
@@ -3398,7 +3398,15 @@ class CloudController(BaseController):
3398
3398
  if compute_stack == ComputeStack.VM:
3399
3399
  self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.GCP)
3400
3400
  if compute_stack == ComputeStack.K8S:
3401
- cloud_resource_id = cloud_resource.result.cloud_resource.id
3401
+ # Get the cloud resource ID to pass to the helm command.
3402
+ cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
3403
+ cloud_id=cloud_id,
3404
+ ).results
3405
+ assert (
3406
+ len(cloud_resources) == 1
3407
+ ), f"Expected 1 cloud resource, got {len(cloud_resources)}"
3408
+ cloud_resource_id = cloud_resources[0].cloud_resource_id
3409
+
3402
3410
  helm_command = self._generate_helm_upgrade_command(
3403
3411
  provider="gcp",
3404
3412
  cloud_deployment_id=cloud_resource_id,
@@ -3518,13 +3526,6 @@ class CloudController(BaseController):
3518
3526
  )
3519
3527
 
3520
3528
  cloud = response.result
3521
- except ClickException as e:
3522
- raise ClickException(
3523
- f"Failed to update cloud state to deleting for cloud {cloud_name}: {e}"
3524
- )
3525
-
3526
- # Clean up cloud resources
3527
- try:
3528
3529
  if cloud_provider == CloudProviders.AWS:
3529
3530
  if not (cloud.is_aioa or cloud.compute_stack == ComputeStack.K8S):
3530
3531
  # Delete services resources