anyscale 0.26.51__py3-none-any.whl → 0.26.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/_private/anyscale_client/README.md +1 -1
- anyscale/_private/anyscale_client/anyscale_client.py +178 -46
- anyscale/_private/anyscale_client/common.py +61 -2
- anyscale/_private/anyscale_client/fake_anyscale_client.py +145 -8
- anyscale/_private/docgen/__main__.py +42 -31
- anyscale/_private/docgen/generator.py +63 -28
- anyscale/_private/docgen/models.md +4 -2
- anyscale/_private/sdk/__init__.py +124 -1
- anyscale/_private/workload/workload_config.py +4 -6
- anyscale/_private/workload/workload_sdk.py +105 -12
- anyscale/client/README.md +13 -11
- anyscale/client/openapi_client/__init__.py +3 -3
- anyscale/client/openapi_client/api/default_api.py +512 -316
- anyscale/client/openapi_client/models/__init__.py +3 -3
- anyscale/client/openapi_client/models/aws_config.py +2 -2
- anyscale/client/openapi_client/models/baseimagesenum.py +158 -1
- anyscale/client/openapi_client/models/cloud_data_bucket_presigned_url_request.py +31 -3
- anyscale/client/openapi_client/models/cloud_deployment.py +37 -36
- anyscale/client/openapi_client/models/cloud_resource.py +59 -3
- anyscale/client/openapi_client/models/cloud_resource_gcp.py +59 -3
- anyscale/client/openapi_client/models/create_cloud_resource.py +59 -3
- anyscale/client/openapi_client/models/create_cloud_resource_gcp.py +59 -3
- anyscale/client/openapi_client/models/create_resource_notification.py +31 -3
- anyscale/client/openapi_client/models/{decorated_cloud_deployment.py → decorated_cloud_resource.py} +124 -96
- anyscale/client/openapi_client/models/{clouddeployment_list_response.py → decoratedcloudresource_list_response.py} +15 -15
- anyscale/client/openapi_client/models/{decoratedclouddeployment_response.py → decoratedcloudresource_response.py} +11 -11
- anyscale/client/openapi_client/models/file_storage.py +4 -4
- anyscale/client/openapi_client/models/gcp_config.py +2 -2
- anyscale/client/openapi_client/models/ha_job_error_types.py +9 -2
- anyscale/client/openapi_client/models/object_storage.py +4 -4
- anyscale/client/openapi_client/models/ray_runtime_env_config.py +57 -1
- anyscale/client/openapi_client/models/resource_alert_event_type.py +2 -1
- anyscale/client/openapi_client/models/resource_notification.py +29 -1
- anyscale/client/openapi_client/models/supportedbaseimagesenum.py +155 -1
- anyscale/client/openapi_client/models/workload_info.py +31 -3
- anyscale/client/openapi_client/models/workload_state_info.py +29 -1
- anyscale/cloud/models.py +40 -43
- anyscale/commands/cloud_commands.py +93 -88
- anyscale/commands/command_examples.py +37 -49
- anyscale/commands/exec_commands.py +12 -1
- anyscale/commands/list_commands.py +42 -12
- anyscale/commands/project_commands.py +399 -115
- anyscale/commands/schedule_commands.py +22 -11
- anyscale/commands/service_commands.py +11 -6
- anyscale/commands/util.py +94 -1
- anyscale/commands/workspace_commands.py +92 -38
- anyscale/compute_config/__init__.py +1 -1
- anyscale/compute_config/_private/compute_config_sdk.py +8 -11
- anyscale/compute_config/commands.py +3 -3
- anyscale/compute_config/models.py +30 -30
- anyscale/controllers/cloud_controller.py +361 -360
- anyscale/controllers/kubernetes_verifier.py +1 -1
- anyscale/job/_private/job_sdk.py +41 -23
- anyscale/job/models.py +1 -1
- anyscale/project/__init__.py +101 -1
- anyscale/project/_private/project_sdk.py +90 -2
- anyscale/project/commands.py +188 -1
- anyscale/project/models.py +198 -2
- anyscale/sdk/anyscale_client/models/baseimagesenum.py +158 -1
- anyscale/sdk/anyscale_client/models/ray_runtime_env_config.py +57 -1
- anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +155 -1
- anyscale/service/_private/service_sdk.py +2 -1
- anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
- anyscale/util.py +3 -0
- anyscale/utils/runtime_env.py +3 -1
- anyscale/version.py +1 -1
- anyscale/workspace/commands.py +114 -23
- anyscale/workspace/models.py +3 -5
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/METADATA +1 -1
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/RECORD +75 -75
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/WHEEL +0 -0
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.51.dist-info → anyscale-0.26.53.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,13 @@ from anyscale.client.openapi_client.models import (
|
|
42
42
|
ComputeStack,
|
43
43
|
CreateCloudResource,
|
44
44
|
CreateCloudResourceGCP,
|
45
|
+
DecoratedCloudResource,
|
45
46
|
EditableCloudResource,
|
46
47
|
EditableCloudResourceGCP,
|
47
48
|
FileStorage,
|
48
49
|
GCPConfig,
|
49
50
|
GCPFileStoreConfig,
|
51
|
+
KubernetesConfig,
|
50
52
|
NetworkingMode,
|
51
53
|
NFSMountTarget,
|
52
54
|
ObjectStorage,
|
@@ -1425,18 +1427,6 @@ class CloudController(BaseController):
|
|
1425
1427
|
cloud_id, CloudProviders.AWS, functions_to_verify, yes,
|
1426
1428
|
)
|
1427
1429
|
|
1428
|
-
def get_cloud_deployment(
|
1429
|
-
self, cloud_id: str, cloud_deployment_id: str
|
1430
|
-
) -> CloudDeployment:
|
1431
|
-
try:
|
1432
|
-
return self.api_client.get_cloud_deployment_api_v2_clouds_cloud_id_deployment_get(
|
1433
|
-
cloud_id=cloud_id, cloud_deployment_id=cloud_deployment_id,
|
1434
|
-
).result
|
1435
|
-
except Exception as e: # noqa: BLE001
|
1436
|
-
raise ClickException(
|
1437
|
-
f"Failed to get cloud deployment {cloud_deployment_id} for cloud {cloud_id}. Error: {e}"
|
1438
|
-
)
|
1439
|
-
|
1440
1430
|
# Avoid displaying fields with empty values (since the values for optional fields default to None).
|
1441
1431
|
def _remove_empty_values(self, d):
|
1442
1432
|
if isinstance(d, dict):
|
@@ -1449,56 +1439,52 @@ class CloudController(BaseController):
|
|
1449
1439
|
return [self._remove_empty_values(v) for v in d]
|
1450
1440
|
return d
|
1451
1441
|
|
1452
|
-
def
|
1442
|
+
def get_decorated_cloud_resources(
|
1443
|
+
self, cloud_id: str
|
1444
|
+
) -> List[DecoratedCloudResource]:
|
1453
1445
|
cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(
|
1454
1446
|
cloud_id=cloud_id,
|
1455
1447
|
).result
|
1456
1448
|
|
1457
1449
|
if cloud.is_aioa:
|
1458
1450
|
raise ValueError(
|
1459
|
-
"Listing cloud
|
1451
|
+
"Listing cloud resources is only supported for customer-hosted clouds."
|
1460
1452
|
)
|
1461
1453
|
|
1462
1454
|
try:
|
1463
|
-
|
1455
|
+
return self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
1464
1456
|
cloud_id=cloud_id,
|
1465
1457
|
).results
|
1466
1458
|
except Exception as e: # noqa: BLE001
|
1467
1459
|
raise ClickException(
|
1468
|
-
f"Failed to get cloud
|
1460
|
+
f"Failed to get cloud resources for cloud {cloud.name} ({cloud_id}). Error: {e}"
|
1469
1461
|
)
|
1470
1462
|
|
1471
|
-
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1476
|
-
|
1477
|
-
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
) ->
|
1483
|
-
|
1484
|
-
|
1485
|
-
|
1486
|
-
|
1487
|
-
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
for deployment in deployments:
|
1498
|
-
if deployment.get("name") == cloud_deployment_name:
|
1499
|
-
return deployment
|
1500
|
-
|
1501
|
-
raise ClickException(f"Cloud deployment {cloud_deployment_name} not found.")
|
1463
|
+
def get_formatted_cloud_resources(self, cloud_id: str) -> List[Any]:
|
1464
|
+
cloud_resources = self.get_decorated_cloud_resources(cloud_id)
|
1465
|
+
formatted_cloud_resources = [
|
1466
|
+
self._remove_empty_values(cloud_resource.to_dict())
|
1467
|
+
for cloud_resource in cloud_resources
|
1468
|
+
]
|
1469
|
+
# Remove the deprecated cloud_deployment_id field.
|
1470
|
+
for d in formatted_cloud_resources:
|
1471
|
+
d.pop("cloud_deployment_id", None)
|
1472
|
+
return formatted_cloud_resources
|
1473
|
+
|
1474
|
+
def get_cloud_resources(self, cloud_id: str) -> List[CloudDeployment]:
|
1475
|
+
decorated_cloud_resources = self.get_decorated_cloud_resources(cloud_id)
|
1476
|
+
|
1477
|
+
# DecoratedCloudResource has extra fields that are not in CloudDeployment.
|
1478
|
+
allowed_keys = set(CloudDeployment.attribute_map.keys())
|
1479
|
+
allowed_keys.remove(
|
1480
|
+
"cloud_deployment_id"
|
1481
|
+
) # Remove deprecated cloud_deployment_id field.
|
1482
|
+
return [
|
1483
|
+
CloudDeployment(
|
1484
|
+
**{k: v for k, v in resource.to_dict().items() if k in allowed_keys}
|
1485
|
+
)
|
1486
|
+
for resource in decorated_cloud_resources
|
1487
|
+
]
|
1502
1488
|
|
1503
1489
|
def update_aws_anyscale_iam_role(
|
1504
1490
|
self,
|
@@ -1564,7 +1550,7 @@ class CloudController(BaseController):
|
|
1564
1550
|
|
1565
1551
|
return role, iam_role_original_policy
|
1566
1552
|
|
1567
|
-
def _generate_diff(self, existing:
|
1553
|
+
def _generate_diff(self, existing: List[Any], new: List[Any]) -> str:
|
1568
1554
|
"""
|
1569
1555
|
Generates a diff between the existing and new dicts.
|
1570
1556
|
"""
|
@@ -1590,48 +1576,59 @@ class CloudController(BaseController):
|
|
1590
1576
|
|
1591
1577
|
return formatted_diff.strip()
|
1592
1578
|
|
1593
|
-
|
1579
|
+
# Returns the role and original IAM policy, so that we can revert it if creating the cloud resource fails.
|
1580
|
+
def _preprocess_aws( # noqa: PLR0912
|
1581
|
+
self, cloud_id: str, deployment: CloudDeployment
|
1582
|
+
) -> Tuple[Optional[Boto3Resource], Optional[str]]:
|
1594
1583
|
if not deployment.aws_config and not deployment.file_storage:
|
1595
|
-
return
|
1584
|
+
return None, None
|
1596
1585
|
|
1597
1586
|
if not validate_aws_credentials(self.log):
|
1598
1587
|
raise ClickException(
|
1599
|
-
"Updating cloud
|
1588
|
+
"Updating cloud resources requires valid AWS credentials to be set locally. Learn more: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html"
|
1600
1589
|
)
|
1601
1590
|
|
1591
|
+
role, iam_role_original_policy = None, None
|
1592
|
+
|
1602
1593
|
# Get EFS mount target IP.
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1594
|
+
file_storage = None
|
1595
|
+
if deployment.file_storage:
|
1596
|
+
if isinstance(deployment.file_storage, dict):
|
1597
|
+
file_storage = FileStorage(**deployment.file_storage)
|
1598
|
+
else:
|
1599
|
+
file_storage = deployment.file_storage
|
1608
1600
|
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
if not efs_mount_target_ip:
|
1615
|
-
raise ClickException(
|
1616
|
-
f"EFS mount target IP not found for {file_storage.file_storage_id}."
|
1601
|
+
if file_storage.file_storage_id:
|
1602
|
+
try:
|
1603
|
+
boto3_session = boto3.Session(region_name=deployment.region)
|
1604
|
+
efs_mount_target_ip = _get_aws_efs_mount_target_ip(
|
1605
|
+
boto3_session, file_storage.file_storage_id,
|
1617
1606
|
)
|
1618
|
-
|
1619
|
-
|
1620
|
-
|
1621
|
-
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1625
|
-
|
1607
|
+
if not efs_mount_target_ip:
|
1608
|
+
raise ClickException(
|
1609
|
+
f"EFS mount target IP not found for {file_storage.file_storage_id}."
|
1610
|
+
)
|
1611
|
+
file_storage.mount_targets = [
|
1612
|
+
NFSMountTarget(address=efs_mount_target_ip)
|
1613
|
+
]
|
1614
|
+
except ClientError as e:
|
1615
|
+
self.log.log_resource_exception(
|
1616
|
+
CloudAnalyticsEventCloudResource.AWS_EFS, e
|
1617
|
+
)
|
1618
|
+
raise e
|
1626
1619
|
|
1627
1620
|
deployment.file_storage = file_storage
|
1628
1621
|
|
1629
1622
|
if deployment.aws_config:
|
1630
|
-
|
1623
|
+
if isinstance(deployment.aws_config, dict):
|
1624
|
+
aws_config = AWSConfig(**deployment.aws_config)
|
1625
|
+
else:
|
1626
|
+
aws_config = deployment.aws_config
|
1627
|
+
|
1631
1628
|
assert deployment.region
|
1632
1629
|
|
1633
1630
|
# Update Anyscale IAM role's assume policy to include the cloud ID as the external ID.
|
1634
|
-
self.update_aws_anyscale_iam_role(
|
1631
|
+
role, iam_role_original_policy = self.update_aws_anyscale_iam_role(
|
1635
1632
|
cloud_id,
|
1636
1633
|
deployment.region,
|
1637
1634
|
aws_config.anyscale_iam_role_id,
|
@@ -1658,13 +1655,19 @@ class CloudController(BaseController):
|
|
1658
1655
|
|
1659
1656
|
deployment.aws_config = aws_config
|
1660
1657
|
|
1658
|
+
return role, iam_role_original_policy
|
1659
|
+
|
1661
1660
|
def _preprocess_gcp(
|
1662
1661
|
self, deployment: CloudDeployment,
|
1663
1662
|
):
|
1664
1663
|
if not deployment.gcp_config:
|
1665
1664
|
return
|
1666
1665
|
|
1667
|
-
|
1666
|
+
if isinstance(deployment.gcp_config, dict):
|
1667
|
+
gcp_config = GCPConfig(**deployment.gcp_config)
|
1668
|
+
else:
|
1669
|
+
gcp_config = deployment.gcp_config
|
1670
|
+
|
1668
1671
|
deployment.gcp_config = gcp_config
|
1669
1672
|
if not deployment.file_storage and not gcp_config.memorystore_instance_name:
|
1670
1673
|
return
|
@@ -1681,7 +1684,11 @@ class CloudController(BaseController):
|
|
1681
1684
|
|
1682
1685
|
# Get Filestore mount target IP and root dir.
|
1683
1686
|
if deployment.file_storage:
|
1684
|
-
|
1687
|
+
if isinstance(deployment.file_storage, dict):
|
1688
|
+
fs = FileStorage(**deployment.file_storage)
|
1689
|
+
else:
|
1690
|
+
fs = deployment.file_storage
|
1691
|
+
|
1685
1692
|
if fs.file_storage_id:
|
1686
1693
|
if not gcp_config.vpc_name:
|
1687
1694
|
raise ClickException(
|
@@ -1699,7 +1706,7 @@ class CloudController(BaseController):
|
|
1699
1706
|
NFSMountTarget(address=filestore_config.mount_target_ip)
|
1700
1707
|
]
|
1701
1708
|
|
1702
|
-
|
1709
|
+
deployment.file_storage = fs
|
1703
1710
|
|
1704
1711
|
# Get Memorystore config.
|
1705
1712
|
if gcp_config.memorystore_instance_name:
|
@@ -1711,7 +1718,7 @@ class CloudController(BaseController):
|
|
1711
1718
|
|
1712
1719
|
deployment.gcp_config = gcp_config
|
1713
1720
|
|
1714
|
-
def
|
1721
|
+
def create_cloud_resource(
|
1715
1722
|
self,
|
1716
1723
|
cloud_name: str,
|
1717
1724
|
spec_file: str,
|
@@ -1731,7 +1738,7 @@ class CloudController(BaseController):
|
|
1731
1738
|
try:
|
1732
1739
|
new_deployment = CloudDeployment(**spec)
|
1733
1740
|
except Exception as e: # noqa: BLE001
|
1734
|
-
raise ClickException(f"Failed to parse
|
1741
|
+
raise ClickException(f"Failed to parse cloud resource: {e}")
|
1735
1742
|
|
1736
1743
|
if new_deployment.provider == CloudProviders.AWS:
|
1737
1744
|
self._preprocess_aws(cloud_id=cloud_id, deployment=new_deployment)
|
@@ -1741,17 +1748,16 @@ class CloudController(BaseController):
|
|
1741
1748
|
if not skip_verification and not self.verify_cloud_deployment(
|
1742
1749
|
cloud_id=cloud_id, cloud_deployment=new_deployment
|
1743
1750
|
):
|
1744
|
-
raise ClickException("Cloud
|
1751
|
+
raise ClickException("Cloud resource verification failed.")
|
1745
1752
|
|
1746
1753
|
# Log an additional warning if a new deployment is being added but a deployment with the same AWS/GCP region already exists.
|
1747
|
-
|
1748
|
-
|
1749
|
-
|
1750
|
-
for deployment in existing_spec["deployments"]
|
1754
|
+
existing_resources = {
|
1755
|
+
resource.cloud_resource_id: resource
|
1756
|
+
for resource in self.get_cloud_resources(cloud_id)
|
1751
1757
|
}
|
1752
1758
|
existing_stack_provider_regions = {
|
1753
1759
|
(d.compute_stack, d.provider, d.region)
|
1754
|
-
for d in
|
1760
|
+
for d in existing_resources.values()
|
1755
1761
|
if d.provider in (CloudProviders.AWS, CloudProviders.GCP)
|
1756
1762
|
}
|
1757
1763
|
if (
|
@@ -1760,125 +1766,144 @@ class CloudController(BaseController):
|
|
1760
1766
|
new_deployment.region,
|
1761
1767
|
) in existing_stack_provider_regions:
|
1762
1768
|
self.log.warning(
|
1763
|
-
f"A {new_deployment.provider} {new_deployment.compute_stack}
|
1769
|
+
f"A {new_deployment.provider} {new_deployment.compute_stack} resource in region {new_deployment.region} already exists."
|
1764
1770
|
)
|
1765
|
-
confirm("Would you like to proceed with adding this
|
1771
|
+
confirm("Would you like to proceed with adding this cloud resource?", yes)
|
1766
1772
|
|
1767
|
-
# Add the
|
1773
|
+
# Add the resource.
|
1768
1774
|
try:
|
1769
|
-
self.api_client.
|
1775
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
1770
1776
|
cloud_id=cloud_id, cloud_deployment=new_deployment,
|
1771
1777
|
)
|
1772
1778
|
except Exception as e: # noqa: BLE001
|
1773
|
-
raise ClickException(f"Failed to add cloud
|
1779
|
+
raise ClickException(f"Failed to add cloud resource: {e}")
|
1774
1780
|
|
1775
1781
|
self.log.info(
|
1776
|
-
f"Successfully created cloud
|
1782
|
+
f"Successfully created cloud resource{' ' + new_deployment.name if new_deployment.name else ''} in cloud {cloud_name}!"
|
1777
1783
|
)
|
1778
1784
|
|
1779
|
-
def
|
1785
|
+
def update_cloud_resources( # noqa: PLR0912, C901
|
1780
1786
|
self,
|
1781
|
-
|
1782
|
-
|
1787
|
+
cloud_name: Optional[str],
|
1788
|
+
cloud_id: Optional[str],
|
1789
|
+
resources_file: str,
|
1783
1790
|
skip_verification: bool = False,
|
1784
1791
|
yes: bool = False,
|
1785
1792
|
):
|
1793
|
+
if not cloud_id:
|
1794
|
+
cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
|
1795
|
+
assert cloud_id
|
1796
|
+
|
1786
1797
|
# Read the spec file.
|
1787
|
-
path = pathlib.Path(
|
1798
|
+
path = pathlib.Path(resources_file)
|
1788
1799
|
if not path.exists():
|
1789
|
-
raise ClickException(f"{
|
1800
|
+
raise ClickException(f"{resources_file} does not exist.")
|
1790
1801
|
if not path.is_file():
|
1791
|
-
raise ClickException(f"{
|
1802
|
+
raise ClickException(f"{resources_file} is not a file.")
|
1792
1803
|
|
1793
1804
|
spec = yaml.safe_load(path.read_text())
|
1794
|
-
try:
|
1795
|
-
updated_deployment = CloudDeployment(**spec)
|
1796
|
-
except Exception as e: # noqa: BLE001
|
1797
|
-
raise ClickException(f"Failed to parse cloud deployment: {e}")
|
1798
1805
|
|
1799
|
-
|
1806
|
+
# Get the existing spec.
|
1807
|
+
existing_resources = self.get_cloud_resources(cloud_id=cloud_id)
|
1808
|
+
|
1809
|
+
if len(existing_resources) > len(spec):
|
1800
1810
|
raise ClickException(
|
1801
|
-
"
|
1811
|
+
"Please use `anyscale cloud resource delete` to remove cloud resources."
|
1802
1812
|
)
|
1803
|
-
|
1804
|
-
# Get the existing cloud deployment.
|
1805
|
-
cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud)
|
1806
|
-
existing_deployment = self.get_cloud_deployment(
|
1807
|
-
cloud_id=cloud_id,
|
1808
|
-
cloud_deployment_id=updated_deployment.cloud_deployment_id,
|
1809
|
-
)
|
1810
|
-
if (
|
1811
|
-
updated_deployment.provider == CloudProviders.PCP
|
1812
|
-
or existing_deployment.provider == CloudProviders.PCP
|
1813
|
-
):
|
1813
|
+
if len(existing_resources) < len(spec):
|
1814
1814
|
raise ClickException(
|
1815
|
-
"Please use
|
1815
|
+
"Please use `anyscale cloud resource create` to add cloud resources."
|
1816
1816
|
)
|
1817
1817
|
|
1818
|
-
# Diff the existing and new
|
1818
|
+
# Diff the existing and new specs
|
1819
1819
|
diff = self._generate_diff(
|
1820
|
-
self._remove_empty_values(
|
1821
|
-
self._remove_empty_values(updated_deployment.to_dict()),
|
1820
|
+
[self._remove_empty_values(r.to_dict()) for r in existing_resources], spec
|
1822
1821
|
)
|
1823
1822
|
if not diff:
|
1824
1823
|
self.log.info("No changes detected.")
|
1825
1824
|
return
|
1826
1825
|
|
1827
|
-
|
1828
|
-
|
1829
|
-
|
1830
|
-
|
1831
|
-
|
1832
|
-
|
1833
|
-
|
1834
|
-
|
1835
|
-
|
1836
|
-
|
1837
|
-
|
1838
|
-
|
1839
|
-
|
1840
|
-
|
1826
|
+
existing_resources_dict = {
|
1827
|
+
resource.cloud_resource_id: resource for resource in existing_resources
|
1828
|
+
}
|
1829
|
+
|
1830
|
+
updated_deployments: List[CloudDeployment] = []
|
1831
|
+
for d in spec:
|
1832
|
+
try:
|
1833
|
+
deployment = CloudDeployment(**d)
|
1834
|
+
except Exception as e: # noqa: BLE001
|
1835
|
+
raise ClickException(f"Failed to parse cloud resource: {e}")
|
1836
|
+
|
1837
|
+
if not deployment.cloud_resource_id:
|
1838
|
+
raise ClickException(
|
1839
|
+
"All cloud resources must include a cloud_resource_id."
|
1840
|
+
)
|
1841
|
+
if deployment.cloud_resource_id not in existing_resources_dict:
|
1842
|
+
raise ClickException(
|
1843
|
+
f"Cloud resource {deployment.cloud_resource_id} not found."
|
1844
|
+
)
|
1845
|
+
if deployment.provider == CloudProviders.PCP:
|
1846
|
+
raise ClickException(
|
1847
|
+
"Please use the `anyscale machine-pool` CLI to update machine pools."
|
1848
|
+
)
|
1849
|
+
if deployment != existing_resources_dict[deployment.cloud_resource_id]:
|
1850
|
+
updated_deployments.append(deployment)
|
1841
1851
|
|
1842
1852
|
# Log the diff and confirm.
|
1843
1853
|
self.log.info(f"Detected the following changes:\n{diff}")
|
1844
1854
|
|
1845
|
-
confirm("Would you like to proceed with updating this cloud
|
1855
|
+
confirm("Would you like to proceed with updating this cloud?", yes)
|
1856
|
+
|
1857
|
+
# Preprocess the deployments if necessary.
|
1858
|
+
for deployment in updated_deployments:
|
1859
|
+
if deployment.provider == CloudProviders.AWS:
|
1860
|
+
self._preprocess_aws(cloud_id=cloud_id, deployment=deployment)
|
1861
|
+
elif deployment.provider == CloudProviders.GCP:
|
1862
|
+
self._preprocess_gcp(deployment=deployment)
|
1863
|
+
|
1864
|
+
# Skip verification for Kubernetes stacks or if explicitly requested
|
1865
|
+
if deployment.compute_stack == ComputeStack.K8S:
|
1866
|
+
self.log.info("Skipping verification for Kubernetes compute stack.")
|
1867
|
+
elif not skip_verification and not self.verify_cloud_deployment(
|
1868
|
+
cloud_id=cloud_id, cloud_deployment=deployment
|
1869
|
+
):
|
1870
|
+
raise ClickException(
|
1871
|
+
f"Verification failed for cloud resource {deployment.name or deployment.cloud_resource_id}."
|
1872
|
+
)
|
1846
1873
|
|
1847
|
-
# Update the
|
1874
|
+
# Update the cloud resources.
|
1848
1875
|
try:
|
1849
|
-
self.api_client.
|
1850
|
-
cloud_id=cloud_id, cloud_deployment=
|
1876
|
+
self.api_client.update_cloud_resources_api_v2_clouds_cloud_id_resources_put(
|
1877
|
+
cloud_id=cloud_id, cloud_deployment=updated_deployments,
|
1851
1878
|
)
|
1852
1879
|
except Exception as e: # noqa: BLE001
|
1853
|
-
raise ClickException(f"Failed to update cloud
|
1880
|
+
raise ClickException(f"Failed to update cloud resources: {e}")
|
1854
1881
|
|
1855
|
-
self.log.info(
|
1856
|
-
f"Successfully updated cloud deployment {updated_deployment.name or updated_deployment.cloud_deployment_id} in cloud {cloud}."
|
1857
|
-
)
|
1882
|
+
self.log.info(f"Successfully updated cloud {cloud_name or cloud_id}.")
|
1858
1883
|
|
1859
|
-
def
|
1860
|
-
self, cloud_name: str,
|
1884
|
+
def remove_cloud_resource(
|
1885
|
+
self, cloud_name: str, resource_name: str, yes: bool,
|
1861
1886
|
):
|
1862
1887
|
confirm(
|
1863
|
-
f"Please confirm that you would like to remove
|
1888
|
+
f"Please confirm that you would like to remove resource {resource_name} from cloud {cloud_name}.",
|
1864
1889
|
yes,
|
1865
1890
|
)
|
1866
1891
|
|
1867
1892
|
cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
|
1868
1893
|
try:
|
1869
|
-
with self.log.spinner("Removing cloud
|
1870
|
-
self.api_client.
|
1871
|
-
cloud_id=cloud_id,
|
1894
|
+
with self.log.spinner("Removing cloud resource..."):
|
1895
|
+
self.api_client.remove_cloud_resource_api_v2_clouds_cloud_id_remove_resource_delete(
|
1896
|
+
cloud_id=cloud_id, cloud_resource_name=resource_name,
|
1872
1897
|
)
|
1873
1898
|
except Exception as e: # noqa: BLE001
|
1874
|
-
raise ClickException(f"Failed to remove cloud
|
1899
|
+
raise ClickException(f"Failed to remove cloud resource: {e}")
|
1875
1900
|
|
1876
1901
|
self.log.warning(
|
1877
1902
|
"The trust policy or service account that provides access to Anyscale's control plane needs to be deleted manually if you no longer wish for Anyscale to have access."
|
1878
1903
|
)
|
1879
1904
|
|
1880
1905
|
self.log.info(
|
1881
|
-
f"Successfully removed
|
1906
|
+
f"Successfully removed resource {resource_name} from cloud {cloud_name}!"
|
1882
1907
|
)
|
1883
1908
|
|
1884
1909
|
def get_cloud_config(
|
@@ -2027,7 +2052,7 @@ class CloudController(BaseController):
|
|
2027
2052
|
yes: bool = False,
|
2028
2053
|
) -> bool:
|
2029
2054
|
"""
|
2030
|
-
Verifies a cloud by name or id, including all cloud
|
2055
|
+
Verifies a cloud by name or id, including all cloud resources.
|
2031
2056
|
|
2032
2057
|
Note: If your changes involve operations that may require additional permissions
|
2033
2058
|
(for example, `boto3_session.client("efs").describe_backup_policy`), it's important
|
@@ -2053,15 +2078,15 @@ class CloudController(BaseController):
|
|
2053
2078
|
return False
|
2054
2079
|
|
2055
2080
|
try:
|
2056
|
-
|
2081
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2057
2082
|
cloud_id=cloud_id,
|
2058
2083
|
).results
|
2059
2084
|
except Exception as e: # noqa: BLE001
|
2060
|
-
self.log.error(f"Failed to retrieve cloud
|
2085
|
+
self.log.error(f"Failed to retrieve cloud resources: {e}")
|
2061
2086
|
return False
|
2062
2087
|
|
2063
|
-
if not
|
2064
|
-
self.log.error("No cloud
|
2088
|
+
if not cloud_resources:
|
2089
|
+
self.log.error("No cloud resources found for this cloud")
|
2065
2090
|
return False
|
2066
2091
|
|
2067
2092
|
self.cloud_event_producer.init_trace_context(
|
@@ -2073,31 +2098,35 @@ class CloudController(BaseController):
|
|
2073
2098
|
CloudAnalyticsEventName.COMMAND_START, succeeded=True
|
2074
2099
|
)
|
2075
2100
|
|
2076
|
-
|
2077
|
-
for
|
2101
|
+
cloud_resource_results = []
|
2102
|
+
for cloud_resource in cloud_resources:
|
2078
2103
|
try:
|
2079
|
-
|
2104
|
+
cloud_resource_name = (
|
2105
|
+
cloud_resource.name or cloud_resource.cloud_resource_id
|
2106
|
+
)
|
2080
2107
|
|
2081
|
-
self.log.info(f"Verifying
|
2108
|
+
self.log.info(f"Verifying cloud resource: {cloud_resource_name}")
|
2082
2109
|
result = self.verify_cloud_deployment(
|
2083
2110
|
cloud_id,
|
2084
|
-
|
2111
|
+
cloud_resource,
|
2085
2112
|
strict=strict,
|
2086
2113
|
_use_strict_iam_permissions=_use_strict_iam_permissions,
|
2087
2114
|
boto3_session=boto3_session,
|
2088
2115
|
)
|
2089
|
-
|
2116
|
+
cloud_resource_results.append((cloud_resource_name, result))
|
2090
2117
|
|
2091
2118
|
except (ValueError, TypeError, KeyError, AttributeError, RuntimeError) as e:
|
2092
|
-
|
2093
|
-
|
2119
|
+
cloud_resource_name = getattr(cloud_resource, "name", None) or getattr(
|
2120
|
+
cloud_resource, "cloud_resource_id", "unknown"
|
2121
|
+
)
|
2122
|
+
self.log.error(
|
2123
|
+
f"Failed to verify cloud resource {cloud_resource_name}: {e}"
|
2094
2124
|
)
|
2095
|
-
|
2096
|
-
deployment_results.append((deployment_name, False))
|
2125
|
+
cloud_resource_results.append((cloud_resource_name, False))
|
2097
2126
|
|
2098
|
-
self.
|
2127
|
+
self._print_cloud_resource_verification_results(cloud_resource_results)
|
2099
2128
|
|
2100
|
-
overall_success = all(result for _, result in
|
2129
|
+
overall_success = all(result for _, result in cloud_resource_results)
|
2101
2130
|
|
2102
2131
|
self.cloud_event_producer.produce(
|
2103
2132
|
CloudAnalyticsEventName.RESOURCES_VERIFIED, succeeded=overall_success,
|
@@ -2154,6 +2183,7 @@ class CloudController(BaseController):
|
|
2154
2183
|
strict: bool = False,
|
2155
2184
|
_use_strict_iam_permissions: bool = False, # This should only be used in testing.
|
2156
2185
|
boto3_session: Optional[boto3.Session] = None,
|
2186
|
+
logger: CloudSetupLogger = None,
|
2157
2187
|
) -> bool:
|
2158
2188
|
assert cloud_deployment.region
|
2159
2189
|
assert cloud_deployment.aws_config
|
@@ -2193,6 +2223,7 @@ class CloudController(BaseController):
|
|
2193
2223
|
== NetworkingMode.PRIVATE,
|
2194
2224
|
strict=strict,
|
2195
2225
|
_use_strict_iam_permissions=_use_strict_iam_permissions,
|
2226
|
+
logger=logger,
|
2196
2227
|
)
|
2197
2228
|
|
2198
2229
|
def _get_memorydb_config_for_verification(
|
@@ -2482,26 +2513,26 @@ class CloudController(BaseController):
|
|
2482
2513
|
f"{quota_error_str}\n\nFor instructions on how to increase quotas, visit this link: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html#request-increase"
|
2483
2514
|
)
|
2484
2515
|
|
2485
|
-
def
|
2486
|
-
self,
|
2516
|
+
def _print_cloud_resource_verification_results(
|
2517
|
+
self, cloud_resource_results: List[Tuple[str, bool]]
|
2487
2518
|
) -> None:
|
2488
|
-
"""Print verification results for multiple
|
2519
|
+
"""Print verification results for multiple cloud resources"""
|
2489
2520
|
self.log.info("=" * 60)
|
2490
|
-
self.log.info("
|
2521
|
+
self.log.info("CLOUD RESOURCE VERIFICATION RESULTS:")
|
2491
2522
|
self.log.info("=" * 60)
|
2492
2523
|
|
2493
|
-
for
|
2524
|
+
for cloud_resource_name, success in cloud_resource_results:
|
2494
2525
|
status = "PASSED" if success else "FAILED"
|
2495
|
-
self.log.info(f"{
|
2526
|
+
self.log.info(f"{cloud_resource_name}: {status}")
|
2496
2527
|
|
2497
2528
|
self.log.info("=" * 60)
|
2498
2529
|
|
2499
|
-
passed_count = sum(1 for _, success in
|
2500
|
-
total_count = len(
|
2530
|
+
passed_count = sum(1 for _, success in cloud_resource_results if success)
|
2531
|
+
total_count = len(cloud_resource_results)
|
2501
2532
|
|
2502
2533
|
if passed_count == total_count:
|
2503
2534
|
self.log.info(
|
2504
|
-
f"Overall Result: ALL {total_count}
|
2535
|
+
f"Overall Result: ALL {total_count} cloud resources verified successfully"
|
2505
2536
|
)
|
2506
2537
|
|
2507
2538
|
def register_azure_or_generic_cloud( # noqa: PLR0913
|
@@ -2517,6 +2548,8 @@ class CloudController(BaseController):
|
|
2517
2548
|
cloud_storage_bucket_region: Optional[str] = None,
|
2518
2549
|
nfs_mount_targets: Optional[List[str]] = None,
|
2519
2550
|
nfs_mount_path: Optional[str] = None,
|
2551
|
+
persistent_volume_claim: Optional[str] = None,
|
2552
|
+
csi_ephemeral_volume_driver: Optional[str] = None,
|
2520
2553
|
kubernetes_zones: Optional[List[str]] = None,
|
2521
2554
|
anyscale_operator_iam_identity: Optional[str] = None,
|
2522
2555
|
) -> None:
|
@@ -2576,19 +2609,30 @@ class CloudController(BaseController):
|
|
2576
2609
|
# Attempt to create the cloud resource.
|
2577
2610
|
try:
|
2578
2611
|
with self.log.spinner("Registering Anyscale cloud resources..."):
|
2579
|
-
|
2612
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
2580
2613
|
cloud_id=cloud_id,
|
2581
|
-
|
2582
|
-
|
2583
|
-
|
2584
|
-
|
2585
|
-
|
2586
|
-
|
2587
|
-
|
2588
|
-
|
2589
|
-
|
2590
|
-
|
2591
|
-
|
2614
|
+
cloud_deployment=CloudDeployment(
|
2615
|
+
compute_stack=ComputeStack.K8S,
|
2616
|
+
provider=cloud_provider,
|
2617
|
+
region=region,
|
2618
|
+
object_storage=ObjectStorage(
|
2619
|
+
bucket_name=cloud_storage_bucket_name,
|
2620
|
+
region=cloud_storage_bucket_region or region,
|
2621
|
+
endpoint=cloud_storage_bucket_endpoint,
|
2622
|
+
)
|
2623
|
+
if cloud_storage_bucket_name
|
2624
|
+
else None,
|
2625
|
+
file_storage=FileStorage(
|
2626
|
+
mount_targets=mount_targets,
|
2627
|
+
mount_path=nfs_mount_path,
|
2628
|
+
persistent_volume_claim=persistent_volume_claim,
|
2629
|
+
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
2630
|
+
)
|
2631
|
+
if mount_targets
|
2632
|
+
or persistent_volume_claim
|
2633
|
+
or csi_ephemeral_volume_driver
|
2634
|
+
else None,
|
2635
|
+
kubernetes_config=KubernetesConfig(zones=kubernetes_zones,),
|
2592
2636
|
),
|
2593
2637
|
)
|
2594
2638
|
|
@@ -2611,26 +2655,27 @@ class CloudController(BaseController):
|
|
2611
2655
|
raise ClickException(f"Cloud registration failed! {e}")
|
2612
2656
|
|
2613
2657
|
# TODO (shomilj): Fetch & optionally run the Helm installation here.
|
2614
|
-
|
2615
|
-
#
|
2616
|
-
|
2617
|
-
|
2618
|
-
|
2619
|
-
|
2620
|
-
|
2621
|
-
|
2622
|
-
|
2623
|
-
|
2624
|
-
|
2625
|
-
|
2626
|
-
|
2627
|
-
|
2628
|
-
|
2629
|
-
|
2630
|
-
|
2631
|
-
|
2632
|
-
|
2633
|
-
|
2658
|
+
|
2659
|
+
# Get the cloud resource ID to pass to the helm command.
|
2660
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2661
|
+
cloud_id=cloud_id,
|
2662
|
+
).results
|
2663
|
+
assert (
|
2664
|
+
len(cloud_resources) == 1
|
2665
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
2666
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
2667
|
+
|
2668
|
+
# Use CLI token to helm command
|
2669
|
+
helm_command = self._generate_helm_upgrade_command(
|
2670
|
+
provider=provider,
|
2671
|
+
cloud_deployment_id=cloud_resource_id,
|
2672
|
+
region=region if cloud_provider == CloudProviders.AZURE else None,
|
2673
|
+
kubernetes_zones=kubernetes_zones,
|
2674
|
+
operator_iam_identity=anyscale_operator_iam_identity
|
2675
|
+
if cloud_provider == CloudProviders.AZURE
|
2676
|
+
else None,
|
2677
|
+
anyscale_cli_token=None, # TODO: use $ANYSCALE_CLI_TOKEN placeholder
|
2678
|
+
)
|
2634
2679
|
|
2635
2680
|
self.log.info(
|
2636
2681
|
f"Cloud registration complete! To install the Anyscale operator, run:\n\n{helm_command}"
|
@@ -2661,6 +2706,8 @@ class CloudController(BaseController):
|
|
2661
2706
|
compute_stack: ComputeStack = ComputeStack.VM,
|
2662
2707
|
kubernetes_zones: Optional[List[str]] = None,
|
2663
2708
|
anyscale_operator_iam_identity: Optional[str] = None,
|
2709
|
+
persistent_volume_claim: Optional[str] = None,
|
2710
|
+
csi_ephemeral_volume_driver: Optional[str] = None,
|
2664
2711
|
):
|
2665
2712
|
functions_to_verify = self._validate_functional_verification_args(
|
2666
2713
|
functional_verify
|
@@ -2721,55 +2768,40 @@ class CloudController(BaseController):
|
|
2721
2768
|
raise
|
2722
2769
|
|
2723
2770
|
try:
|
2724
|
-
|
2725
|
-
|
2726
|
-
|
2727
|
-
|
2728
|
-
|
2729
|
-
|
2730
|
-
|
2731
|
-
|
2732
|
-
|
2733
|
-
|
2734
|
-
|
2771
|
+
cloud_resource = CloudDeployment(
|
2772
|
+
compute_stack=compute_stack,
|
2773
|
+
provider=CloudProviders.AWS,
|
2774
|
+
region=region,
|
2775
|
+
networking_mode=NetworkingMode.PRIVATE
|
2776
|
+
if private_network
|
2777
|
+
else NetworkingMode.PUBLIC,
|
2778
|
+
object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
|
2779
|
+
file_storage=FileStorage(
|
2780
|
+
file_storage_id=efs_id,
|
2781
|
+
persistent_volume_claim=persistent_volume_claim,
|
2782
|
+
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
2783
|
+
)
|
2784
|
+
if efs_id or persistent_volume_claim or csi_ephemeral_volume_driver
|
2785
|
+
else None,
|
2786
|
+
aws_config=AWSConfig(
|
2787
|
+
vpc_id=vpc_id,
|
2788
|
+
subnet_ids=subnet_ids,
|
2789
|
+
security_group_ids=security_group_ids,
|
2735
2790
|
anyscale_iam_role_id=anyscale_iam_role_id,
|
2736
2791
|
external_id=external_id,
|
2792
|
+
cluster_iam_role_id=instance_iam_role_id,
|
2793
|
+
memorydb_cluster_name=memorydb_cluster_id,
|
2794
|
+
),
|
2795
|
+
kubernetes_config=KubernetesConfig(
|
2796
|
+
anyscale_operator_iam_identity=anyscale_operator_iam_identity,
|
2797
|
+
zones=kubernetes_zones,
|
2737
2798
|
)
|
2738
|
-
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
boto3_session = boto3.Session(region_name=region)
|
2745
|
-
aws_efs_mount_target_ip = _get_aws_efs_mount_target_ip(
|
2746
|
-
boto3_session, efs_id
|
2747
|
-
)
|
2748
|
-
except ClientError as e:
|
2749
|
-
self.log.log_resource_exception(
|
2750
|
-
CloudAnalyticsEventCloudResource.AWS_EFS, e
|
2751
|
-
)
|
2752
|
-
raise e
|
2753
|
-
else:
|
2754
|
-
boto3_session = None
|
2755
|
-
aws_efs_mount_target_ip = None
|
2756
|
-
|
2757
|
-
# When running on the VM compute stack, associate the AWS subnets with their availability zones.
|
2758
|
-
if compute_stack == ComputeStack.VM:
|
2759
|
-
aws_subnet_ids_with_availability_zones = associate_aws_subnets_with_azs(
|
2760
|
-
subnet_ids, region, self.log
|
2761
|
-
)
|
2762
|
-
else:
|
2763
|
-
aws_subnet_ids_with_availability_zones = None
|
2764
|
-
|
2765
|
-
# If memorydb cluster is provided, get the memorydb cluster config.
|
2766
|
-
if memorydb_cluster_id is not None:
|
2767
|
-
memorydb_cluster_config = _get_memorydb_cluster_config(
|
2768
|
-
memorydb_cluster_id, region, self.log
|
2769
|
-
)
|
2770
|
-
else:
|
2771
|
-
memorydb_cluster_config = None
|
2772
|
-
|
2799
|
+
if compute_stack == ComputeStack.K8S
|
2800
|
+
else None,
|
2801
|
+
)
|
2802
|
+
role, iam_role_original_policy = self._preprocess_aws(
|
2803
|
+
cloud_id=cloud_id, deployment=cloud_resource
|
2804
|
+
)
|
2773
2805
|
self.cloud_event_producer.produce(
|
2774
2806
|
CloudAnalyticsEventName.PREPROCESS_COMPLETE, succeeded=True
|
2775
2807
|
)
|
@@ -2803,44 +2835,17 @@ class CloudController(BaseController):
|
|
2803
2835
|
)
|
2804
2836
|
raise ClickException(f"Cloud registration failed! {error}")
|
2805
2837
|
|
2806
|
-
aws_iam_role_arns = None
|
2807
|
-
if compute_stack == ComputeStack.VM:
|
2808
|
-
aws_iam_role_arns = [anyscale_iam_role_id, instance_iam_role_id]
|
2809
|
-
elif compute_stack == ComputeStack.K8S and anyscale_iam_role_id:
|
2810
|
-
aws_iam_role_arns = [anyscale_iam_role_id]
|
2811
|
-
|
2812
2838
|
try:
|
2813
2839
|
# Verify cloud resources meet our requirement
|
2814
|
-
create_cloud_resource = CreateCloudResource(
|
2815
|
-
aws_vpc_id=vpc_id,
|
2816
|
-
aws_subnet_ids_with_availability_zones=aws_subnet_ids_with_availability_zones,
|
2817
|
-
aws_iam_role_arns=aws_iam_role_arns,
|
2818
|
-
aws_security_groups=security_group_ids,
|
2819
|
-
aws_s3_id=cloud_storage_bucket_name[len(S3_STORAGE_PREFIX) :],
|
2820
|
-
aws_efs_id=efs_id,
|
2821
|
-
aws_efs_mount_target_ip=aws_efs_mount_target_ip,
|
2822
|
-
memorydb_cluster_config=memorydb_cluster_config,
|
2823
|
-
compute_stack=compute_stack,
|
2824
|
-
kubernetes_zones=kubernetes_zones,
|
2825
|
-
kubernetes_dataplane_identity=anyscale_operator_iam_identity,
|
2826
|
-
cloud_storage_bucket_name=cloud_storage_bucket_name,
|
2827
|
-
)
|
2828
|
-
|
2829
2840
|
# Verification is only performed for VM compute stack.
|
2830
2841
|
# TODO (shomilj): Add verification to the K8S compute stack as well.
|
2831
2842
|
if compute_stack == ComputeStack.VM:
|
2832
2843
|
with self.log.spinner("Verifying cloud resources...") as spinner:
|
2833
|
-
if boto3_session is None:
|
2834
|
-
boto3_session = boto3.Session(region_name=region)
|
2835
2844
|
if (
|
2836
2845
|
not skip_verifications
|
2837
|
-
and not self.
|
2838
|
-
cloud_resource=create_cloud_resource,
|
2839
|
-
boto3_session=boto3_session,
|
2840
|
-
region=region,
|
2841
|
-
is_bring_your_own_resource=True,
|
2842
|
-
is_private_network=private_network,
|
2846
|
+
and not self.verify_aws_cloud_resources_for_cloud_deployment(
|
2843
2847
|
cloud_id=cloud_id,
|
2848
|
+
cloud_deployment=cloud_resource,
|
2844
2849
|
logger=CloudSetupLogger(spinner_manager=spinner),
|
2845
2850
|
)
|
2846
2851
|
):
|
@@ -2888,12 +2893,9 @@ class CloudController(BaseController):
|
|
2888
2893
|
with self.log.spinner(
|
2889
2894
|
"Updating Anyscale cloud with cloud resource..."
|
2890
2895
|
) as spinner:
|
2891
|
-
#
|
2892
|
-
|
2893
|
-
cloud_id=cloud_id,
|
2894
|
-
update_cloud_with_cloud_resource=UpdateCloudWithCloudResource(
|
2895
|
-
cloud_resource_to_update=create_cloud_resource,
|
2896
|
-
),
|
2896
|
+
# Update cloud with verified cloud resources.
|
2897
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
2898
|
+
cloud_id=cloud_id, cloud_deployment=cloud_resource,
|
2897
2899
|
)
|
2898
2900
|
# For now, only wait for the cloud to be active if the compute stack is VM.
|
2899
2901
|
# TODO (shomilj): support this fully for Kubernetes after provider metadata
|
@@ -2901,7 +2903,15 @@ class CloudController(BaseController):
|
|
2901
2903
|
if compute_stack == ComputeStack.VM:
|
2902
2904
|
self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.AWS)
|
2903
2905
|
if compute_stack == ComputeStack.K8S:
|
2904
|
-
|
2906
|
+
# Get the cloud resource ID to pass to the helm command.
|
2907
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2908
|
+
cloud_id=cloud_id,
|
2909
|
+
).results
|
2910
|
+
assert (
|
2911
|
+
len(cloud_resources) == 1
|
2912
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
2913
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
2914
|
+
|
2905
2915
|
helm_command = self._generate_helm_upgrade_command(
|
2906
2916
|
provider="aws",
|
2907
2917
|
cloud_deployment_id=cloud_resource_id,
|
@@ -2947,7 +2957,12 @@ class CloudController(BaseController):
|
|
2947
2957
|
).start_verification(cloud_id, CloudProviders.AWS, functions_to_verify, yes)
|
2948
2958
|
|
2949
2959
|
def verify_gcp_cloud_resources_from_cloud_deployment(
|
2950
|
-
self,
|
2960
|
+
self,
|
2961
|
+
cloud_id: str,
|
2962
|
+
cloud_deployment: CloudDeployment,
|
2963
|
+
strict: bool = False,
|
2964
|
+
yes: bool = False,
|
2965
|
+
is_private_service_cloud: bool = False,
|
2951
2966
|
) -> bool:
|
2952
2967
|
assert cloud_deployment.region
|
2953
2968
|
assert cloud_deployment.gcp_config
|
@@ -2979,6 +2994,8 @@ class CloudController(BaseController):
|
|
2979
2994
|
cloud_id=cloud_id,
|
2980
2995
|
host_project_id=gcp_config.host_project_id,
|
2981
2996
|
strict=strict,
|
2997
|
+
yes=yes,
|
2998
|
+
is_private_service_cloud=is_private_service_cloud,
|
2982
2999
|
)
|
2983
3000
|
|
2984
3001
|
def verify_gcp_cloud_resources_from_create_cloud_resource(
|
@@ -3187,11 +3204,12 @@ class CloudController(BaseController):
|
|
3187
3204
|
compute_stack: ComputeStack = ComputeStack.VM,
|
3188
3205
|
kubernetes_zones: Optional[List[str]] = None,
|
3189
3206
|
anyscale_operator_iam_identity: Optional[str] = None,
|
3207
|
+
persistent_volume_claim: Optional[str] = None,
|
3208
|
+
csi_ephemeral_volume_driver: Optional[str] = None,
|
3190
3209
|
):
|
3191
3210
|
functions_to_verify = self._validate_functional_verification_args(
|
3192
3211
|
functional_verify
|
3193
3212
|
)
|
3194
|
-
gcp_utils = try_import_gcp_utils()
|
3195
3213
|
|
3196
3214
|
# Create a cloud without cloud resources first
|
3197
3215
|
# Provider ID is optional for K8s clouds.
|
@@ -3267,11 +3285,6 @@ class CloudController(BaseController):
|
|
3267
3285
|
raise
|
3268
3286
|
|
3269
3287
|
try:
|
3270
|
-
# Set defaults for Kubernetes clouds.
|
3271
|
-
if compute_stack == ComputeStack.K8S:
|
3272
|
-
instance_service_account_email = ""
|
3273
|
-
subnet_names = []
|
3274
|
-
|
3275
3288
|
enable_filestore = filestore_location and filestore_instance_id
|
3276
3289
|
|
3277
3290
|
# Normally, for Kubernetes clouds, we don't need a VPC name, since networking is managed by Kubernetes.
|
@@ -3285,72 +3298,62 @@ class CloudController(BaseController):
|
|
3285
3298
|
if (enable_filestore or memorystore_instance_name) and not project_id:
|
3286
3299
|
raise ClickException("Please provide a project ID.")
|
3287
3300
|
|
3288
|
-
if project_id:
|
3289
|
-
factory = gcp_utils.get_google_cloud_client_factory(
|
3290
|
-
self.log, project_id
|
3291
|
-
)
|
3292
|
-
|
3293
|
-
if enable_filestore:
|
3294
|
-
filestore_config = gcp_utils.get_gcp_filestore_config(
|
3295
|
-
factory,
|
3296
|
-
project_id,
|
3297
|
-
vpc_name,
|
3298
|
-
filestore_location,
|
3299
|
-
filestore_instance_id,
|
3300
|
-
self.log,
|
3301
|
-
)
|
3302
|
-
else:
|
3303
|
-
filestore_config = GCPFileStoreConfig(
|
3304
|
-
instance_name="", mount_target_ip="", root_dir=""
|
3305
|
-
)
|
3306
|
-
if compute_stack == ComputeStack.K8S:
|
3307
|
-
# Set vpc_name to empty string for Kubernetes clouds
|
3308
|
-
vpc_name = ""
|
3309
|
-
|
3310
|
-
if memorystore_instance_name:
|
3311
|
-
memorystore_instance_config = gcp_utils.get_gcp_memorystore_config(
|
3312
|
-
factory, memorystore_instance_name
|
3313
|
-
)
|
3314
|
-
else:
|
3315
|
-
memorystore_instance_config = None
|
3316
|
-
|
3317
3301
|
if not cloud_storage_bucket_name.startswith(GCS_STORAGE_PREFIX):
|
3318
3302
|
cloud_storage_bucket_name = (
|
3319
3303
|
GCS_STORAGE_PREFIX + cloud_storage_bucket_name
|
3320
3304
|
)
|
3321
3305
|
|
3322
|
-
|
3323
|
-
create_cloud_resource_gcp = CreateCloudResourceGCP(
|
3324
|
-
gcp_vpc_id=vpc_name,
|
3325
|
-
gcp_subnet_ids=subnet_names,
|
3326
|
-
gcp_cluster_node_service_account_email=instance_service_account_email,
|
3327
|
-
gcp_anyscale_iam_service_account_email=anyscale_service_account_email
|
3328
|
-
or "",
|
3329
|
-
gcp_filestore_config=filestore_config,
|
3330
|
-
gcp_firewall_policy_ids=firewall_policy_names,
|
3331
|
-
gcp_cloud_storage_bucket_id=cloud_storage_bucket_name[
|
3332
|
-
len(GCS_STORAGE_PREFIX) :
|
3333
|
-
],
|
3334
|
-
memorystore_instance_config=memorystore_instance_config,
|
3306
|
+
cloud_resource = CloudDeployment(
|
3335
3307
|
compute_stack=compute_stack,
|
3336
|
-
|
3337
|
-
|
3338
|
-
|
3308
|
+
provider=CloudProviders.GCP,
|
3309
|
+
region=region,
|
3310
|
+
networking_mode=NetworkingMode.PRIVATE
|
3311
|
+
if private_network
|
3312
|
+
else NetworkingMode.PUBLIC,
|
3313
|
+
object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
|
3314
|
+
file_storage=FileStorage(
|
3315
|
+
file_storage_id="projects/{}/locations/{}/instances/{}".format(
|
3316
|
+
project_id, filestore_location, filestore_instance_id
|
3317
|
+
)
|
3318
|
+
if filestore_instance_id
|
3319
|
+
else None,
|
3320
|
+
persistent_volume_claim=persistent_volume_claim,
|
3321
|
+
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
3322
|
+
)
|
3323
|
+
if filestore_instance_id
|
3324
|
+
or persistent_volume_claim
|
3325
|
+
or csi_ephemeral_volume_driver
|
3326
|
+
else None,
|
3327
|
+
gcp_config=GCPConfig(
|
3328
|
+
project_id=project_id,
|
3329
|
+
host_project_id=host_project_id,
|
3330
|
+
provider_name=provider_id,
|
3331
|
+
vpc_name=vpc_name,
|
3332
|
+
subnet_names=subnet_names,
|
3333
|
+
firewall_policy_names=firewall_policy_names,
|
3334
|
+
anyscale_service_account_email=anyscale_service_account_email,
|
3335
|
+
cluster_service_account_email=instance_service_account_email,
|
3336
|
+
memorystore_instance_name=memorystore_instance_name,
|
3337
|
+
),
|
3338
|
+
kubernetes_config=KubernetesConfig(
|
3339
|
+
anyscale_operator_iam_identity=anyscale_operator_iam_identity,
|
3340
|
+
zones=kubernetes_zones,
|
3341
|
+
)
|
3342
|
+
if compute_stack == ComputeStack.K8S
|
3343
|
+
else None,
|
3339
3344
|
)
|
3340
3345
|
|
3346
|
+
self._preprocess_gcp(cloud_resource)
|
3347
|
+
|
3341
3348
|
# Verification is only performed for VM compute stack.
|
3342
3349
|
# TODO (shomilj): Add verification to the K8S compute stack as well.
|
3343
3350
|
if compute_stack == ComputeStack.VM:
|
3344
3351
|
if (
|
3345
3352
|
not skip_verifications
|
3346
|
-
and not self.
|
3347
|
-
cloud_resource=create_cloud_resource_gcp,
|
3348
|
-
project_id=project_id,
|
3349
|
-
host_project_id=host_project_id,
|
3350
|
-
region=region,
|
3353
|
+
and not self.verify_gcp_cloud_resources_from_cloud_deployment(
|
3351
3354
|
cloud_id=cloud_id,
|
3355
|
+
cloud_deployment=cloud_resource,
|
3352
3356
|
yes=yes,
|
3353
|
-
factory=factory,
|
3354
3357
|
is_private_service_cloud=is_private_service_cloud,
|
3355
3358
|
)
|
3356
3359
|
):
|
@@ -3384,13 +3387,10 @@ class CloudController(BaseController):
|
|
3384
3387
|
raise ClickException(f"Cloud registration failed! {e}")
|
3385
3388
|
|
3386
3389
|
try:
|
3387
|
-
# update cloud with verified cloud resources
|
3388
3390
|
with self.log.spinner("Updating Anyscale cloud with cloud resources..."):
|
3389
|
-
|
3390
|
-
|
3391
|
-
|
3392
|
-
cloud_resource_to_update=create_cloud_resource_gcp,
|
3393
|
-
),
|
3391
|
+
# Update cloud with verified cloud resources.
|
3392
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
3393
|
+
cloud_id=cloud_id, cloud_deployment=cloud_resource,
|
3394
3394
|
)
|
3395
3395
|
# For now, only wait for the cloud to be active if the compute stack is VM.
|
3396
3396
|
# TODO (shomilj): support this fully for Kubernetes after provider metadata
|
@@ -3398,7 +3398,15 @@ class CloudController(BaseController):
|
|
3398
3398
|
if compute_stack == ComputeStack.VM:
|
3399
3399
|
self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.GCP)
|
3400
3400
|
if compute_stack == ComputeStack.K8S:
|
3401
|
-
|
3401
|
+
# Get the cloud resource ID to pass to the helm command.
|
3402
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
3403
|
+
cloud_id=cloud_id,
|
3404
|
+
).results
|
3405
|
+
assert (
|
3406
|
+
len(cloud_resources) == 1
|
3407
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
3408
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
3409
|
+
|
3402
3410
|
helm_command = self._generate_helm_upgrade_command(
|
3403
3411
|
provider="gcp",
|
3404
3412
|
cloud_deployment_id=cloud_resource_id,
|
@@ -3518,13 +3526,6 @@ class CloudController(BaseController):
|
|
3518
3526
|
)
|
3519
3527
|
|
3520
3528
|
cloud = response.result
|
3521
|
-
except ClickException as e:
|
3522
|
-
raise ClickException(
|
3523
|
-
f"Failed to update cloud state to deleting for cloud {cloud_name}: {e}"
|
3524
|
-
)
|
3525
|
-
|
3526
|
-
# Clean up cloud resources
|
3527
|
-
try:
|
3528
3529
|
if cloud_provider == CloudProviders.AWS:
|
3529
3530
|
if not (cloud.is_aioa or cloud.compute_stack == ComputeStack.K8S):
|
3530
3531
|
# Delete services resources
|