anyscale 0.26.52__py3-none-any.whl → 0.26.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/_private/anyscale_client/anyscale_client.py +26 -26
- anyscale/_private/anyscale_client/common.py +5 -5
- anyscale/_private/anyscale_client/fake_anyscale_client.py +6 -6
- anyscale/_private/docgen/__main__.py +8 -8
- anyscale/_private/docgen/generator.py +48 -10
- anyscale/_private/docgen/models.md +2 -2
- anyscale/_private/sdk/__init__.py +124 -1
- anyscale/_private/workload/workload_config.py +4 -6
- anyscale/_private/workload/workload_sdk.py +9 -11
- anyscale/client/README.md +14 -13
- anyscale/client/openapi_client/__init__.py +4 -4
- anyscale/client/openapi_client/api/default_api.py +395 -325
- anyscale/client/openapi_client/models/__init__.py +4 -4
- anyscale/client/openapi_client/models/aws_config.py +2 -2
- anyscale/client/openapi_client/models/baseimagesenum.py +76 -1
- anyscale/client/openapi_client/models/cloud_data_bucket_file_type.py +2 -1
- anyscale/client/openapi_client/models/cloud_data_bucket_presigned_url_request.py +31 -3
- anyscale/client/openapi_client/models/cloud_deployment.py +37 -36
- anyscale/client/openapi_client/models/create_resource_notification.py +31 -3
- anyscale/client/openapi_client/models/{decorated_cloud_deployment.py → decorated_cloud_resource.py} +124 -96
- anyscale/client/openapi_client/models/{clouddeployment_list_response.py → decoratedcloudresource_list_response.py} +15 -15
- anyscale/client/openapi_client/models/{clouddeployment_response.py → decoratedcloudresource_response.py} +11 -11
- anyscale/client/openapi_client/models/file_storage.py +4 -4
- anyscale/client/openapi_client/models/gcp_config.py +2 -2
- anyscale/client/openapi_client/models/ha_job_error_types.py +9 -2
- anyscale/client/openapi_client/models/object_storage.py +2 -2
- anyscale/client/openapi_client/models/{decoratedclouddeployment_response.py → presigned_url_response.py} +24 -22
- anyscale/client/openapi_client/models/production_job_event.py +31 -3
- anyscale/client/openapi_client/models/resource_alert_event_type.py +2 -1
- anyscale/client/openapi_client/models/resource_notification.py +29 -1
- anyscale/client/openapi_client/models/supportedbaseimagesenum.py +76 -1
- anyscale/client/openapi_client/models/workload_info.py +31 -3
- anyscale/client/openapi_client/models/workload_state_info.py +29 -1
- anyscale/cloud/models.py +39 -42
- anyscale/commands/cloud_commands.py +25 -23
- anyscale/commands/command_examples.py +10 -10
- anyscale/commands/exec_commands.py +12 -1
- anyscale/commands/list_commands.py +42 -12
- anyscale/commands/project_commands.py +23 -10
- anyscale/commands/schedule_commands.py +22 -11
- anyscale/commands/service_commands.py +11 -6
- anyscale/commands/util.py +94 -1
- anyscale/commands/workspace_commands.py +92 -38
- anyscale/compute_config/__init__.py +1 -1
- anyscale/compute_config/_private/compute_config_sdk.py +8 -11
- anyscale/compute_config/commands.py +3 -3
- anyscale/compute_config/models.py +30 -30
- anyscale/controllers/cloud_controller.py +306 -300
- anyscale/controllers/kubernetes_verifier.py +1 -1
- anyscale/job/_private/job_sdk.py +12 -12
- anyscale/job/models.py +1 -1
- anyscale/sdk/anyscale_client/models/baseimagesenum.py +76 -1
- anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +76 -1
- anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
- anyscale/version.py +1 -1
- anyscale/workspace/commands.py +114 -23
- anyscale/workspace/models.py +3 -5
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/METADATA +1 -1
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/RECORD +64 -64
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/WHEEL +0 -0
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.52.dist-info → anyscale-0.26.54.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,13 @@ from anyscale.client.openapi_client.models import (
|
|
42
42
|
ComputeStack,
|
43
43
|
CreateCloudResource,
|
44
44
|
CreateCloudResourceGCP,
|
45
|
+
DecoratedCloudResource,
|
45
46
|
EditableCloudResource,
|
46
47
|
EditableCloudResourceGCP,
|
47
48
|
FileStorage,
|
48
49
|
GCPConfig,
|
49
50
|
GCPFileStoreConfig,
|
51
|
+
KubernetesConfig,
|
50
52
|
NetworkingMode,
|
51
53
|
NFSMountTarget,
|
52
54
|
ObjectStorage,
|
@@ -1437,33 +1439,52 @@ class CloudController(BaseController):
|
|
1437
1439
|
return [self._remove_empty_values(v) for v in d]
|
1438
1440
|
return d
|
1439
1441
|
|
1440
|
-
def
|
1442
|
+
def get_decorated_cloud_resources(
|
1443
|
+
self, cloud_id: str
|
1444
|
+
) -> List[DecoratedCloudResource]:
|
1441
1445
|
cloud = self.api_client.get_cloud_api_v2_clouds_cloud_id_get(
|
1442
1446
|
cloud_id=cloud_id,
|
1443
1447
|
).result
|
1444
1448
|
|
1445
1449
|
if cloud.is_aioa:
|
1446
1450
|
raise ValueError(
|
1447
|
-
"Listing cloud
|
1451
|
+
"Listing cloud resources is only supported for customer-hosted clouds."
|
1448
1452
|
)
|
1449
1453
|
|
1450
1454
|
try:
|
1451
|
-
|
1455
|
+
return self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
1452
1456
|
cloud_id=cloud_id,
|
1453
1457
|
).results
|
1454
1458
|
except Exception as e: # noqa: BLE001
|
1455
1459
|
raise ClickException(
|
1456
|
-
f"Failed to get cloud
|
1460
|
+
f"Failed to get cloud resources for cloud {cloud.name} ({cloud_id}). Error: {e}"
|
1457
1461
|
)
|
1458
1462
|
|
1459
|
-
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1463
|
-
|
1464
|
-
|
1465
|
-
|
1466
|
-
|
1463
|
+
def get_formatted_cloud_resources(self, cloud_id: str) -> List[Any]:
|
1464
|
+
cloud_resources = self.get_decorated_cloud_resources(cloud_id)
|
1465
|
+
formatted_cloud_resources = [
|
1466
|
+
self._remove_empty_values(cloud_resource.to_dict())
|
1467
|
+
for cloud_resource in cloud_resources
|
1468
|
+
]
|
1469
|
+
# Remove the deprecated cloud_deployment_id field.
|
1470
|
+
for d in formatted_cloud_resources:
|
1471
|
+
d.pop("cloud_deployment_id", None)
|
1472
|
+
return formatted_cloud_resources
|
1473
|
+
|
1474
|
+
def get_cloud_resources(self, cloud_id: str) -> List[CloudDeployment]:
|
1475
|
+
decorated_cloud_resources = self.get_decorated_cloud_resources(cloud_id)
|
1476
|
+
|
1477
|
+
# DecoratedCloudResource has extra fields that are not in CloudDeployment.
|
1478
|
+
allowed_keys = set(CloudDeployment.attribute_map.keys())
|
1479
|
+
allowed_keys.remove(
|
1480
|
+
"cloud_deployment_id"
|
1481
|
+
) # Remove deprecated cloud_deployment_id field.
|
1482
|
+
return [
|
1483
|
+
CloudDeployment(
|
1484
|
+
**{k: v for k, v in resource.to_dict().items() if k in allowed_keys}
|
1485
|
+
)
|
1486
|
+
for resource in decorated_cloud_resources
|
1487
|
+
]
|
1467
1488
|
|
1468
1489
|
def update_aws_anyscale_iam_role(
|
1469
1490
|
self,
|
@@ -1529,7 +1550,7 @@ class CloudController(BaseController):
|
|
1529
1550
|
|
1530
1551
|
return role, iam_role_original_policy
|
1531
1552
|
|
1532
|
-
def _generate_diff(self, existing:
|
1553
|
+
def _generate_diff(self, existing: List[Any], new: List[Any]) -> str:
|
1533
1554
|
"""
|
1534
1555
|
Generates a diff between the existing and new dicts.
|
1535
1556
|
"""
|
@@ -1555,48 +1576,59 @@ class CloudController(BaseController):
|
|
1555
1576
|
|
1556
1577
|
return formatted_diff.strip()
|
1557
1578
|
|
1558
|
-
|
1579
|
+
# Returns the role and original IAM policy, so that we can revert it if creating the cloud resource fails.
|
1580
|
+
def _preprocess_aws( # noqa: PLR0912
|
1581
|
+
self, cloud_id: str, deployment: CloudDeployment
|
1582
|
+
) -> Tuple[Optional[Boto3Resource], Optional[str]]:
|
1559
1583
|
if not deployment.aws_config and not deployment.file_storage:
|
1560
|
-
return
|
1584
|
+
return None, None
|
1561
1585
|
|
1562
1586
|
if not validate_aws_credentials(self.log):
|
1563
1587
|
raise ClickException(
|
1564
|
-
"Updating cloud
|
1588
|
+
"Updating cloud resources requires valid AWS credentials to be set locally. Learn more: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html"
|
1565
1589
|
)
|
1566
1590
|
|
1591
|
+
role, iam_role_original_policy = None, None
|
1592
|
+
|
1567
1593
|
# Get EFS mount target IP.
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1572
|
-
|
1594
|
+
file_storage = None
|
1595
|
+
if deployment.file_storage:
|
1596
|
+
if isinstance(deployment.file_storage, dict):
|
1597
|
+
file_storage = FileStorage(**deployment.file_storage)
|
1598
|
+
else:
|
1599
|
+
file_storage = deployment.file_storage
|
1573
1600
|
|
1574
|
-
|
1575
|
-
|
1576
|
-
|
1577
|
-
|
1578
|
-
|
1579
|
-
if not efs_mount_target_ip:
|
1580
|
-
raise ClickException(
|
1581
|
-
f"EFS mount target IP not found for {file_storage.file_storage_id}."
|
1601
|
+
if file_storage.file_storage_id:
|
1602
|
+
try:
|
1603
|
+
boto3_session = boto3.Session(region_name=deployment.region)
|
1604
|
+
efs_mount_target_ip = _get_aws_efs_mount_target_ip(
|
1605
|
+
boto3_session, file_storage.file_storage_id,
|
1582
1606
|
)
|
1583
|
-
|
1584
|
-
|
1585
|
-
|
1586
|
-
|
1587
|
-
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1607
|
+
if not efs_mount_target_ip:
|
1608
|
+
raise ClickException(
|
1609
|
+
f"EFS mount target IP not found for {file_storage.file_storage_id}."
|
1610
|
+
)
|
1611
|
+
file_storage.mount_targets = [
|
1612
|
+
NFSMountTarget(address=efs_mount_target_ip)
|
1613
|
+
]
|
1614
|
+
except ClientError as e:
|
1615
|
+
self.log.log_resource_exception(
|
1616
|
+
CloudAnalyticsEventCloudResource.AWS_EFS, e
|
1617
|
+
)
|
1618
|
+
raise e
|
1591
1619
|
|
1592
1620
|
deployment.file_storage = file_storage
|
1593
1621
|
|
1594
1622
|
if deployment.aws_config:
|
1595
|
-
|
1623
|
+
if isinstance(deployment.aws_config, dict):
|
1624
|
+
aws_config = AWSConfig(**deployment.aws_config)
|
1625
|
+
else:
|
1626
|
+
aws_config = deployment.aws_config
|
1627
|
+
|
1596
1628
|
assert deployment.region
|
1597
1629
|
|
1598
1630
|
# Update Anyscale IAM role's assume policy to include the cloud ID as the external ID.
|
1599
|
-
self.update_aws_anyscale_iam_role(
|
1631
|
+
role, iam_role_original_policy = self.update_aws_anyscale_iam_role(
|
1600
1632
|
cloud_id,
|
1601
1633
|
deployment.region,
|
1602
1634
|
aws_config.anyscale_iam_role_id,
|
@@ -1623,13 +1655,19 @@ class CloudController(BaseController):
|
|
1623
1655
|
|
1624
1656
|
deployment.aws_config = aws_config
|
1625
1657
|
|
1658
|
+
return role, iam_role_original_policy
|
1659
|
+
|
1626
1660
|
def _preprocess_gcp(
|
1627
1661
|
self, deployment: CloudDeployment,
|
1628
1662
|
):
|
1629
1663
|
if not deployment.gcp_config:
|
1630
1664
|
return
|
1631
1665
|
|
1632
|
-
|
1666
|
+
if isinstance(deployment.gcp_config, dict):
|
1667
|
+
gcp_config = GCPConfig(**deployment.gcp_config)
|
1668
|
+
else:
|
1669
|
+
gcp_config = deployment.gcp_config
|
1670
|
+
|
1633
1671
|
deployment.gcp_config = gcp_config
|
1634
1672
|
if not deployment.file_storage and not gcp_config.memorystore_instance_name:
|
1635
1673
|
return
|
@@ -1646,7 +1684,11 @@ class CloudController(BaseController):
|
|
1646
1684
|
|
1647
1685
|
# Get Filestore mount target IP and root dir.
|
1648
1686
|
if deployment.file_storage:
|
1649
|
-
|
1687
|
+
if isinstance(deployment.file_storage, dict):
|
1688
|
+
fs = FileStorage(**deployment.file_storage)
|
1689
|
+
else:
|
1690
|
+
fs = deployment.file_storage
|
1691
|
+
|
1650
1692
|
if fs.file_storage_id:
|
1651
1693
|
if not gcp_config.vpc_name:
|
1652
1694
|
raise ClickException(
|
@@ -1664,7 +1706,7 @@ class CloudController(BaseController):
|
|
1664
1706
|
NFSMountTarget(address=filestore_config.mount_target_ip)
|
1665
1707
|
]
|
1666
1708
|
|
1667
|
-
|
1709
|
+
deployment.file_storage = fs
|
1668
1710
|
|
1669
1711
|
# Get Memorystore config.
|
1670
1712
|
if gcp_config.memorystore_instance_name:
|
@@ -1676,7 +1718,7 @@ class CloudController(BaseController):
|
|
1676
1718
|
|
1677
1719
|
deployment.gcp_config = gcp_config
|
1678
1720
|
|
1679
|
-
def
|
1721
|
+
def create_cloud_resource(
|
1680
1722
|
self,
|
1681
1723
|
cloud_name: str,
|
1682
1724
|
spec_file: str,
|
@@ -1696,7 +1738,7 @@ class CloudController(BaseController):
|
|
1696
1738
|
try:
|
1697
1739
|
new_deployment = CloudDeployment(**spec)
|
1698
1740
|
except Exception as e: # noqa: BLE001
|
1699
|
-
raise ClickException(f"Failed to parse
|
1741
|
+
raise ClickException(f"Failed to parse cloud resource: {e}")
|
1700
1742
|
|
1701
1743
|
if new_deployment.provider == CloudProviders.AWS:
|
1702
1744
|
self._preprocess_aws(cloud_id=cloud_id, deployment=new_deployment)
|
@@ -1706,17 +1748,16 @@ class CloudController(BaseController):
|
|
1706
1748
|
if not skip_verification and not self.verify_cloud_deployment(
|
1707
1749
|
cloud_id=cloud_id, cloud_deployment=new_deployment
|
1708
1750
|
):
|
1709
|
-
raise ClickException("Cloud
|
1751
|
+
raise ClickException("Cloud resource verification failed.")
|
1710
1752
|
|
1711
1753
|
# Log an additional warning if a new deployment is being added but a deployment with the same AWS/GCP region already exists.
|
1712
|
-
|
1713
|
-
|
1714
|
-
|
1715
|
-
for deployment in existing_spec["deployments"]
|
1754
|
+
existing_resources = {
|
1755
|
+
resource.cloud_resource_id: resource
|
1756
|
+
for resource in self.get_cloud_resources(cloud_id)
|
1716
1757
|
}
|
1717
1758
|
existing_stack_provider_regions = {
|
1718
1759
|
(d.compute_stack, d.provider, d.region)
|
1719
|
-
for d in
|
1760
|
+
for d in existing_resources.values()
|
1720
1761
|
if d.provider in (CloudProviders.AWS, CloudProviders.GCP)
|
1721
1762
|
}
|
1722
1763
|
if (
|
@@ -1725,23 +1766,23 @@ class CloudController(BaseController):
|
|
1725
1766
|
new_deployment.region,
|
1726
1767
|
) in existing_stack_provider_regions:
|
1727
1768
|
self.log.warning(
|
1728
|
-
f"A {new_deployment.provider} {new_deployment.compute_stack}
|
1769
|
+
f"A {new_deployment.provider} {new_deployment.compute_stack} resource in region {new_deployment.region} already exists."
|
1729
1770
|
)
|
1730
|
-
confirm("Would you like to proceed with adding this
|
1771
|
+
confirm("Would you like to proceed with adding this cloud resource?", yes)
|
1731
1772
|
|
1732
|
-
# Add the
|
1773
|
+
# Add the resource.
|
1733
1774
|
try:
|
1734
|
-
self.api_client.
|
1775
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
1735
1776
|
cloud_id=cloud_id, cloud_deployment=new_deployment,
|
1736
1777
|
)
|
1737
1778
|
except Exception as e: # noqa: BLE001
|
1738
|
-
raise ClickException(f"Failed to add cloud
|
1779
|
+
raise ClickException(f"Failed to add cloud resource: {e}")
|
1739
1780
|
|
1740
1781
|
self.log.info(
|
1741
|
-
f"Successfully created cloud
|
1782
|
+
f"Successfully created cloud resource{' ' + new_deployment.name if new_deployment.name else ''} in cloud {cloud_name}!"
|
1742
1783
|
)
|
1743
1784
|
|
1744
|
-
def
|
1785
|
+
def update_cloud_resources( # noqa: PLR0912, C901
|
1745
1786
|
self,
|
1746
1787
|
cloud_name: Optional[str],
|
1747
1788
|
cloud_id: Optional[str],
|
@@ -1763,26 +1804,27 @@ class CloudController(BaseController):
|
|
1763
1804
|
spec = yaml.safe_load(path.read_text())
|
1764
1805
|
|
1765
1806
|
# Get the existing spec.
|
1766
|
-
|
1807
|
+
existing_resources = self.get_cloud_resources(cloud_id=cloud_id)
|
1767
1808
|
|
1768
|
-
if len(
|
1809
|
+
if len(existing_resources) > len(spec):
|
1769
1810
|
raise ClickException(
|
1770
|
-
"Please use `anyscale cloud
|
1811
|
+
"Please use `anyscale cloud resource delete` to remove cloud resources."
|
1771
1812
|
)
|
1772
|
-
if len(
|
1813
|
+
if len(existing_resources) < len(spec):
|
1773
1814
|
raise ClickException(
|
1774
|
-
"Please use `anyscale cloud
|
1815
|
+
"Please use `anyscale cloud resource create` to add cloud resources."
|
1775
1816
|
)
|
1776
1817
|
|
1777
1818
|
# Diff the existing and new specs
|
1778
|
-
diff = self._generate_diff(
|
1819
|
+
diff = self._generate_diff(
|
1820
|
+
[self._remove_empty_values(r.to_dict()) for r in existing_resources], spec
|
1821
|
+
)
|
1779
1822
|
if not diff:
|
1780
1823
|
self.log.info("No changes detected.")
|
1781
1824
|
return
|
1782
1825
|
|
1783
|
-
|
1784
|
-
|
1785
|
-
for deployment in existing_spec["deployments"]
|
1826
|
+
existing_resources_dict = {
|
1827
|
+
resource.cloud_resource_id: resource for resource in existing_resources
|
1786
1828
|
}
|
1787
1829
|
|
1788
1830
|
updated_deployments: List[CloudDeployment] = []
|
@@ -1790,21 +1832,21 @@ class CloudController(BaseController):
|
|
1790
1832
|
try:
|
1791
1833
|
deployment = CloudDeployment(**d)
|
1792
1834
|
except Exception as e: # noqa: BLE001
|
1793
|
-
raise ClickException(f"Failed to parse
|
1835
|
+
raise ClickException(f"Failed to parse cloud resource: {e}")
|
1794
1836
|
|
1795
|
-
if not deployment.
|
1837
|
+
if not deployment.cloud_resource_id:
|
1796
1838
|
raise ClickException(
|
1797
|
-
"All cloud
|
1839
|
+
"All cloud resources must include a cloud_resource_id."
|
1798
1840
|
)
|
1799
|
-
if deployment.
|
1841
|
+
if deployment.cloud_resource_id not in existing_resources_dict:
|
1800
1842
|
raise ClickException(
|
1801
|
-
f"Cloud
|
1843
|
+
f"Cloud resource {deployment.cloud_resource_id} not found."
|
1802
1844
|
)
|
1803
1845
|
if deployment.provider == CloudProviders.PCP:
|
1804
1846
|
raise ClickException(
|
1805
1847
|
"Please use the `anyscale machine-pool` CLI to update machine pools."
|
1806
1848
|
)
|
1807
|
-
if deployment !=
|
1849
|
+
if deployment != existing_resources_dict[deployment.cloud_resource_id]:
|
1808
1850
|
updated_deployments.append(deployment)
|
1809
1851
|
|
1810
1852
|
# Log the diff and confirm.
|
@@ -1826,42 +1868,42 @@ class CloudController(BaseController):
|
|
1826
1868
|
cloud_id=cloud_id, cloud_deployment=deployment
|
1827
1869
|
):
|
1828
1870
|
raise ClickException(
|
1829
|
-
f"Verification failed for cloud
|
1871
|
+
f"Verification failed for cloud resource {deployment.name or deployment.cloud_resource_id}."
|
1830
1872
|
)
|
1831
1873
|
|
1832
|
-
# Update the
|
1874
|
+
# Update the cloud resources.
|
1833
1875
|
try:
|
1834
|
-
self.api_client.
|
1876
|
+
self.api_client.update_cloud_resources_api_v2_clouds_cloud_id_resources_put(
|
1835
1877
|
cloud_id=cloud_id, cloud_deployment=updated_deployments,
|
1836
1878
|
)
|
1837
1879
|
except Exception as e: # noqa: BLE001
|
1838
|
-
raise ClickException(f"Failed to update cloud
|
1880
|
+
raise ClickException(f"Failed to update cloud resources: {e}")
|
1839
1881
|
|
1840
1882
|
self.log.info(f"Successfully updated cloud {cloud_name or cloud_id}.")
|
1841
1883
|
|
1842
|
-
def
|
1843
|
-
self, cloud_name: str,
|
1884
|
+
def remove_cloud_resource(
|
1885
|
+
self, cloud_name: str, resource_name: str, yes: bool,
|
1844
1886
|
):
|
1845
1887
|
confirm(
|
1846
|
-
f"Please confirm that you would like to remove
|
1888
|
+
f"Please confirm that you would like to remove resource {resource_name} from cloud {cloud_name}.",
|
1847
1889
|
yes,
|
1848
1890
|
)
|
1849
1891
|
|
1850
1892
|
cloud_id, _ = get_cloud_id_and_name(self.api_client, cloud_name=cloud_name)
|
1851
1893
|
try:
|
1852
|
-
with self.log.spinner("Removing cloud
|
1853
|
-
self.api_client.
|
1854
|
-
cloud_id=cloud_id,
|
1894
|
+
with self.log.spinner("Removing cloud resource..."):
|
1895
|
+
self.api_client.remove_cloud_resource_api_v2_clouds_cloud_id_remove_resource_delete(
|
1896
|
+
cloud_id=cloud_id, cloud_resource_name=resource_name,
|
1855
1897
|
)
|
1856
1898
|
except Exception as e: # noqa: BLE001
|
1857
|
-
raise ClickException(f"Failed to remove cloud
|
1899
|
+
raise ClickException(f"Failed to remove cloud resource: {e}")
|
1858
1900
|
|
1859
1901
|
self.log.warning(
|
1860
1902
|
"The trust policy or service account that provides access to Anyscale's control plane needs to be deleted manually if you no longer wish for Anyscale to have access."
|
1861
1903
|
)
|
1862
1904
|
|
1863
1905
|
self.log.info(
|
1864
|
-
f"Successfully removed
|
1906
|
+
f"Successfully removed resource {resource_name} from cloud {cloud_name}!"
|
1865
1907
|
)
|
1866
1908
|
|
1867
1909
|
def get_cloud_config(
|
@@ -2010,7 +2052,7 @@ class CloudController(BaseController):
|
|
2010
2052
|
yes: bool = False,
|
2011
2053
|
) -> bool:
|
2012
2054
|
"""
|
2013
|
-
Verifies a cloud by name or id, including all cloud
|
2055
|
+
Verifies a cloud by name or id, including all cloud resources.
|
2014
2056
|
|
2015
2057
|
Note: If your changes involve operations that may require additional permissions
|
2016
2058
|
(for example, `boto3_session.client("efs").describe_backup_policy`), it's important
|
@@ -2036,15 +2078,15 @@ class CloudController(BaseController):
|
|
2036
2078
|
return False
|
2037
2079
|
|
2038
2080
|
try:
|
2039
|
-
|
2081
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2040
2082
|
cloud_id=cloud_id,
|
2041
2083
|
).results
|
2042
2084
|
except Exception as e: # noqa: BLE001
|
2043
|
-
self.log.error(f"Failed to retrieve cloud
|
2085
|
+
self.log.error(f"Failed to retrieve cloud resources: {e}")
|
2044
2086
|
return False
|
2045
2087
|
|
2046
|
-
if not
|
2047
|
-
self.log.error("No cloud
|
2088
|
+
if not cloud_resources:
|
2089
|
+
self.log.error("No cloud resources found for this cloud")
|
2048
2090
|
return False
|
2049
2091
|
|
2050
2092
|
self.cloud_event_producer.init_trace_context(
|
@@ -2056,31 +2098,35 @@ class CloudController(BaseController):
|
|
2056
2098
|
CloudAnalyticsEventName.COMMAND_START, succeeded=True
|
2057
2099
|
)
|
2058
2100
|
|
2059
|
-
|
2060
|
-
for
|
2101
|
+
cloud_resource_results = []
|
2102
|
+
for cloud_resource in cloud_resources:
|
2061
2103
|
try:
|
2062
|
-
|
2104
|
+
cloud_resource_name = (
|
2105
|
+
cloud_resource.name or cloud_resource.cloud_resource_id
|
2106
|
+
)
|
2063
2107
|
|
2064
|
-
self.log.info(f"Verifying
|
2108
|
+
self.log.info(f"Verifying cloud resource: {cloud_resource_name}")
|
2065
2109
|
result = self.verify_cloud_deployment(
|
2066
2110
|
cloud_id,
|
2067
|
-
|
2111
|
+
cloud_resource,
|
2068
2112
|
strict=strict,
|
2069
2113
|
_use_strict_iam_permissions=_use_strict_iam_permissions,
|
2070
2114
|
boto3_session=boto3_session,
|
2071
2115
|
)
|
2072
|
-
|
2116
|
+
cloud_resource_results.append((cloud_resource_name, result))
|
2073
2117
|
|
2074
2118
|
except (ValueError, TypeError, KeyError, AttributeError, RuntimeError) as e:
|
2075
|
-
|
2076
|
-
|
2119
|
+
cloud_resource_name = getattr(cloud_resource, "name", None) or getattr(
|
2120
|
+
cloud_resource, "cloud_resource_id", "unknown"
|
2121
|
+
)
|
2122
|
+
self.log.error(
|
2123
|
+
f"Failed to verify cloud resource {cloud_resource_name}: {e}"
|
2077
2124
|
)
|
2078
|
-
|
2079
|
-
deployment_results.append((deployment_name, False))
|
2125
|
+
cloud_resource_results.append((cloud_resource_name, False))
|
2080
2126
|
|
2081
|
-
self.
|
2127
|
+
self._print_cloud_resource_verification_results(cloud_resource_results)
|
2082
2128
|
|
2083
|
-
overall_success = all(result for _, result in
|
2129
|
+
overall_success = all(result for _, result in cloud_resource_results)
|
2084
2130
|
|
2085
2131
|
self.cloud_event_producer.produce(
|
2086
2132
|
CloudAnalyticsEventName.RESOURCES_VERIFIED, succeeded=overall_success,
|
@@ -2137,6 +2183,7 @@ class CloudController(BaseController):
|
|
2137
2183
|
strict: bool = False,
|
2138
2184
|
_use_strict_iam_permissions: bool = False, # This should only be used in testing.
|
2139
2185
|
boto3_session: Optional[boto3.Session] = None,
|
2186
|
+
logger: CloudSetupLogger = None,
|
2140
2187
|
) -> bool:
|
2141
2188
|
assert cloud_deployment.region
|
2142
2189
|
assert cloud_deployment.aws_config
|
@@ -2176,6 +2223,7 @@ class CloudController(BaseController):
|
|
2176
2223
|
== NetworkingMode.PRIVATE,
|
2177
2224
|
strict=strict,
|
2178
2225
|
_use_strict_iam_permissions=_use_strict_iam_permissions,
|
2226
|
+
logger=logger,
|
2179
2227
|
)
|
2180
2228
|
|
2181
2229
|
def _get_memorydb_config_for_verification(
|
@@ -2465,26 +2513,26 @@ class CloudController(BaseController):
|
|
2465
2513
|
f"{quota_error_str}\n\nFor instructions on how to increase quotas, visit this link: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html#request-increase"
|
2466
2514
|
)
|
2467
2515
|
|
2468
|
-
def
|
2469
|
-
self,
|
2516
|
+
def _print_cloud_resource_verification_results(
|
2517
|
+
self, cloud_resource_results: List[Tuple[str, bool]]
|
2470
2518
|
) -> None:
|
2471
|
-
"""Print verification results for multiple
|
2519
|
+
"""Print verification results for multiple cloud resources"""
|
2472
2520
|
self.log.info("=" * 60)
|
2473
|
-
self.log.info("
|
2521
|
+
self.log.info("CLOUD RESOURCE VERIFICATION RESULTS:")
|
2474
2522
|
self.log.info("=" * 60)
|
2475
2523
|
|
2476
|
-
for
|
2524
|
+
for cloud_resource_name, success in cloud_resource_results:
|
2477
2525
|
status = "PASSED" if success else "FAILED"
|
2478
|
-
self.log.info(f"{
|
2526
|
+
self.log.info(f"{cloud_resource_name}: {status}")
|
2479
2527
|
|
2480
2528
|
self.log.info("=" * 60)
|
2481
2529
|
|
2482
|
-
passed_count = sum(1 for _, success in
|
2483
|
-
total_count = len(
|
2530
|
+
passed_count = sum(1 for _, success in cloud_resource_results if success)
|
2531
|
+
total_count = len(cloud_resource_results)
|
2484
2532
|
|
2485
2533
|
if passed_count == total_count:
|
2486
2534
|
self.log.info(
|
2487
|
-
f"Overall Result: ALL {total_count}
|
2535
|
+
f"Overall Result: ALL {total_count} cloud resources verified successfully"
|
2488
2536
|
)
|
2489
2537
|
|
2490
2538
|
def register_azure_or_generic_cloud( # noqa: PLR0913
|
@@ -2561,21 +2609,30 @@ class CloudController(BaseController):
|
|
2561
2609
|
# Attempt to create the cloud resource.
|
2562
2610
|
try:
|
2563
2611
|
with self.log.spinner("Registering Anyscale cloud resources..."):
|
2564
|
-
|
2612
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
2565
2613
|
cloud_id=cloud_id,
|
2566
|
-
|
2567
|
-
|
2568
|
-
|
2569
|
-
|
2570
|
-
|
2571
|
-
|
2572
|
-
|
2573
|
-
|
2574
|
-
|
2575
|
-
|
2614
|
+
cloud_deployment=CloudDeployment(
|
2615
|
+
compute_stack=ComputeStack.K8S,
|
2616
|
+
provider=cloud_provider,
|
2617
|
+
region=region,
|
2618
|
+
object_storage=ObjectStorage(
|
2619
|
+
bucket_name=cloud_storage_bucket_name,
|
2620
|
+
region=cloud_storage_bucket_region or region,
|
2621
|
+
endpoint=cloud_storage_bucket_endpoint,
|
2622
|
+
)
|
2623
|
+
if cloud_storage_bucket_name
|
2624
|
+
else None,
|
2625
|
+
file_storage=FileStorage(
|
2626
|
+
mount_targets=mount_targets,
|
2627
|
+
mount_path=nfs_mount_path,
|
2576
2628
|
persistent_volume_claim=persistent_volume_claim,
|
2577
2629
|
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
2578
|
-
)
|
2630
|
+
)
|
2631
|
+
if mount_targets
|
2632
|
+
or persistent_volume_claim
|
2633
|
+
or csi_ephemeral_volume_driver
|
2634
|
+
else None,
|
2635
|
+
kubernetes_config=KubernetesConfig(zones=kubernetes_zones,),
|
2579
2636
|
),
|
2580
2637
|
)
|
2581
2638
|
|
@@ -2598,26 +2655,27 @@ class CloudController(BaseController):
|
|
2598
2655
|
raise ClickException(f"Cloud registration failed! {e}")
|
2599
2656
|
|
2600
2657
|
# TODO (shomilj): Fetch & optionally run the Helm installation here.
|
2601
|
-
|
2602
|
-
#
|
2603
|
-
|
2604
|
-
|
2605
|
-
|
2606
|
-
|
2607
|
-
|
2608
|
-
|
2609
|
-
|
2610
|
-
|
2611
|
-
|
2612
|
-
|
2613
|
-
|
2614
|
-
|
2615
|
-
|
2616
|
-
|
2617
|
-
|
2618
|
-
|
2619
|
-
|
2620
|
-
|
2658
|
+
|
2659
|
+
# Get the cloud resource ID to pass to the helm command.
|
2660
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2661
|
+
cloud_id=cloud_id,
|
2662
|
+
).results
|
2663
|
+
assert (
|
2664
|
+
len(cloud_resources) == 1
|
2665
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
2666
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
2667
|
+
|
2668
|
+
# Use CLI token to helm command
|
2669
|
+
helm_command = self._generate_helm_upgrade_command(
|
2670
|
+
provider=provider,
|
2671
|
+
cloud_deployment_id=cloud_resource_id,
|
2672
|
+
region=region if cloud_provider == CloudProviders.AZURE else None,
|
2673
|
+
kubernetes_zones=kubernetes_zones,
|
2674
|
+
operator_iam_identity=anyscale_operator_iam_identity
|
2675
|
+
if cloud_provider == CloudProviders.AZURE
|
2676
|
+
else None,
|
2677
|
+
anyscale_cli_token=None, # TODO: use $ANYSCALE_CLI_TOKEN placeholder
|
2678
|
+
)
|
2621
2679
|
|
2622
2680
|
self.log.info(
|
2623
2681
|
f"Cloud registration complete! To install the Anyscale operator, run:\n\n{helm_command}"
|
@@ -2710,55 +2768,40 @@ class CloudController(BaseController):
|
|
2710
2768
|
raise
|
2711
2769
|
|
2712
2770
|
try:
|
2713
|
-
|
2714
|
-
|
2715
|
-
|
2716
|
-
|
2717
|
-
|
2718
|
-
|
2719
|
-
|
2720
|
-
|
2721
|
-
|
2722
|
-
|
2723
|
-
|
2771
|
+
cloud_resource = CloudDeployment(
|
2772
|
+
compute_stack=compute_stack,
|
2773
|
+
provider=CloudProviders.AWS,
|
2774
|
+
region=region,
|
2775
|
+
networking_mode=NetworkingMode.PRIVATE
|
2776
|
+
if private_network
|
2777
|
+
else NetworkingMode.PUBLIC,
|
2778
|
+
object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
|
2779
|
+
file_storage=FileStorage(
|
2780
|
+
file_storage_id=efs_id,
|
2781
|
+
persistent_volume_claim=persistent_volume_claim,
|
2782
|
+
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
2783
|
+
)
|
2784
|
+
if efs_id or persistent_volume_claim or csi_ephemeral_volume_driver
|
2785
|
+
else None,
|
2786
|
+
aws_config=AWSConfig(
|
2787
|
+
vpc_id=vpc_id,
|
2788
|
+
subnet_ids=subnet_ids,
|
2789
|
+
security_group_ids=security_group_ids,
|
2724
2790
|
anyscale_iam_role_id=anyscale_iam_role_id,
|
2725
2791
|
external_id=external_id,
|
2792
|
+
cluster_iam_role_id=instance_iam_role_id,
|
2793
|
+
memorydb_cluster_name=memorydb_cluster_id,
|
2794
|
+
),
|
2795
|
+
kubernetes_config=KubernetesConfig(
|
2796
|
+
anyscale_operator_iam_identity=anyscale_operator_iam_identity,
|
2797
|
+
zones=kubernetes_zones,
|
2726
2798
|
)
|
2727
|
-
|
2728
|
-
|
2729
|
-
|
2730
|
-
|
2731
|
-
|
2732
|
-
|
2733
|
-
boto3_session = boto3.Session(region_name=region)
|
2734
|
-
aws_efs_mount_target_ip = _get_aws_efs_mount_target_ip(
|
2735
|
-
boto3_session, efs_id
|
2736
|
-
)
|
2737
|
-
except ClientError as e:
|
2738
|
-
self.log.log_resource_exception(
|
2739
|
-
CloudAnalyticsEventCloudResource.AWS_EFS, e
|
2740
|
-
)
|
2741
|
-
raise e
|
2742
|
-
else:
|
2743
|
-
boto3_session = None
|
2744
|
-
aws_efs_mount_target_ip = None
|
2745
|
-
|
2746
|
-
# When running on the VM compute stack, associate the AWS subnets with their availability zones.
|
2747
|
-
if compute_stack == ComputeStack.VM:
|
2748
|
-
aws_subnet_ids_with_availability_zones = associate_aws_subnets_with_azs(
|
2749
|
-
subnet_ids, region, self.log
|
2750
|
-
)
|
2751
|
-
else:
|
2752
|
-
aws_subnet_ids_with_availability_zones = None
|
2753
|
-
|
2754
|
-
# If memorydb cluster is provided, get the memorydb cluster config.
|
2755
|
-
if memorydb_cluster_id is not None:
|
2756
|
-
memorydb_cluster_config = _get_memorydb_cluster_config(
|
2757
|
-
memorydb_cluster_id, region, self.log
|
2758
|
-
)
|
2759
|
-
else:
|
2760
|
-
memorydb_cluster_config = None
|
2761
|
-
|
2799
|
+
if compute_stack == ComputeStack.K8S
|
2800
|
+
else None,
|
2801
|
+
)
|
2802
|
+
role, iam_role_original_policy = self._preprocess_aws(
|
2803
|
+
cloud_id=cloud_id, deployment=cloud_resource
|
2804
|
+
)
|
2762
2805
|
self.cloud_event_producer.produce(
|
2763
2806
|
CloudAnalyticsEventName.PREPROCESS_COMPLETE, succeeded=True
|
2764
2807
|
)
|
@@ -2792,46 +2835,17 @@ class CloudController(BaseController):
|
|
2792
2835
|
)
|
2793
2836
|
raise ClickException(f"Cloud registration failed! {error}")
|
2794
2837
|
|
2795
|
-
aws_iam_role_arns = None
|
2796
|
-
if compute_stack == ComputeStack.VM:
|
2797
|
-
aws_iam_role_arns = [anyscale_iam_role_id, instance_iam_role_id]
|
2798
|
-
elif compute_stack == ComputeStack.K8S and anyscale_iam_role_id:
|
2799
|
-
aws_iam_role_arns = [anyscale_iam_role_id]
|
2800
|
-
|
2801
2838
|
try:
|
2802
2839
|
# Verify cloud resources meet our requirement
|
2803
|
-
create_cloud_resource = CreateCloudResource(
|
2804
|
-
aws_vpc_id=vpc_id,
|
2805
|
-
aws_subnet_ids_with_availability_zones=aws_subnet_ids_with_availability_zones,
|
2806
|
-
aws_iam_role_arns=aws_iam_role_arns,
|
2807
|
-
aws_security_groups=security_group_ids,
|
2808
|
-
aws_s3_id=cloud_storage_bucket_name[len(S3_STORAGE_PREFIX) :],
|
2809
|
-
aws_efs_id=efs_id,
|
2810
|
-
aws_efs_mount_target_ip=aws_efs_mount_target_ip,
|
2811
|
-
memorydb_cluster_config=memorydb_cluster_config,
|
2812
|
-
compute_stack=compute_stack,
|
2813
|
-
kubernetes_zones=kubernetes_zones,
|
2814
|
-
kubernetes_dataplane_identity=anyscale_operator_iam_identity,
|
2815
|
-
cloud_storage_bucket_name=cloud_storage_bucket_name,
|
2816
|
-
persistent_volume_claim=persistent_volume_claim,
|
2817
|
-
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
2818
|
-
)
|
2819
|
-
|
2820
2840
|
# Verification is only performed for VM compute stack.
|
2821
2841
|
# TODO (shomilj): Add verification to the K8S compute stack as well.
|
2822
2842
|
if compute_stack == ComputeStack.VM:
|
2823
2843
|
with self.log.spinner("Verifying cloud resources...") as spinner:
|
2824
|
-
if boto3_session is None:
|
2825
|
-
boto3_session = boto3.Session(region_name=region)
|
2826
2844
|
if (
|
2827
2845
|
not skip_verifications
|
2828
|
-
and not self.
|
2829
|
-
cloud_resource=create_cloud_resource,
|
2830
|
-
boto3_session=boto3_session,
|
2831
|
-
region=region,
|
2832
|
-
is_bring_your_own_resource=True,
|
2833
|
-
is_private_network=private_network,
|
2846
|
+
and not self.verify_aws_cloud_resources_for_cloud_deployment(
|
2834
2847
|
cloud_id=cloud_id,
|
2848
|
+
cloud_deployment=cloud_resource,
|
2835
2849
|
logger=CloudSetupLogger(spinner_manager=spinner),
|
2836
2850
|
)
|
2837
2851
|
):
|
@@ -2879,12 +2893,9 @@ class CloudController(BaseController):
|
|
2879
2893
|
with self.log.spinner(
|
2880
2894
|
"Updating Anyscale cloud with cloud resource..."
|
2881
2895
|
) as spinner:
|
2882
|
-
#
|
2883
|
-
|
2884
|
-
cloud_id=cloud_id,
|
2885
|
-
update_cloud_with_cloud_resource=UpdateCloudWithCloudResource(
|
2886
|
-
cloud_resource_to_update=create_cloud_resource,
|
2887
|
-
),
|
2896
|
+
# Update cloud with verified cloud resources.
|
2897
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
2898
|
+
cloud_id=cloud_id, cloud_deployment=cloud_resource,
|
2888
2899
|
)
|
2889
2900
|
# For now, only wait for the cloud to be active if the compute stack is VM.
|
2890
2901
|
# TODO (shomilj): support this fully for Kubernetes after provider metadata
|
@@ -2892,7 +2903,15 @@ class CloudController(BaseController):
|
|
2892
2903
|
if compute_stack == ComputeStack.VM:
|
2893
2904
|
self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.AWS)
|
2894
2905
|
if compute_stack == ComputeStack.K8S:
|
2895
|
-
|
2906
|
+
# Get the cloud resource ID to pass to the helm command.
|
2907
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
2908
|
+
cloud_id=cloud_id,
|
2909
|
+
).results
|
2910
|
+
assert (
|
2911
|
+
len(cloud_resources) == 1
|
2912
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
2913
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
2914
|
+
|
2896
2915
|
helm_command = self._generate_helm_upgrade_command(
|
2897
2916
|
provider="aws",
|
2898
2917
|
cloud_deployment_id=cloud_resource_id,
|
@@ -2938,7 +2957,12 @@ class CloudController(BaseController):
|
|
2938
2957
|
).start_verification(cloud_id, CloudProviders.AWS, functions_to_verify, yes)
|
2939
2958
|
|
2940
2959
|
def verify_gcp_cloud_resources_from_cloud_deployment(
|
2941
|
-
self,
|
2960
|
+
self,
|
2961
|
+
cloud_id: str,
|
2962
|
+
cloud_deployment: CloudDeployment,
|
2963
|
+
strict: bool = False,
|
2964
|
+
yes: bool = False,
|
2965
|
+
is_private_service_cloud: bool = False,
|
2942
2966
|
) -> bool:
|
2943
2967
|
assert cloud_deployment.region
|
2944
2968
|
assert cloud_deployment.gcp_config
|
@@ -2970,6 +2994,8 @@ class CloudController(BaseController):
|
|
2970
2994
|
cloud_id=cloud_id,
|
2971
2995
|
host_project_id=gcp_config.host_project_id,
|
2972
2996
|
strict=strict,
|
2997
|
+
yes=yes,
|
2998
|
+
is_private_service_cloud=is_private_service_cloud,
|
2973
2999
|
)
|
2974
3000
|
|
2975
3001
|
def verify_gcp_cloud_resources_from_create_cloud_resource(
|
@@ -3184,7 +3210,6 @@ class CloudController(BaseController):
|
|
3184
3210
|
functions_to_verify = self._validate_functional_verification_args(
|
3185
3211
|
functional_verify
|
3186
3212
|
)
|
3187
|
-
gcp_utils = try_import_gcp_utils()
|
3188
3213
|
|
3189
3214
|
# Create a cloud without cloud resources first
|
3190
3215
|
# Provider ID is optional for K8s clouds.
|
@@ -3260,11 +3285,6 @@ class CloudController(BaseController):
|
|
3260
3285
|
raise
|
3261
3286
|
|
3262
3287
|
try:
|
3263
|
-
# Set defaults for Kubernetes clouds.
|
3264
|
-
if compute_stack == ComputeStack.K8S:
|
3265
|
-
instance_service_account_email = ""
|
3266
|
-
subnet_names = []
|
3267
|
-
|
3268
3288
|
enable_filestore = filestore_location and filestore_instance_id
|
3269
3289
|
|
3270
3290
|
# Normally, for Kubernetes clouds, we don't need a VPC name, since networking is managed by Kubernetes.
|
@@ -3278,74 +3298,62 @@ class CloudController(BaseController):
|
|
3278
3298
|
if (enable_filestore or memorystore_instance_name) and not project_id:
|
3279
3299
|
raise ClickException("Please provide a project ID.")
|
3280
3300
|
|
3281
|
-
if project_id:
|
3282
|
-
factory = gcp_utils.get_google_cloud_client_factory(
|
3283
|
-
self.log, project_id
|
3284
|
-
)
|
3285
|
-
|
3286
|
-
if enable_filestore:
|
3287
|
-
filestore_config = gcp_utils.get_gcp_filestore_config(
|
3288
|
-
factory,
|
3289
|
-
project_id,
|
3290
|
-
vpc_name,
|
3291
|
-
filestore_location,
|
3292
|
-
filestore_instance_id,
|
3293
|
-
self.log,
|
3294
|
-
)
|
3295
|
-
else:
|
3296
|
-
filestore_config = GCPFileStoreConfig(
|
3297
|
-
instance_name="", mount_target_ip="", root_dir=""
|
3298
|
-
)
|
3299
|
-
if compute_stack == ComputeStack.K8S:
|
3300
|
-
# Set vpc_name to empty string for Kubernetes clouds
|
3301
|
-
vpc_name = ""
|
3302
|
-
|
3303
|
-
if memorystore_instance_name:
|
3304
|
-
memorystore_instance_config = gcp_utils.get_gcp_memorystore_config(
|
3305
|
-
factory, memorystore_instance_name
|
3306
|
-
)
|
3307
|
-
else:
|
3308
|
-
memorystore_instance_config = None
|
3309
|
-
|
3310
3301
|
if not cloud_storage_bucket_name.startswith(GCS_STORAGE_PREFIX):
|
3311
3302
|
cloud_storage_bucket_name = (
|
3312
3303
|
GCS_STORAGE_PREFIX + cloud_storage_bucket_name
|
3313
3304
|
)
|
3314
3305
|
|
3315
|
-
|
3316
|
-
create_cloud_resource_gcp = CreateCloudResourceGCP(
|
3317
|
-
gcp_vpc_id=vpc_name,
|
3318
|
-
gcp_subnet_ids=subnet_names,
|
3319
|
-
gcp_cluster_node_service_account_email=instance_service_account_email,
|
3320
|
-
gcp_anyscale_iam_service_account_email=anyscale_service_account_email
|
3321
|
-
or "",
|
3322
|
-
gcp_filestore_config=filestore_config,
|
3323
|
-
gcp_firewall_policy_ids=firewall_policy_names,
|
3324
|
-
gcp_cloud_storage_bucket_id=cloud_storage_bucket_name[
|
3325
|
-
len(GCS_STORAGE_PREFIX) :
|
3326
|
-
],
|
3327
|
-
memorystore_instance_config=memorystore_instance_config,
|
3306
|
+
cloud_resource = CloudDeployment(
|
3328
3307
|
compute_stack=compute_stack,
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3308
|
+
provider=CloudProviders.GCP,
|
3309
|
+
region=region,
|
3310
|
+
networking_mode=NetworkingMode.PRIVATE
|
3311
|
+
if private_network
|
3312
|
+
else NetworkingMode.PUBLIC,
|
3313
|
+
object_storage=ObjectStorage(bucket_name=cloud_storage_bucket_name),
|
3314
|
+
file_storage=FileStorage(
|
3315
|
+
file_storage_id="projects/{}/locations/{}/instances/{}".format(
|
3316
|
+
project_id, filestore_location, filestore_instance_id
|
3317
|
+
)
|
3318
|
+
if filestore_instance_id
|
3319
|
+
else None,
|
3320
|
+
persistent_volume_claim=persistent_volume_claim,
|
3321
|
+
csi_ephemeral_volume_driver=csi_ephemeral_volume_driver,
|
3322
|
+
)
|
3323
|
+
if filestore_instance_id
|
3324
|
+
or persistent_volume_claim
|
3325
|
+
or csi_ephemeral_volume_driver
|
3326
|
+
else None,
|
3327
|
+
gcp_config=GCPConfig(
|
3328
|
+
project_id=project_id,
|
3329
|
+
host_project_id=host_project_id,
|
3330
|
+
provider_name=provider_id,
|
3331
|
+
vpc_name=vpc_name,
|
3332
|
+
subnet_names=subnet_names,
|
3333
|
+
firewall_policy_names=firewall_policy_names,
|
3334
|
+
anyscale_service_account_email=anyscale_service_account_email,
|
3335
|
+
cluster_service_account_email=instance_service_account_email,
|
3336
|
+
memorystore_instance_name=memorystore_instance_name,
|
3337
|
+
),
|
3338
|
+
kubernetes_config=KubernetesConfig(
|
3339
|
+
anyscale_operator_iam_identity=anyscale_operator_iam_identity,
|
3340
|
+
zones=kubernetes_zones,
|
3341
|
+
)
|
3342
|
+
if compute_stack == ComputeStack.K8S
|
3343
|
+
else None,
|
3334
3344
|
)
|
3335
3345
|
|
3346
|
+
self._preprocess_gcp(cloud_resource)
|
3347
|
+
|
3336
3348
|
# Verification is only performed for VM compute stack.
|
3337
3349
|
# TODO (shomilj): Add verification to the K8S compute stack as well.
|
3338
3350
|
if compute_stack == ComputeStack.VM:
|
3339
3351
|
if (
|
3340
3352
|
not skip_verifications
|
3341
|
-
and not self.
|
3342
|
-
cloud_resource=create_cloud_resource_gcp,
|
3343
|
-
project_id=project_id,
|
3344
|
-
host_project_id=host_project_id,
|
3345
|
-
region=region,
|
3353
|
+
and not self.verify_gcp_cloud_resources_from_cloud_deployment(
|
3346
3354
|
cloud_id=cloud_id,
|
3355
|
+
cloud_deployment=cloud_resource,
|
3347
3356
|
yes=yes,
|
3348
|
-
factory=factory,
|
3349
3357
|
is_private_service_cloud=is_private_service_cloud,
|
3350
3358
|
)
|
3351
3359
|
):
|
@@ -3379,13 +3387,10 @@ class CloudController(BaseController):
|
|
3379
3387
|
raise ClickException(f"Cloud registration failed! {e}")
|
3380
3388
|
|
3381
3389
|
try:
|
3382
|
-
# update cloud with verified cloud resources
|
3383
3390
|
with self.log.spinner("Updating Anyscale cloud with cloud resources..."):
|
3384
|
-
|
3385
|
-
|
3386
|
-
|
3387
|
-
cloud_resource_to_update=create_cloud_resource_gcp,
|
3388
|
-
),
|
3391
|
+
# Update cloud with verified cloud resources.
|
3392
|
+
self.api_client.add_cloud_resource_api_v2_clouds_cloud_id_add_resource_put(
|
3393
|
+
cloud_id=cloud_id, cloud_deployment=cloud_resource,
|
3389
3394
|
)
|
3390
3395
|
# For now, only wait for the cloud to be active if the compute stack is VM.
|
3391
3396
|
# TODO (shomilj): support this fully for Kubernetes after provider metadata
|
@@ -3393,7 +3398,15 @@ class CloudController(BaseController):
|
|
3393
3398
|
if compute_stack == ComputeStack.VM:
|
3394
3399
|
self.wait_for_cloud_to_be_active(cloud_id, CloudProviders.GCP)
|
3395
3400
|
if compute_stack == ComputeStack.K8S:
|
3396
|
-
|
3401
|
+
# Get the cloud resource ID to pass to the helm command.
|
3402
|
+
cloud_resources = self.api_client.get_cloud_resources_api_v2_clouds_cloud_id_resources_get(
|
3403
|
+
cloud_id=cloud_id,
|
3404
|
+
).results
|
3405
|
+
assert (
|
3406
|
+
len(cloud_resources) == 1
|
3407
|
+
), f"Expected 1 cloud resource, got {len(cloud_resources)}"
|
3408
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
3409
|
+
|
3397
3410
|
helm_command = self._generate_helm_upgrade_command(
|
3398
3411
|
provider="gcp",
|
3399
3412
|
cloud_deployment_id=cloud_resource_id,
|
@@ -3513,13 +3526,6 @@ class CloudController(BaseController):
|
|
3513
3526
|
)
|
3514
3527
|
|
3515
3528
|
cloud = response.result
|
3516
|
-
except ClickException as e:
|
3517
|
-
raise ClickException(
|
3518
|
-
f"Failed to update cloud state to deleting for cloud {cloud_name}: {e}"
|
3519
|
-
)
|
3520
|
-
|
3521
|
-
# Clean up cloud resources
|
3522
|
-
try:
|
3523
3529
|
if cloud_provider == CloudProviders.AWS:
|
3524
3530
|
if not (cloud.is_aioa or cloud.compute_stack == ComputeStack.K8S):
|
3525
3531
|
# Delete services resources
|