anyscale 0.26.69__py3-none-any.whl → 0.26.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/_private/anyscale_client/anyscale_client.py +67 -1
- anyscale/_private/anyscale_client/common.py +20 -1
- anyscale/_private/anyscale_client/fake_anyscale_client.py +77 -10
- anyscale/client/README.md +14 -4
- anyscale/client/openapi_client/__init__.py +11 -4
- anyscale/client/openapi_client/api/default_api.py +462 -23
- anyscale/client/openapi_client/models/__init__.py +11 -4
- anyscale/client/openapi_client/models/api_key_info.py +29 -3
- anyscale/client/openapi_client/models/apply_autoscaling_config_update_model.py +350 -0
- anyscale/client/openapi_client/models/apply_production_service_multi_version_v2_model.py +207 -0
- anyscale/client/openapi_client/models/apply_production_service_v2_model.py +31 -3
- anyscale/client/openapi_client/models/baseimagesenum.py +70 -1
- anyscale/client/openapi_client/models/cloud_data_bucket_file_type.py +2 -1
- anyscale/client/openapi_client/models/{oauthconnectionresponse_response.py → clouddeployment_response.py} +11 -11
- anyscale/client/openapi_client/models/create_experimental_workspace.py +29 -1
- anyscale/client/openapi_client/models/create_workspace_from_template.py +29 -1
- anyscale/client/openapi_client/models/create_workspace_template_version.py +31 -3
- anyscale/client/openapi_client/models/decorated_list_service_api_model.py +58 -1
- anyscale/client/openapi_client/models/decorated_production_service_v2_api_model.py +60 -3
- anyscale/client/openapi_client/models/decorated_service_event_api_model.py +3 -3
- anyscale/client/openapi_client/models/describe_machine_pool_machines_filters.py +33 -5
- anyscale/client/openapi_client/models/describe_machine_pool_workloads_filters.py +33 -5
- anyscale/client/openapi_client/models/{service_event_level.py → entity_type.py} +9 -9
- anyscale/client/openapi_client/models/event_level.py +2 -1
- anyscale/client/openapi_client/models/job_event_fields.py +206 -0
- anyscale/client/openapi_client/models/machine_type_partition_filter.py +152 -0
- anyscale/client/openapi_client/models/partition_info.py +30 -1
- anyscale/client/openapi_client/models/production_job_event.py +3 -3
- anyscale/client/openapi_client/models/rollout_strategy.py +2 -1
- anyscale/client/openapi_client/models/service_event_fields.py +318 -0
- anyscale/client/openapi_client/models/supportedbaseimagesenum.py +70 -1
- anyscale/client/openapi_client/models/task_summary_config.py +29 -3
- anyscale/client/openapi_client/models/task_table_config.py +29 -3
- anyscale/client/openapi_client/models/unified_event.py +377 -0
- anyscale/client/openapi_client/models/{ha_job_event_level.py → unified_origin_filter.py} +21 -9
- anyscale/client/openapi_client/models/unifiedevent_list_response.py +147 -0
- anyscale/client/openapi_client/models/workspace_event_fields.py +122 -0
- anyscale/client/openapi_client/models/workspace_template_version.py +30 -1
- anyscale/client/openapi_client/models/workspace_template_version_data_object.py +30 -1
- anyscale/cloud/models.py +2 -2
- anyscale/commands/cloud_commands.py +133 -2
- anyscale/commands/job_commands.py +1 -1
- anyscale/commands/service_commands.py +130 -67
- anyscale/commands/setup_k8s.py +546 -31
- anyscale/controllers/cloud_controller.py +15 -2
- anyscale/controllers/kubernetes_verifier.py +80 -66
- anyscale/job/_private/job_sdk.py +47 -1
- anyscale/job/commands.py +3 -0
- anyscale/sdk/anyscale_client/models/apply_production_service_v2_model.py +31 -3
- anyscale/sdk/anyscale_client/models/apply_service_model.py +31 -3
- anyscale/sdk/anyscale_client/models/baseimagesenum.py +70 -1
- anyscale/sdk/anyscale_client/models/rollout_strategy.py +2 -1
- anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +70 -1
- anyscale/service/__init__.py +11 -3
- anyscale/service/_private/service_sdk.py +361 -35
- anyscale/service/commands.py +15 -3
- anyscale/service/models.py +12 -0
- anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
- anyscale/version.py +1 -1
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/METADATA +1 -1
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/RECORD +66 -59
- anyscale/client/openapi_client/models/o_auth_connection_response.py +0 -229
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/WHEEL +0 -0
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/top_level.txt +0 -0
anyscale/commands/setup_k8s.py
CHANGED
|
@@ -76,14 +76,20 @@ class KubernetesCloudSetupCommand:
|
|
|
76
76
|
yes: bool,
|
|
77
77
|
values_file: Optional[str] = None,
|
|
78
78
|
operator_chart: Optional[str] = None,
|
|
79
|
+
cloud_id: Optional[str] = None,
|
|
80
|
+
resource_name: Optional[str] = None,
|
|
79
81
|
) -> None:
|
|
80
82
|
"""
|
|
81
83
|
Main entry point for Kubernetes cloud setup.
|
|
82
84
|
|
|
85
|
+
This method handles both:
|
|
86
|
+
1. Creating a new cloud (when cloud_id is None)
|
|
87
|
+
2. Adding a resource to an existing cloud (when cloud_id is provided)
|
|
88
|
+
|
|
83
89
|
Args:
|
|
84
90
|
provider: Cloud provider (aws, gcp)
|
|
85
91
|
region: AWS/GCP region
|
|
86
|
-
name: Name for the Anyscale cloud
|
|
92
|
+
name: Name for the Anyscale cloud. If cloud_id is not provided, this will be used to create a new cloud.
|
|
87
93
|
cluster_name: Kubernetes cluster name/context
|
|
88
94
|
namespace: Namespace for the Anyscale operator
|
|
89
95
|
project_id: GCP project ID (required for GCP)
|
|
@@ -91,16 +97,37 @@ class KubernetesCloudSetupCommand:
|
|
|
91
97
|
yes: Skip confirmation prompts
|
|
92
98
|
values_file: Optional custom path for Helm values file
|
|
93
99
|
operator_chart: Optional path to operator chart (skips helm repo add/update)
|
|
100
|
+
cloud_id: Optional cloud ID for the Anyscale cloud to add the resource to
|
|
101
|
+
resource_name: Optional name for the cloud resource (will be auto-generated if not provided)
|
|
94
102
|
"""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
103
|
+
# Determine if we're creating a new cloud or adding to existing
|
|
104
|
+
create_cloud = cloud_id is None
|
|
105
|
+
|
|
106
|
+
# Validate cloud_id is provided when adding to existing cloud
|
|
107
|
+
if not create_cloud:
|
|
108
|
+
assert (
|
|
109
|
+
cloud_id
|
|
110
|
+
), "cloud_id is required when adding a resource to an existing cloud"
|
|
111
|
+
|
|
112
|
+
# Set up logging message based on mode
|
|
113
|
+
if create_cloud:
|
|
114
|
+
setup_message = (
|
|
115
|
+
f"Setting up Kubernetes cloud '{name}' on {provider.upper()}"
|
|
116
|
+
)
|
|
117
|
+
else:
|
|
118
|
+
setup_message = f"Setting up Kubernetes cloud resource for '{name}' on {provider.upper()}"
|
|
119
|
+
|
|
120
|
+
self.log.open_block("Setup", setup_message)
|
|
98
121
|
|
|
99
122
|
# Set confirmation flag
|
|
100
123
|
self.skip_confirmation = yes
|
|
101
124
|
|
|
125
|
+
# Track what resources were created for cleanup messaging
|
|
126
|
+
infrastructure = None
|
|
127
|
+
cluster_info = None
|
|
128
|
+
cloud_resource_id = None
|
|
129
|
+
|
|
102
130
|
try:
|
|
103
|
-
# Step 0: Check required CLI tools are installed
|
|
104
131
|
self._check_required_tools(provider)
|
|
105
132
|
|
|
106
133
|
# Step 1: Prompt for namespace BEFORE infrastructure setup
|
|
@@ -119,30 +146,74 @@ class KubernetesCloudSetupCommand:
|
|
|
119
146
|
provider, region, name, cluster_info
|
|
120
147
|
)
|
|
121
148
|
|
|
122
|
-
# Step 4: Register cloud
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
149
|
+
# Step 4: Register cloud OR create cloud resource
|
|
150
|
+
if create_cloud:
|
|
151
|
+
# Register new cloud with Anyscale
|
|
152
|
+
cloud_id = self._register_cloud(
|
|
153
|
+
name, provider, region, infrastructure, cluster_info
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
# Get the cloud resource ID from the newly registered cloud
|
|
157
|
+
cloud_resources = self.cloud_controller.get_decorated_cloud_resources(
|
|
158
|
+
cloud_id
|
|
159
|
+
)
|
|
160
|
+
if not cloud_resources:
|
|
161
|
+
raise click.ClickException(
|
|
162
|
+
"No cloud resources found after registration"
|
|
163
|
+
)
|
|
164
|
+
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
|
165
|
+
else:
|
|
166
|
+
# Should have been validated earlier, but just in case
|
|
167
|
+
assert (
|
|
168
|
+
cloud_id
|
|
169
|
+
), "cloud_id is required when adding a resource to an existing cloud"
|
|
170
|
+
|
|
171
|
+
# Create cloud resource in existing cloud
|
|
172
|
+
cloud_resource_id = self._create_cloud_resource(
|
|
173
|
+
cloud_id,
|
|
174
|
+
provider,
|
|
175
|
+
region,
|
|
176
|
+
infrastructure,
|
|
177
|
+
cluster_info,
|
|
178
|
+
resource_name,
|
|
179
|
+
)
|
|
126
180
|
|
|
127
181
|
# Step 5: Install Anyscale operator
|
|
128
182
|
self._install_operator(
|
|
129
|
-
|
|
183
|
+
cloud_resource_id,
|
|
130
184
|
provider,
|
|
131
185
|
region,
|
|
132
186
|
final_namespace,
|
|
133
187
|
infrastructure,
|
|
134
188
|
values_file,
|
|
135
189
|
operator_chart,
|
|
190
|
+
skip_confirmation=yes,
|
|
136
191
|
)
|
|
137
192
|
|
|
138
193
|
# Step 6: Verify installation
|
|
139
194
|
if functional_verify:
|
|
140
|
-
self._verify_installation(
|
|
195
|
+
self._verify_installation(
|
|
196
|
+
cloud_id, final_namespace, cluster_info, cloud_resource_id
|
|
197
|
+
)
|
|
141
198
|
|
|
142
199
|
self.log.close_block("Setup")
|
|
143
|
-
|
|
200
|
+
if create_cloud:
|
|
201
|
+
self.log.info(
|
|
202
|
+
f"Kubernetes cloud '{name}' setup completed successfully!"
|
|
203
|
+
)
|
|
204
|
+
else:
|
|
205
|
+
self.log.info(
|
|
206
|
+
f"Kubernetes cloud resource setup for '{name}' completed successfully!"
|
|
207
|
+
)
|
|
144
208
|
except Exception: # noqa: BLE001
|
|
145
209
|
self.log.close_block("Setup")
|
|
210
|
+
self._handle_setup_failure(
|
|
211
|
+
provider,
|
|
212
|
+
infrastructure,
|
|
213
|
+
cloud_id,
|
|
214
|
+
name,
|
|
215
|
+
is_cloud_resource_setup=not create_cloud,
|
|
216
|
+
)
|
|
146
217
|
raise
|
|
147
218
|
|
|
148
219
|
def _debug(self, *msg: str) -> None:
|
|
@@ -1131,14 +1202,16 @@ class KubernetesCloudSetupCommand:
|
|
|
1131
1202
|
self.cloud_controller.log.info = original_log_info
|
|
1132
1203
|
|
|
1133
1204
|
self._debug("Cloud registration completed, fetching cloud ID...")
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1205
|
+
# Use get_cloud_id_and_name helper to fetch the registered cloud
|
|
1206
|
+
from anyscale.cloud_utils import get_cloud_id_and_name
|
|
1207
|
+
|
|
1208
|
+
try:
|
|
1209
|
+
cloud_id, _ = get_cloud_id_and_name(
|
|
1210
|
+
self.cloud_controller.api_client, cloud_name=name
|
|
1211
|
+
)
|
|
1212
|
+
except Exception as e: # noqa: BLE001
|
|
1213
|
+
raise click.ClickException(f"Failed to find registered cloud: {e}")
|
|
1140
1214
|
|
|
1141
|
-
cloud_id = getattr(cloud, "id", None) or getattr(cloud, "cloud_id", None)
|
|
1142
1215
|
if not cloud_id:
|
|
1143
1216
|
raise click.ClickException(
|
|
1144
1217
|
"Failed to get cloud ID from registered cloud"
|
|
@@ -1162,27 +1235,23 @@ class KubernetesCloudSetupCommand:
|
|
|
1162
1235
|
|
|
1163
1236
|
def _install_operator( # noqa: PLR0913
|
|
1164
1237
|
self,
|
|
1165
|
-
|
|
1238
|
+
cloud_resource_id: str,
|
|
1166
1239
|
provider: str,
|
|
1167
1240
|
region: str,
|
|
1168
1241
|
namespace: str,
|
|
1169
1242
|
infrastructure: InfrastructureResources,
|
|
1170
1243
|
values_file: Optional[str] = None,
|
|
1171
1244
|
operator_chart: Optional[str] = None,
|
|
1245
|
+
skip_confirmation: bool = False,
|
|
1172
1246
|
) -> None:
|
|
1173
1247
|
"""Install the Anyscale operator using Helm."""
|
|
1174
1248
|
self.log.info("Installing Anyscale operator...", block_label="Setup")
|
|
1175
1249
|
|
|
1176
|
-
# Get cloud resources to get the cloud resource ID
|
|
1177
|
-
cloud_resources = self.cloud_controller.get_decorated_cloud_resources(cloud_id)
|
|
1178
|
-
|
|
1179
|
-
if not cloud_resources:
|
|
1180
|
-
raise click.ClickException("No cloud resources found")
|
|
1181
|
-
|
|
1182
|
-
cloud_resource_id = cloud_resources[0].cloud_resource_id
|
|
1183
|
-
|
|
1184
1250
|
release_name = "anyscale-operator"
|
|
1185
1251
|
|
|
1252
|
+
# Prompt user about nginx ingress installation
|
|
1253
|
+
install_nginx = self._prompt_for_nginx_ingress(skip_confirmation)
|
|
1254
|
+
|
|
1186
1255
|
# Generate Helm command and extract --set-string flags from it
|
|
1187
1256
|
self._debug("Generating Helm command to extract parameters...")
|
|
1188
1257
|
helm_command = self.cloud_controller._generate_helm_upgrade_command( # noqa: SLF001
|
|
@@ -1203,6 +1272,7 @@ class KubernetesCloudSetupCommand:
|
|
|
1203
1272
|
infrastructure=infrastructure,
|
|
1204
1273
|
custom_path=values_file,
|
|
1205
1274
|
additional_values=set_string_values,
|
|
1275
|
+
install_nginx_ingress=install_nginx,
|
|
1206
1276
|
)
|
|
1207
1277
|
|
|
1208
1278
|
# Determine chart reference based on operator_chart parameter
|
|
@@ -1352,6 +1422,36 @@ class KubernetesCloudSetupCommand:
|
|
|
1352
1422
|
|
|
1353
1423
|
return final_namespace
|
|
1354
1424
|
|
|
1425
|
+
def _prompt_for_nginx_ingress(self, skip_confirmation: bool = False) -> bool:
|
|
1426
|
+
"""Prompt user whether to install nginx ingress subchart."""
|
|
1427
|
+
if skip_confirmation:
|
|
1428
|
+
self.log.info("Using default: nginx ingress subchart will be installed")
|
|
1429
|
+
return True
|
|
1430
|
+
|
|
1431
|
+
self.log.info(
|
|
1432
|
+
"The Anyscale operator can install an nginx ingress controller as part of the setup.",
|
|
1433
|
+
block_label="Setup",
|
|
1434
|
+
)
|
|
1435
|
+
self.log.info(
|
|
1436
|
+
"If you already have an ingress controller installed, you may want to skip this.",
|
|
1437
|
+
block_label="Setup",
|
|
1438
|
+
)
|
|
1439
|
+
|
|
1440
|
+
response = click.confirm(
|
|
1441
|
+
"Do you want to install the nginx ingress subchart?", default=True
|
|
1442
|
+
)
|
|
1443
|
+
|
|
1444
|
+
if response:
|
|
1445
|
+
self.log.info(
|
|
1446
|
+
"nginx ingress subchart will be installed", block_label="Setup"
|
|
1447
|
+
)
|
|
1448
|
+
else:
|
|
1449
|
+
self.log.info(
|
|
1450
|
+
"nginx ingress subchart will NOT be installed", block_label="Setup"
|
|
1451
|
+
)
|
|
1452
|
+
|
|
1453
|
+
return response
|
|
1454
|
+
|
|
1355
1455
|
def _generate_helm_values_file( # noqa: PLR0913
|
|
1356
1456
|
self,
|
|
1357
1457
|
provider: str,
|
|
@@ -1361,6 +1461,7 @@ class KubernetesCloudSetupCommand:
|
|
|
1361
1461
|
infrastructure: InfrastructureResources,
|
|
1362
1462
|
custom_path: Optional[str] = None,
|
|
1363
1463
|
additional_values: Optional[Dict[str, str]] = None,
|
|
1464
|
+
install_nginx_ingress: bool = True,
|
|
1364
1465
|
) -> str:
|
|
1365
1466
|
"""Generate Helm values file and save it locally."""
|
|
1366
1467
|
self.log.info("Generating Helm values file...")
|
|
@@ -1380,7 +1481,7 @@ class KubernetesCloudSetupCommand:
|
|
|
1380
1481
|
self._set_nested_value(
|
|
1381
1482
|
values, "global.auth.iamIdentity", infrastructure.iam_role_arn
|
|
1382
1483
|
)
|
|
1383
|
-
self._set_nested_value(values, "ingress-nginx.enabled",
|
|
1484
|
+
self._set_nested_value(values, "ingress-nginx.enabled", install_nginx_ingress)
|
|
1384
1485
|
|
|
1385
1486
|
# Add region for AWS only (using global.aws.region)
|
|
1386
1487
|
# Region field is deprecated for other providers
|
|
@@ -1432,7 +1533,11 @@ class KubernetesCloudSetupCommand:
|
|
|
1432
1533
|
)
|
|
1433
1534
|
|
|
1434
1535
|
def _verify_installation(
|
|
1435
|
-
self,
|
|
1536
|
+
self,
|
|
1537
|
+
cloud_id: str,
|
|
1538
|
+
namespace: str,
|
|
1539
|
+
cluster_info: ClusterInfo,
|
|
1540
|
+
cloud_resource_id: Optional[str] = None,
|
|
1436
1541
|
) -> None:
|
|
1437
1542
|
"""Verify the Kubernetes installation."""
|
|
1438
1543
|
self.log.info("Verifying installation...")
|
|
@@ -1443,7 +1548,20 @@ class KubernetesCloudSetupCommand:
|
|
|
1443
1548
|
if not cloud_resources:
|
|
1444
1549
|
raise click.ClickException("No cloud resources found for verification")
|
|
1445
1550
|
|
|
1446
|
-
|
|
1551
|
+
# Find the specific cloud resource if cloud_resource_id is provided
|
|
1552
|
+
cloud_deployment = None
|
|
1553
|
+
if cloud_resource_id:
|
|
1554
|
+
for resource in cloud_resources:
|
|
1555
|
+
if resource.cloud_resource_id == cloud_resource_id:
|
|
1556
|
+
cloud_deployment = resource
|
|
1557
|
+
break
|
|
1558
|
+
if not cloud_deployment:
|
|
1559
|
+
raise click.ClickException(
|
|
1560
|
+
f"Could not find cloud resource with ID {cloud_resource_id}"
|
|
1561
|
+
)
|
|
1562
|
+
else:
|
|
1563
|
+
# Fallback to first resource for backward compatibility
|
|
1564
|
+
cloud_deployment = cloud_resources[0]
|
|
1447
1565
|
|
|
1448
1566
|
# Use the existing Kubernetes verifier
|
|
1449
1567
|
verifier = KubernetesCloudDeploymentVerifier(
|
|
@@ -1465,6 +1583,314 @@ class KubernetesCloudSetupCommand:
|
|
|
1465
1583
|
self.log.error("Verification failed - please check the logs above")
|
|
1466
1584
|
raise click.ClickException("Installation verification failed")
|
|
1467
1585
|
|
|
1586
|
+
def _create_cloud_resource(  # noqa: PLR0912
    self,
    cloud_id: str,
    provider: str,
    region: str,
    infrastructure: InfrastructureResources,
    cluster_info: ClusterInfo,
    resource_name: Optional[str],
) -> str:
    """
    Create a cloud resource in an existing cloud and return the cloud_resource_id.

    A CloudDeployment spec is built for the given provider, written to a
    temporary YAML file, and passed to cloud_controller.create_cloud_resource.
    The temporary file is removed afterwards whether or not creation succeeds.

    Args:
        cloud_id: ID of the existing cloud
        provider: Cloud provider (aws, gcp)
        region: Cloud region
        infrastructure: Infrastructure resources created
        cluster_info: Cluster information
        resource_name: Name for the cloud resource (optional, will be auto-generated if not provided)

    Returns:
        The cloud_resource_id of the created resource

    Raises:
        click.ClickException: If the provider is unsupported, the GCP project
            ID is missing, or creating the cloud resource fails.
    """
    import os as os_module
    import tempfile
    import traceback

    self.log.info("Creating cloud resource in Anyscale...", block_label="Setup")
    if resource_name:
        self.log.info(f"Using resource name: {resource_name}", block_label="Setup")
    else:
        self.log.info("Resource name will be auto-generated", block_label="Setup")

    # Collect the provider-specific parts; everything else is shared below.
    if provider == "aws":
        # Dynamically determine availability zones from the EKS cluster
        zones = self._get_eks_availability_zones(cluster_info.cluster_name, region)
        provider_kwargs = {
            "provider": CloudProviders.AWS,
            "aws_config": AWSConfig(),
        }
    elif provider == "gcp":
        # Explicit check instead of assert: asserts are stripped under -O.
        if not infrastructure.project_id:
            raise click.ClickException("Project ID is required for GCP")

        from anyscale.client.openapi_client.models import GCPConfig

        # Dynamically determine zones from the GKE cluster
        zones = self._get_gke_zones(
            cluster_info.cluster_name, region, infrastructure.project_id
        )
        provider_kwargs = {
            "provider": CloudProviders.GCP,
            "gcp_config": GCPConfig(project_id=infrastructure.project_id),
        }
    else:
        raise click.ClickException(f"Unsupported provider: {provider}")

    cloud_deployment = CloudDeployment(
        name=resource_name,
        region=region,
        compute_stack=ComputeStack.K8S,
        object_storage=ObjectStorage(
            bucket_name=infrastructure.bucket_name, region=region
        ),
        kubernetes_config=OpenAPIKubernetesConfig(
            anyscale_operator_iam_identity=infrastructure.iam_role_arn,
            zones=zones,
        ),
        **provider_kwargs,
    )

    # Create cloud resource using the API
    try:
        self._debug("Cloud deployment details:")
        self._debug(f" Provider: {cloud_deployment.provider}")
        self._debug(f" Region: {cloud_deployment.region}")
        self._debug(f" Compute Stack: {cloud_deployment.compute_stack}")
        self._debug(f" Bucket Name: {cloud_deployment.object_storage.bucket_name}")
        self._debug(
            f" IAM Identity: {cloud_deployment.kubernetes_config.anyscale_operator_iam_identity}"
        )

        # Save cloud deployment to a temporary file and use create_cloud_resource
        self._debug("Saving cloud deployment to temporary file...")

        temp_file_path: Optional[str] = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".yaml", delete=False
            ) as temp_file:
                # Record the path before writing so a failed dump still
                # gets cleaned up in the finally block.
                temp_file_path = temp_file.name
                # Convert CloudDeployment to dict for YAML serialization
                yaml.dump(
                    cloud_deployment.to_dict(), temp_file, default_flow_style=False
                )

            self._debug(f"Created temporary spec file: {temp_file_path}")
            self._debug("Calling cloud_controller.create_cloud_resource...")

            cloud_resource_id = self.cloud_controller.create_cloud_resource(
                cloud=None,
                cloud_id=cloud_id,
                spec_file=temp_file_path,
                skip_verification=True,  # We will do verification in the _verify_installation method
                yes=True,  # Skip confirmation prompts
            )
        finally:
            # Best-effort cleanup: a leftover temp file must not mask the
            # real outcome of the creation call.
            if temp_file_path is not None:
                try:
                    os_module.unlink(temp_file_path)
                except OSError as e:
                    self._debug(f"Failed to clean up temporary file: {e}")
                else:
                    self._debug(f"Cleaned up temporary file: {temp_file_path}")

        self.log.info(
            f"Cloud resource created with ID: {cloud_resource_id}",
            block_label="Setup",
        )

        return cloud_resource_id

    except Exception as e:  # noqa: BLE001
        self.log.error(f"Cloud resource creation failed with error: {e}")
        self.log.error(f"Error type: {type(e).__name__}")
        if hasattr(e, "response"):
            self.log.error(f"Response details: {getattr(e, 'response', 'N/A')}")
        # Every exception carries .args, so no hasattr guard is needed.
        self.log.error(f"Error args: {e.args}")
        self.log.error(f"Full traceback: {traceback.format_exc()}")
        raise click.ClickException(f"Failed to create cloud resource: {e}") from e
|
|
1723
|
+
|
|
1724
|
+
def _handle_setup_failure(
|
|
1725
|
+
self,
|
|
1726
|
+
provider: str,
|
|
1727
|
+
infrastructure: Optional[InfrastructureResources],
|
|
1728
|
+
cloud_id: Optional[str],
|
|
1729
|
+
name: str,
|
|
1730
|
+
is_cloud_resource_setup: bool = False,
|
|
1731
|
+
) -> None:
|
|
1732
|
+
"""Handle setup failure by providing cleanup instructions to the user."""
|
|
1733
|
+
self.log.error("")
|
|
1734
|
+
self.log.error("=" * 80)
|
|
1735
|
+
self.log.error("SETUP FAILED - MANUAL CLEANUP REQUIRED")
|
|
1736
|
+
self.log.error("=" * 80)
|
|
1737
|
+
self.log.error("")
|
|
1738
|
+
|
|
1739
|
+
if is_cloud_resource_setup:
|
|
1740
|
+
self.log.error(
|
|
1741
|
+
"The Kubernetes cloud resource setup failed, leaving resources in an incomplete state."
|
|
1742
|
+
)
|
|
1743
|
+
else:
|
|
1744
|
+
self.log.error(
|
|
1745
|
+
"The Kubernetes cloud setup failed, leaving resources in an incomplete state."
|
|
1746
|
+
)
|
|
1747
|
+
|
|
1748
|
+
self.log.error(
|
|
1749
|
+
"You must manually clean up the following resources to avoid charges:"
|
|
1750
|
+
)
|
|
1751
|
+
self.log.error("")
|
|
1752
|
+
|
|
1753
|
+
if provider == "aws":
|
|
1754
|
+
self._log_aws_cleanup_instructions(
|
|
1755
|
+
infrastructure, cloud_id, name, is_cloud_resource_setup
|
|
1756
|
+
)
|
|
1757
|
+
elif provider == "gcp":
|
|
1758
|
+
self._log_gcp_cleanup_instructions(
|
|
1759
|
+
infrastructure, cloud_id, name, is_cloud_resource_setup
|
|
1760
|
+
)
|
|
1761
|
+
|
|
1762
|
+
self.log.error("")
|
|
1763
|
+
self.log.error("=" * 80)
|
|
1764
|
+
|
|
1765
|
+
def _log_aws_cleanup_instructions(
|
|
1766
|
+
self,
|
|
1767
|
+
infrastructure: Optional[InfrastructureResources],
|
|
1768
|
+
cloud_id: Optional[str],
|
|
1769
|
+
name: str,
|
|
1770
|
+
is_cloud_resource_setup: bool = False,
|
|
1771
|
+
) -> None:
|
|
1772
|
+
"""Log AWS-specific cleanup instructions."""
|
|
1773
|
+
self.log.error("AWS Resources to clean up:")
|
|
1774
|
+
self.log.error("")
|
|
1775
|
+
|
|
1776
|
+
if infrastructure:
|
|
1777
|
+
self.log.error("1. CloudFormation Stack:")
|
|
1778
|
+
# Stack name pattern: k8s-{name}-{random} with underscores replaced by hyphens
|
|
1779
|
+
stack_name_pattern = f"k8s-{name}-*".replace("_", "-").lower()
|
|
1780
|
+
self.log.error(
|
|
1781
|
+
f" - Find and delete the CloudFormation stack matching pattern: {stack_name_pattern}"
|
|
1782
|
+
)
|
|
1783
|
+
self.log.error(f" - Region: {infrastructure.region}")
|
|
1784
|
+
self.log.error(
|
|
1785
|
+
" - AWS Console: CloudFormation > Stacks > Select stack > Delete"
|
|
1786
|
+
)
|
|
1787
|
+
self.log.error(
|
|
1788
|
+
f" - AWS CLI: aws cloudformation delete-stack --stack-name <stack-name> --region {infrastructure.region}"
|
|
1789
|
+
)
|
|
1790
|
+
self.log.error("")
|
|
1791
|
+
self.log.error(" This will automatically delete:")
|
|
1792
|
+
self.log.error(f" - S3 Bucket: {infrastructure.bucket_name}")
|
|
1793
|
+
self.log.error(f" - IAM Role: {infrastructure.iam_role_arn}")
|
|
1794
|
+
self.log.error("")
|
|
1795
|
+
|
|
1796
|
+
if cloud_id:
|
|
1797
|
+
if is_cloud_resource_setup:
|
|
1798
|
+
self.log.error("2. Anyscale Cloud Resource:")
|
|
1799
|
+
self.log.error(
|
|
1800
|
+
f" - Delete the cloud resource from cloud '{name}' (ID: {cloud_id})"
|
|
1801
|
+
)
|
|
1802
|
+
self.log.error(
|
|
1803
|
+
f" - CLI: anyscale cloud resource delete --cloud '{name}' --resource <resource-name>"
|
|
1804
|
+
)
|
|
1805
|
+
self.log.error(
|
|
1806
|
+
" - To find the resource name, run: anyscale cloud get --name '{name}'"
|
|
1807
|
+
)
|
|
1808
|
+
self.log.error(
|
|
1809
|
+
f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
|
|
1810
|
+
)
|
|
1811
|
+
else:
|
|
1812
|
+
self.log.error("2. Anyscale Cloud Registration:")
|
|
1813
|
+
self.log.error(
|
|
1814
|
+
f" - Delete the cloud '{name}' (ID: {cloud_id}) from Anyscale"
|
|
1815
|
+
)
|
|
1816
|
+
self.log.error(f" - CLI: anyscale cloud delete --name '{name}'")
|
|
1817
|
+
self.log.error(
|
|
1818
|
+
f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
|
|
1819
|
+
)
|
|
1820
|
+
self.log.error("")
|
|
1821
|
+
|
|
1822
|
+
if not infrastructure:
|
|
1823
|
+
self.log.error(
|
|
1824
|
+
"No infrastructure resources were created before the failure."
|
|
1825
|
+
)
|
|
1826
|
+
self.log.error("")
|
|
1827
|
+
|
|
1828
|
+
def _log_gcp_cleanup_instructions(
|
|
1829
|
+
self,
|
|
1830
|
+
infrastructure: Optional[InfrastructureResources],
|
|
1831
|
+
cloud_id: Optional[str],
|
|
1832
|
+
name: str,
|
|
1833
|
+
is_cloud_resource_setup: bool = False,
|
|
1834
|
+
) -> None:
|
|
1835
|
+
"""Log GCP-specific cleanup instructions."""
|
|
1836
|
+
self.log.error("GCP Resources to clean up:")
|
|
1837
|
+
self.log.error("")
|
|
1838
|
+
|
|
1839
|
+
if infrastructure:
|
|
1840
|
+
self.log.error("1. GCS Bucket:")
|
|
1841
|
+
self.log.error(f" - Bucket: {infrastructure.bucket_name}")
|
|
1842
|
+
self.log.error(f" - Project: {infrastructure.project_id}")
|
|
1843
|
+
self.log.error(
|
|
1844
|
+
" - GCP Console: Cloud Storage > Buckets > Select bucket > Delete"
|
|
1845
|
+
)
|
|
1846
|
+
self.log.error(
|
|
1847
|
+
f" - gcloud CLI: gsutil rm -r gs://{infrastructure.bucket_name}"
|
|
1848
|
+
)
|
|
1849
|
+
self.log.error("")
|
|
1850
|
+
|
|
1851
|
+
self.log.error("2. Service Account:")
|
|
1852
|
+
self.log.error(f" - Service Account: {infrastructure.iam_role_arn}")
|
|
1853
|
+
self.log.error(f" - Project: {infrastructure.project_id}")
|
|
1854
|
+
self.log.error(
|
|
1855
|
+
" - GCP Console: IAM & Admin > Service Accounts > Select account > Delete"
|
|
1856
|
+
)
|
|
1857
|
+
self.log.error(
|
|
1858
|
+
f" - gcloud CLI: gcloud iam service-accounts delete {infrastructure.iam_role_arn} --project={infrastructure.project_id}"
|
|
1859
|
+
)
|
|
1860
|
+
self.log.error("")
|
|
1861
|
+
|
|
1862
|
+
if cloud_id:
|
|
1863
|
+
if is_cloud_resource_setup:
|
|
1864
|
+
self.log.error("3. Anyscale Cloud Resource:")
|
|
1865
|
+
self.log.error(
|
|
1866
|
+
f" - Delete the cloud resource from cloud '{name}' (ID: {cloud_id})"
|
|
1867
|
+
)
|
|
1868
|
+
self.log.error(
|
|
1869
|
+
f" - CLI: anyscale cloud resource delete --cloud '{name}' --resource <resource-name>"
|
|
1870
|
+
)
|
|
1871
|
+
self.log.error(
|
|
1872
|
+
" - To find the resource name, run: anyscale cloud get --name '{name}'"
|
|
1873
|
+
)
|
|
1874
|
+
self.log.error(
|
|
1875
|
+
f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
|
|
1876
|
+
)
|
|
1877
|
+
else:
|
|
1878
|
+
self.log.error("3. Anyscale Cloud Registration:")
|
|
1879
|
+
self.log.error(
|
|
1880
|
+
f" - Delete the cloud '{name}' (ID: {cloud_id}) from Anyscale"
|
|
1881
|
+
)
|
|
1882
|
+
self.log.error(f" - CLI: anyscale cloud delete --name '{name}'")
|
|
1883
|
+
self.log.error(
|
|
1884
|
+
f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
|
|
1885
|
+
)
|
|
1886
|
+
self.log.error("")
|
|
1887
|
+
|
|
1888
|
+
if not infrastructure:
|
|
1889
|
+
self.log.error(
|
|
1890
|
+
"No infrastructure resources were created before the failure."
|
|
1891
|
+
)
|
|
1892
|
+
self.log.error("")
|
|
1893
|
+
|
|
1468
1894
|
|
|
1469
1895
|
def setup_kubernetes_cloud( # noqa: PLR0913
|
|
1470
1896
|
provider: str,
|
|
@@ -1516,3 +1942,92 @@ def setup_kubernetes_cloud( # noqa: PLR0913
|
|
|
1516
1942
|
except Exception as e: # noqa: BLE001
|
|
1517
1943
|
click.echo(f"Setup failed: {e}", err=True)
|
|
1518
1944
|
raise click.Abort()
|
|
1945
|
+
|
|
1946
|
+
|
|
1947
|
+
def setup_kubernetes_cloud_resource(  # noqa: PLR0913
    provider: str,
    region: str,
    cloud_name: Optional[str],
    cloud_id: Optional[str],
    cluster_name: str,
    resource_name: Optional[str],
    namespace: str = "anyscale-operator",
    project_id: Optional[str] = None,
    functional_verify: bool = False,
    yes: bool = False,
    values_file: Optional[str] = None,
    debug: bool = False,
    operator_chart: Optional[str] = None,
) -> None:
    """
    Add a Kubernetes cloud resource to an Anyscale cloud that already exists.

    The cloud is looked up by exactly one of cloud_name / cloud_id, and the
    resolved ID and name are then passed to KubernetesCloudSetupCommand.run
    so that no new cloud is registered.

    Args:
        provider: Cloud provider (aws, gcp)
        region: Cloud region
        cloud_name: Name of existing Anyscale cloud
        cloud_id: ID of existing Anyscale cloud
        cluster_name: Kubernetes cluster name
        resource_name: Name for the cloud resource (optional, will be auto-generated if not provided)
        namespace: Namespace for Anyscale operator (default: anyscale-operator)
        project_id: GCP project ID (optional, for GCP)
        functional_verify: Whether to run functional verification
        yes: Skip confirmation prompts
        values_file: Optional path for Helm values file
        debug: Enable debug logging
        operator_chart: Optional path to operator chart (skips helm repo add/update)

    Raises:
        click.Abort: If the identifiers are missing or both given, the cloud
            lookup fails, or the setup run raises.
    """
    cmd = KubernetesCloudSetupCommand(debug=debug)

    # Exactly one identifier must be supplied.
    if not (cloud_id or cloud_name):
        click.echo("Either cloud_name or cloud_id must be provided", err=True)
        raise click.Abort()

    if cloud_id and cloud_name:
        click.echo("Only one of cloud_name or cloud_id can be provided", err=True)
        raise click.Abort()

    # Resolve the identifier that was given into both ID and name; this also
    # confirms the cloud exists.
    try:
        from anyscale.cloud_utils import get_cloud_id_and_name

        lookup = {"cloud_id": cloud_id} if cloud_id else {"cloud_name": cloud_name}
        cloud_id, cloud_name = get_cloud_id_and_name(
            cmd.cloud_controller.api_client, **lookup
        )
    except Exception as e:  # noqa: BLE001
        click.echo(f"Failed to fetch cloud information: {e}", err=True)
        raise click.Abort()

    if not (cloud_id and cloud_name):
        click.echo("Could not find cloud with provided name or ID", err=True)
        raise click.Abort()

    try:
        # Passing cloud_id puts run() into resource-only mode.
        cmd.run(
            provider=provider,
            region=region,
            name=cloud_name,
            cluster_name=cluster_name,
            namespace=namespace,
            project_id=project_id,
            functional_verify=functional_verify,
            yes=yes,
            values_file=values_file,
            operator_chart=operator_chart,
            cloud_id=cloud_id,
            resource_name=resource_name,
        )
    except Exception as e:  # noqa: BLE001
        click.echo(f"Setup failed: {e}", err=True)
        raise click.Abort()
|