anyscale 0.26.69__py3-none-any.whl → 0.26.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. anyscale/_private/anyscale_client/anyscale_client.py +67 -1
  2. anyscale/_private/anyscale_client/common.py +20 -1
  3. anyscale/_private/anyscale_client/fake_anyscale_client.py +77 -10
  4. anyscale/client/README.md +14 -4
  5. anyscale/client/openapi_client/__init__.py +11 -4
  6. anyscale/client/openapi_client/api/default_api.py +462 -23
  7. anyscale/client/openapi_client/models/__init__.py +11 -4
  8. anyscale/client/openapi_client/models/api_key_info.py +29 -3
  9. anyscale/client/openapi_client/models/apply_autoscaling_config_update_model.py +350 -0
  10. anyscale/client/openapi_client/models/apply_production_service_multi_version_v2_model.py +207 -0
  11. anyscale/client/openapi_client/models/apply_production_service_v2_model.py +31 -3
  12. anyscale/client/openapi_client/models/baseimagesenum.py +70 -1
  13. anyscale/client/openapi_client/models/cloud_data_bucket_file_type.py +2 -1
  14. anyscale/client/openapi_client/models/{oauthconnectionresponse_response.py → clouddeployment_response.py} +11 -11
  15. anyscale/client/openapi_client/models/create_experimental_workspace.py +29 -1
  16. anyscale/client/openapi_client/models/create_workspace_from_template.py +29 -1
  17. anyscale/client/openapi_client/models/create_workspace_template_version.py +31 -3
  18. anyscale/client/openapi_client/models/decorated_list_service_api_model.py +58 -1
  19. anyscale/client/openapi_client/models/decorated_production_service_v2_api_model.py +60 -3
  20. anyscale/client/openapi_client/models/decorated_service_event_api_model.py +3 -3
  21. anyscale/client/openapi_client/models/describe_machine_pool_machines_filters.py +33 -5
  22. anyscale/client/openapi_client/models/describe_machine_pool_workloads_filters.py +33 -5
  23. anyscale/client/openapi_client/models/{service_event_level.py → entity_type.py} +9 -9
  24. anyscale/client/openapi_client/models/event_level.py +2 -1
  25. anyscale/client/openapi_client/models/job_event_fields.py +206 -0
  26. anyscale/client/openapi_client/models/machine_type_partition_filter.py +152 -0
  27. anyscale/client/openapi_client/models/partition_info.py +30 -1
  28. anyscale/client/openapi_client/models/production_job_event.py +3 -3
  29. anyscale/client/openapi_client/models/rollout_strategy.py +2 -1
  30. anyscale/client/openapi_client/models/service_event_fields.py +318 -0
  31. anyscale/client/openapi_client/models/supportedbaseimagesenum.py +70 -1
  32. anyscale/client/openapi_client/models/task_summary_config.py +29 -3
  33. anyscale/client/openapi_client/models/task_table_config.py +29 -3
  34. anyscale/client/openapi_client/models/unified_event.py +377 -0
  35. anyscale/client/openapi_client/models/{ha_job_event_level.py → unified_origin_filter.py} +21 -9
  36. anyscale/client/openapi_client/models/unifiedevent_list_response.py +147 -0
  37. anyscale/client/openapi_client/models/workspace_event_fields.py +122 -0
  38. anyscale/client/openapi_client/models/workspace_template_version.py +30 -1
  39. anyscale/client/openapi_client/models/workspace_template_version_data_object.py +30 -1
  40. anyscale/cloud/models.py +2 -2
  41. anyscale/commands/cloud_commands.py +133 -2
  42. anyscale/commands/job_commands.py +1 -1
  43. anyscale/commands/service_commands.py +130 -67
  44. anyscale/commands/setup_k8s.py +546 -31
  45. anyscale/controllers/cloud_controller.py +15 -2
  46. anyscale/controllers/kubernetes_verifier.py +80 -66
  47. anyscale/job/_private/job_sdk.py +47 -1
  48. anyscale/job/commands.py +3 -0
  49. anyscale/sdk/anyscale_client/models/apply_production_service_v2_model.py +31 -3
  50. anyscale/sdk/anyscale_client/models/apply_service_model.py +31 -3
  51. anyscale/sdk/anyscale_client/models/baseimagesenum.py +70 -1
  52. anyscale/sdk/anyscale_client/models/rollout_strategy.py +2 -1
  53. anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +70 -1
  54. anyscale/service/__init__.py +11 -3
  55. anyscale/service/_private/service_sdk.py +361 -35
  56. anyscale/service/commands.py +15 -3
  57. anyscale/service/models.py +12 -0
  58. anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
  59. anyscale/version.py +1 -1
  60. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/METADATA +1 -1
  61. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/RECORD +66 -59
  62. anyscale/client/openapi_client/models/o_auth_connection_response.py +0 -229
  63. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/WHEEL +0 -0
  64. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/entry_points.txt +0 -0
  65. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/licenses/LICENSE +0 -0
  66. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/licenses/NOTICE +0 -0
  67. {anyscale-0.26.69.dist-info → anyscale-0.26.70.dist-info}/top_level.txt +0 -0
@@ -76,14 +76,20 @@ class KubernetesCloudSetupCommand:
76
76
  yes: bool,
77
77
  values_file: Optional[str] = None,
78
78
  operator_chart: Optional[str] = None,
79
+ cloud_id: Optional[str] = None,
80
+ resource_name: Optional[str] = None,
79
81
  ) -> None:
80
82
  """
81
83
  Main entry point for Kubernetes cloud setup.
82
84
 
85
+ This method handles both:
86
+ 1. Creating a new cloud (when cloud_id is None)
87
+ 2. Adding a resource to an existing cloud (when cloud_id is provided)
88
+
83
89
  Args:
84
90
  provider: Cloud provider (aws, gcp)
85
91
  region: AWS/GCP region
86
- name: Name for the Anyscale cloud
92
+ name: Name for the Anyscale cloud. If cloud_id is not provided, this will be used to create a new cloud.
87
93
  cluster_name: Kubernetes cluster name/context
88
94
  namespace: Namespace for the Anyscale operator
89
95
  project_id: GCP project ID (required for GCP)
@@ -91,16 +97,37 @@ class KubernetesCloudSetupCommand:
91
97
  yes: Skip confirmation prompts
92
98
  values_file: Optional custom path for Helm values file
93
99
  operator_chart: Optional path to operator chart (skips helm repo add/update)
100
+ cloud_id: Optional cloud ID for the Anyscale cloud to add the resource to
101
+ resource_name: Optional name for the cloud resource (will be auto-generated if not provided)
94
102
  """
95
- self.log.open_block(
96
- "Setup", f"Setting up Kubernetes cloud '{name}' on {provider.upper()}"
97
- )
103
+ # Determine if we're creating a new cloud or adding to existing
104
+ create_cloud = cloud_id is None
105
+
106
+ # Validate cloud_id is provided when adding to existing cloud
107
+ if not create_cloud:
108
+ assert (
109
+ cloud_id
110
+ ), "cloud_id is required when adding a resource to an existing cloud"
111
+
112
+ # Set up logging message based on mode
113
+ if create_cloud:
114
+ setup_message = (
115
+ f"Setting up Kubernetes cloud '{name}' on {provider.upper()}"
116
+ )
117
+ else:
118
+ setup_message = f"Setting up Kubernetes cloud resource for '{name}' on {provider.upper()}"
119
+
120
+ self.log.open_block("Setup", setup_message)
98
121
 
99
122
  # Set confirmation flag
100
123
  self.skip_confirmation = yes
101
124
 
125
+ # Track what resources were created for cleanup messaging
126
+ infrastructure = None
127
+ cluster_info = None
128
+ cloud_resource_id = None
129
+
102
130
  try:
103
- # Step 0: Check required CLI tools are installed
104
131
  self._check_required_tools(provider)
105
132
 
106
133
  # Step 1: Prompt for namespace BEFORE infrastructure setup
@@ -119,30 +146,74 @@ class KubernetesCloudSetupCommand:
119
146
  provider, region, name, cluster_info
120
147
  )
121
148
 
122
- # Step 4: Register cloud with Anyscale
123
- cloud_id = self._register_cloud(
124
- name, provider, region, infrastructure, cluster_info
125
- )
149
+ # Step 4: Register cloud OR create cloud resource
150
+ if create_cloud:
151
+ # Register new cloud with Anyscale
152
+ cloud_id = self._register_cloud(
153
+ name, provider, region, infrastructure, cluster_info
154
+ )
155
+
156
+ # Get the cloud resource ID from the newly registered cloud
157
+ cloud_resources = self.cloud_controller.get_decorated_cloud_resources(
158
+ cloud_id
159
+ )
160
+ if not cloud_resources:
161
+ raise click.ClickException(
162
+ "No cloud resources found after registration"
163
+ )
164
+ cloud_resource_id = cloud_resources[0].cloud_resource_id
165
+ else:
166
+ # Should have been validated earlier, but just in case
167
+ assert (
168
+ cloud_id
169
+ ), "cloud_id is required when adding a resource to an existing cloud"
170
+
171
+ # Create cloud resource in existing cloud
172
+ cloud_resource_id = self._create_cloud_resource(
173
+ cloud_id,
174
+ provider,
175
+ region,
176
+ infrastructure,
177
+ cluster_info,
178
+ resource_name,
179
+ )
126
180
 
127
181
  # Step 5: Install Anyscale operator
128
182
  self._install_operator(
129
- cloud_id,
183
+ cloud_resource_id,
130
184
  provider,
131
185
  region,
132
186
  final_namespace,
133
187
  infrastructure,
134
188
  values_file,
135
189
  operator_chart,
190
+ skip_confirmation=yes,
136
191
  )
137
192
 
138
193
  # Step 6: Verify installation
139
194
  if functional_verify:
140
- self._verify_installation(cloud_id, final_namespace, cluster_info)
195
+ self._verify_installation(
196
+ cloud_id, final_namespace, cluster_info, cloud_resource_id
197
+ )
141
198
 
142
199
  self.log.close_block("Setup")
143
- self.log.info(f"Kubernetes cloud '{name}' setup completed successfully!")
200
+ if create_cloud:
201
+ self.log.info(
202
+ f"Kubernetes cloud '{name}' setup completed successfully!"
203
+ )
204
+ else:
205
+ self.log.info(
206
+ f"Kubernetes cloud resource setup for '{name}' completed successfully!"
207
+ )
144
208
  except Exception: # noqa: BLE001
145
209
  self.log.close_block("Setup")
210
+ self._handle_setup_failure(
211
+ provider,
212
+ infrastructure,
213
+ cloud_id,
214
+ name,
215
+ is_cloud_resource_setup=not create_cloud,
216
+ )
146
217
  raise
147
218
 
148
219
  def _debug(self, *msg: str) -> None:
@@ -1131,14 +1202,16 @@ class KubernetesCloudSetupCommand:
1131
1202
  self.cloud_controller.log.info = original_log_info
1132
1203
 
1133
1204
  self._debug("Cloud registration completed, fetching cloud ID...")
1134
- clouds = (
1135
- self.cloud_controller.api_client.list_clouds_api_v2_clouds_get().results
1136
- )
1137
- cloud = next((c for c in clouds if c.name == name), None)
1138
- if not cloud:
1139
- raise click.ClickException("Failed to find registered cloud")
1205
+ # Use get_cloud_id_and_name helper to fetch the registered cloud
1206
+ from anyscale.cloud_utils import get_cloud_id_and_name
1207
+
1208
+ try:
1209
+ cloud_id, _ = get_cloud_id_and_name(
1210
+ self.cloud_controller.api_client, cloud_name=name
1211
+ )
1212
+ except Exception as e: # noqa: BLE001
1213
+ raise click.ClickException(f"Failed to find registered cloud: {e}")
1140
1214
 
1141
- cloud_id = getattr(cloud, "id", None) or getattr(cloud, "cloud_id", None)
1142
1215
  if not cloud_id:
1143
1216
  raise click.ClickException(
1144
1217
  "Failed to get cloud ID from registered cloud"
@@ -1162,27 +1235,23 @@ class KubernetesCloudSetupCommand:
1162
1235
 
1163
1236
  def _install_operator( # noqa: PLR0913
1164
1237
  self,
1165
- cloud_id: str,
1238
+ cloud_resource_id: str,
1166
1239
  provider: str,
1167
1240
  region: str,
1168
1241
  namespace: str,
1169
1242
  infrastructure: InfrastructureResources,
1170
1243
  values_file: Optional[str] = None,
1171
1244
  operator_chart: Optional[str] = None,
1245
+ skip_confirmation: bool = False,
1172
1246
  ) -> None:
1173
1247
  """Install the Anyscale operator using Helm."""
1174
1248
  self.log.info("Installing Anyscale operator...", block_label="Setup")
1175
1249
 
1176
- # Get cloud resources to get the cloud resource ID
1177
- cloud_resources = self.cloud_controller.get_decorated_cloud_resources(cloud_id)
1178
-
1179
- if not cloud_resources:
1180
- raise click.ClickException("No cloud resources found")
1181
-
1182
- cloud_resource_id = cloud_resources[0].cloud_resource_id
1183
-
1184
1250
  release_name = "anyscale-operator"
1185
1251
 
1252
+ # Prompt user about nginx ingress installation
1253
+ install_nginx = self._prompt_for_nginx_ingress(skip_confirmation)
1254
+
1186
1255
  # Generate Helm command and extract --set-string flags from it
1187
1256
  self._debug("Generating Helm command to extract parameters...")
1188
1257
  helm_command = self.cloud_controller._generate_helm_upgrade_command( # noqa: SLF001
@@ -1203,6 +1272,7 @@ class KubernetesCloudSetupCommand:
1203
1272
  infrastructure=infrastructure,
1204
1273
  custom_path=values_file,
1205
1274
  additional_values=set_string_values,
1275
+ install_nginx_ingress=install_nginx,
1206
1276
  )
1207
1277
 
1208
1278
  # Determine chart reference based on operator_chart parameter
@@ -1352,6 +1422,36 @@ class KubernetesCloudSetupCommand:
1352
1422
 
1353
1423
  return final_namespace
1354
1424
 
1425
+ def _prompt_for_nginx_ingress(self, skip_confirmation: bool = False) -> bool:
1426
+ """Prompt user whether to install nginx ingress subchart."""
1427
+ if skip_confirmation:
1428
+ self.log.info("Using default: nginx ingress subchart will be installed")
1429
+ return True
1430
+
1431
+ self.log.info(
1432
+ "The Anyscale operator can install an nginx ingress controller as part of the setup.",
1433
+ block_label="Setup",
1434
+ )
1435
+ self.log.info(
1436
+ "If you already have an ingress controller installed, you may want to skip this.",
1437
+ block_label="Setup",
1438
+ )
1439
+
1440
+ response = click.confirm(
1441
+ "Do you want to install the nginx ingress subchart?", default=True
1442
+ )
1443
+
1444
+ if response:
1445
+ self.log.info(
1446
+ "nginx ingress subchart will be installed", block_label="Setup"
1447
+ )
1448
+ else:
1449
+ self.log.info(
1450
+ "nginx ingress subchart will NOT be installed", block_label="Setup"
1451
+ )
1452
+
1453
+ return response
1454
+
1355
1455
  def _generate_helm_values_file( # noqa: PLR0913
1356
1456
  self,
1357
1457
  provider: str,
@@ -1361,6 +1461,7 @@ class KubernetesCloudSetupCommand:
1361
1461
  infrastructure: InfrastructureResources,
1362
1462
  custom_path: Optional[str] = None,
1363
1463
  additional_values: Optional[Dict[str, str]] = None,
1464
+ install_nginx_ingress: bool = True,
1364
1465
  ) -> str:
1365
1466
  """Generate Helm values file and save it locally."""
1366
1467
  self.log.info("Generating Helm values file...")
@@ -1380,7 +1481,7 @@ class KubernetesCloudSetupCommand:
1380
1481
  self._set_nested_value(
1381
1482
  values, "global.auth.iamIdentity", infrastructure.iam_role_arn
1382
1483
  )
1383
- self._set_nested_value(values, "ingress-nginx.enabled", True)
1484
+ self._set_nested_value(values, "ingress-nginx.enabled", install_nginx_ingress)
1384
1485
 
1385
1486
  # Add region for AWS only (using global.aws.region)
1386
1487
  # Region field is deprecated for other providers
@@ -1432,7 +1533,11 @@ class KubernetesCloudSetupCommand:
1432
1533
  )
1433
1534
 
1434
1535
  def _verify_installation(
1435
- self, cloud_id: str, namespace: str, cluster_info: ClusterInfo
1536
+ self,
1537
+ cloud_id: str,
1538
+ namespace: str,
1539
+ cluster_info: ClusterInfo,
1540
+ cloud_resource_id: Optional[str] = None,
1436
1541
  ) -> None:
1437
1542
  """Verify the Kubernetes installation."""
1438
1543
  self.log.info("Verifying installation...")
@@ -1443,7 +1548,20 @@ class KubernetesCloudSetupCommand:
1443
1548
  if not cloud_resources:
1444
1549
  raise click.ClickException("No cloud resources found for verification")
1445
1550
 
1446
- cloud_deployment = cloud_resources[0]
1551
+ # Find the specific cloud resource if cloud_resource_id is provided
1552
+ cloud_deployment = None
1553
+ if cloud_resource_id:
1554
+ for resource in cloud_resources:
1555
+ if resource.cloud_resource_id == cloud_resource_id:
1556
+ cloud_deployment = resource
1557
+ break
1558
+ if not cloud_deployment:
1559
+ raise click.ClickException(
1560
+ f"Could not find cloud resource with ID {cloud_resource_id}"
1561
+ )
1562
+ else:
1563
+ # Fallback to first resource for backward compatibility
1564
+ cloud_deployment = cloud_resources[0]
1447
1565
 
1448
1566
  # Use the existing Kubernetes verifier
1449
1567
  verifier = KubernetesCloudDeploymentVerifier(
@@ -1465,6 +1583,314 @@ class KubernetesCloudSetupCommand:
1465
1583
  self.log.error("Verification failed - please check the logs above")
1466
1584
  raise click.ClickException("Installation verification failed")
1467
1585
 
1586
+ def _create_cloud_resource( # noqa: PLR0912
1587
+ self,
1588
+ cloud_id: str,
1589
+ provider: str,
1590
+ region: str,
1591
+ infrastructure: InfrastructureResources,
1592
+ cluster_info: ClusterInfo,
1593
+ resource_name: Optional[str],
1594
+ ) -> str:
1595
+ """
1596
+ Create a cloud resource in an existing cloud and return the cloud_resource_id.
1597
+
1598
+ Args:
1599
+ cloud_id: ID of the existing cloud
1600
+ provider: Cloud provider (aws, gcp)
1601
+ region: Cloud region
1602
+ infrastructure: Infrastructure resources created
1603
+ cluster_info: Cluster information
1604
+ resource_name: Name for the cloud resource (optional, will be auto-generated if not provided)
1605
+
1606
+ Returns:
1607
+ The cloud_resource_id of the created resource
1608
+ """
1609
+ self.log.info("Creating cloud resource in Anyscale...", block_label="Setup")
1610
+ if resource_name:
1611
+ self.log.info(f"Using resource name: {resource_name}", block_label="Setup")
1612
+ else:
1613
+ self.log.info("Resource name will be auto-generated", block_label="Setup")
1614
+
1615
+ if provider == "aws":
1616
+ # Dynamically determine availability zones from the EKS cluster
1617
+ zones = self._get_eks_availability_zones(cluster_info.cluster_name, region)
1618
+
1619
+ cloud_deployment = CloudDeployment(
1620
+ name=resource_name,
1621
+ provider=CloudProviders.AWS,
1622
+ region=region,
1623
+ compute_stack=ComputeStack.K8S,
1624
+ object_storage=ObjectStorage(
1625
+ bucket_name=infrastructure.bucket_name, region=region
1626
+ ),
1627
+ aws_config=AWSConfig(),
1628
+ kubernetes_config=OpenAPIKubernetesConfig(
1629
+ anyscale_operator_iam_identity=infrastructure.iam_role_arn,
1630
+ zones=zones,
1631
+ ),
1632
+ )
1633
+ elif provider == "gcp":
1634
+ assert infrastructure.project_id, "Project ID is required for GCP"
1635
+
1636
+ from anyscale.client.openapi_client.models import GCPConfig
1637
+
1638
+ # Dynamically determine zones from the GKE cluster
1639
+ zones = self._get_gke_zones(
1640
+ cluster_info.cluster_name, region, infrastructure.project_id
1641
+ )
1642
+
1643
+ cloud_deployment = CloudDeployment(
1644
+ name=resource_name,
1645
+ provider=CloudProviders.GCP,
1646
+ region=region,
1647
+ compute_stack=ComputeStack.K8S,
1648
+ object_storage=ObjectStorage(
1649
+ bucket_name=infrastructure.bucket_name, region=region
1650
+ ),
1651
+ gcp_config=GCPConfig(project_id=infrastructure.project_id),
1652
+ kubernetes_config=OpenAPIKubernetesConfig(
1653
+ anyscale_operator_iam_identity=infrastructure.iam_role_arn,
1654
+ zones=zones,
1655
+ ),
1656
+ )
1657
+ else:
1658
+ raise click.ClickException(f"Unsupported provider: {provider}")
1659
+
1660
+ # Create cloud resource using the API
1661
+ try:
1662
+ self._debug("Cloud deployment details:")
1663
+ self._debug(f" Provider: {cloud_deployment.provider}")
1664
+ self._debug(f" Region: {cloud_deployment.region}")
1665
+ self._debug(f" Compute Stack: {cloud_deployment.compute_stack}")
1666
+ self._debug(f" Bucket Name: {cloud_deployment.object_storage.bucket_name}")
1667
+ self._debug(
1668
+ f" IAM Identity: {cloud_deployment.kubernetes_config.anyscale_operator_iam_identity}"
1669
+ )
1670
+
1671
+ # Save cloud deployment to a temporary file and use create_cloud_resource
1672
+ self._debug("Saving cloud deployment to temporary file...")
1673
+ import tempfile
1674
+
1675
+ with tempfile.NamedTemporaryFile(
1676
+ mode="w", suffix=".yaml", delete=False
1677
+ ) as temp_file:
1678
+ # Convert CloudDeployment to dict for YAML serialization
1679
+ deployment_dict = cloud_deployment.to_dict()
1680
+ yaml.dump(deployment_dict, temp_file, default_flow_style=False)
1681
+ temp_file_path = temp_file.name
1682
+
1683
+ try:
1684
+ self._debug(f"Created temporary spec file: {temp_file_path}")
1685
+ self._debug("Calling cloud_controller.create_cloud_resource...")
1686
+
1687
+ # Use cloud_controller's create_cloud_resource method which now returns the cloud_resource_id
1688
+ cloud_resource_id = self.cloud_controller.create_cloud_resource(
1689
+ cloud=None,
1690
+ cloud_id=cloud_id,
1691
+ spec_file=temp_file_path,
1692
+ skip_verification=True, # We will do verification in the _verify_installation method
1693
+ yes=True, # Skip confirmation prompts
1694
+ )
1695
+ finally:
1696
+ # Clean up the temporary file
1697
+ import os as os_module
1698
+
1699
+ try:
1700
+ os_module.unlink(temp_file_path)
1701
+ self._debug(f"Cleaned up temporary file: {temp_file_path}")
1702
+ except Exception as e: # noqa: BLE001
1703
+ self._debug(f"Failed to clean up temporary file: {e}")
1704
+
1705
+ self.log.info(
1706
+ f"Cloud resource created with ID: {cloud_resource_id}",
1707
+ block_label="Setup",
1708
+ )
1709
+
1710
+ return cloud_resource_id
1711
+
1712
+ except Exception as e: # noqa: BLE001
1713
+ self.log.error(f"Cloud resource creation failed with error: {e}")
1714
+ self.log.error(f"Error type: {type(e).__name__}")
1715
+ if hasattr(e, "response"):
1716
+ self.log.error(f"Response details: {getattr(e, 'response', 'N/A')}")
1717
+ if hasattr(e, "args"):
1718
+ self.log.error(f"Error args: {e.args}")
1719
+ import traceback
1720
+
1721
+ self.log.error(f"Full traceback: {traceback.format_exc()}")
1722
+ raise click.ClickException(f"Failed to create cloud resource: {e}")
1723
+
1724
+ def _handle_setup_failure(
1725
+ self,
1726
+ provider: str,
1727
+ infrastructure: Optional[InfrastructureResources],
1728
+ cloud_id: Optional[str],
1729
+ name: str,
1730
+ is_cloud_resource_setup: bool = False,
1731
+ ) -> None:
1732
+ """Handle setup failure by providing cleanup instructions to the user."""
1733
+ self.log.error("")
1734
+ self.log.error("=" * 80)
1735
+ self.log.error("SETUP FAILED - MANUAL CLEANUP REQUIRED")
1736
+ self.log.error("=" * 80)
1737
+ self.log.error("")
1738
+
1739
+ if is_cloud_resource_setup:
1740
+ self.log.error(
1741
+ "The Kubernetes cloud resource setup failed, leaving resources in an incomplete state."
1742
+ )
1743
+ else:
1744
+ self.log.error(
1745
+ "The Kubernetes cloud setup failed, leaving resources in an incomplete state."
1746
+ )
1747
+
1748
+ self.log.error(
1749
+ "You must manually clean up the following resources to avoid charges:"
1750
+ )
1751
+ self.log.error("")
1752
+
1753
+ if provider == "aws":
1754
+ self._log_aws_cleanup_instructions(
1755
+ infrastructure, cloud_id, name, is_cloud_resource_setup
1756
+ )
1757
+ elif provider == "gcp":
1758
+ self._log_gcp_cleanup_instructions(
1759
+ infrastructure, cloud_id, name, is_cloud_resource_setup
1760
+ )
1761
+
1762
+ self.log.error("")
1763
+ self.log.error("=" * 80)
1764
+
1765
+ def _log_aws_cleanup_instructions(
1766
+ self,
1767
+ infrastructure: Optional[InfrastructureResources],
1768
+ cloud_id: Optional[str],
1769
+ name: str,
1770
+ is_cloud_resource_setup: bool = False,
1771
+ ) -> None:
1772
+ """Log AWS-specific cleanup instructions."""
1773
+ self.log.error("AWS Resources to clean up:")
1774
+ self.log.error("")
1775
+
1776
+ if infrastructure:
1777
+ self.log.error("1. CloudFormation Stack:")
1778
+ # Stack name pattern: k8s-{name}-{random} with underscores replaced by hyphens
1779
+ stack_name_pattern = f"k8s-{name}-*".replace("_", "-").lower()
1780
+ self.log.error(
1781
+ f" - Find and delete the CloudFormation stack matching pattern: {stack_name_pattern}"
1782
+ )
1783
+ self.log.error(f" - Region: {infrastructure.region}")
1784
+ self.log.error(
1785
+ " - AWS Console: CloudFormation > Stacks > Select stack > Delete"
1786
+ )
1787
+ self.log.error(
1788
+ f" - AWS CLI: aws cloudformation delete-stack --stack-name <stack-name> --region {infrastructure.region}"
1789
+ )
1790
+ self.log.error("")
1791
+ self.log.error(" This will automatically delete:")
1792
+ self.log.error(f" - S3 Bucket: {infrastructure.bucket_name}")
1793
+ self.log.error(f" - IAM Role: {infrastructure.iam_role_arn}")
1794
+ self.log.error("")
1795
+
1796
+ if cloud_id:
1797
+ if is_cloud_resource_setup:
1798
+ self.log.error("2. Anyscale Cloud Resource:")
1799
+ self.log.error(
1800
+ f" - Delete the cloud resource from cloud '{name}' (ID: {cloud_id})"
1801
+ )
1802
+ self.log.error(
1803
+ f" - CLI: anyscale cloud resource delete --cloud '{name}' --resource <resource-name>"
1804
+ )
1805
+ self.log.error(
1806
+ " - To find the resource name, run: anyscale cloud get --name '{name}'"
1807
+ )
1808
+ self.log.error(
1809
+ f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
1810
+ )
1811
+ else:
1812
+ self.log.error("2. Anyscale Cloud Registration:")
1813
+ self.log.error(
1814
+ f" - Delete the cloud '{name}' (ID: {cloud_id}) from Anyscale"
1815
+ )
1816
+ self.log.error(f" - CLI: anyscale cloud delete --name '{name}'")
1817
+ self.log.error(
1818
+ f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
1819
+ )
1820
+ self.log.error("")
1821
+
1822
+ if not infrastructure:
1823
+ self.log.error(
1824
+ "No infrastructure resources were created before the failure."
1825
+ )
1826
+ self.log.error("")
1827
+
1828
+ def _log_gcp_cleanup_instructions(
1829
+ self,
1830
+ infrastructure: Optional[InfrastructureResources],
1831
+ cloud_id: Optional[str],
1832
+ name: str,
1833
+ is_cloud_resource_setup: bool = False,
1834
+ ) -> None:
1835
+ """Log GCP-specific cleanup instructions."""
1836
+ self.log.error("GCP Resources to clean up:")
1837
+ self.log.error("")
1838
+
1839
+ if infrastructure:
1840
+ self.log.error("1. GCS Bucket:")
1841
+ self.log.error(f" - Bucket: {infrastructure.bucket_name}")
1842
+ self.log.error(f" - Project: {infrastructure.project_id}")
1843
+ self.log.error(
1844
+ " - GCP Console: Cloud Storage > Buckets > Select bucket > Delete"
1845
+ )
1846
+ self.log.error(
1847
+ f" - gcloud CLI: gsutil rm -r gs://{infrastructure.bucket_name}"
1848
+ )
1849
+ self.log.error("")
1850
+
1851
+ self.log.error("2. Service Account:")
1852
+ self.log.error(f" - Service Account: {infrastructure.iam_role_arn}")
1853
+ self.log.error(f" - Project: {infrastructure.project_id}")
1854
+ self.log.error(
1855
+ " - GCP Console: IAM & Admin > Service Accounts > Select account > Delete"
1856
+ )
1857
+ self.log.error(
1858
+ f" - gcloud CLI: gcloud iam service-accounts delete {infrastructure.iam_role_arn} --project={infrastructure.project_id}"
1859
+ )
1860
+ self.log.error("")
1861
+
1862
+ if cloud_id:
1863
+ if is_cloud_resource_setup:
1864
+ self.log.error("3. Anyscale Cloud Resource:")
1865
+ self.log.error(
1866
+ f" - Delete the cloud resource from cloud '{name}' (ID: {cloud_id})"
1867
+ )
1868
+ self.log.error(
1869
+ f" - CLI: anyscale cloud resource delete --cloud '{name}' --resource <resource-name>"
1870
+ )
1871
+ self.log.error(
1872
+ " - To find the resource name, run: anyscale cloud get --name '{name}'"
1873
+ )
1874
+ self.log.error(
1875
+ f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
1876
+ )
1877
+ else:
1878
+ self.log.error("3. Anyscale Cloud Registration:")
1879
+ self.log.error(
1880
+ f" - Delete the cloud '{name}' (ID: {cloud_id}) from Anyscale"
1881
+ )
1882
+ self.log.error(f" - CLI: anyscale cloud delete --name '{name}'")
1883
+ self.log.error(
1884
+ f" - Console: {ANYSCALE_HOST}/clouds (if using custom host)"
1885
+ )
1886
+ self.log.error("")
1887
+
1888
+ if not infrastructure:
1889
+ self.log.error(
1890
+ "No infrastructure resources were created before the failure."
1891
+ )
1892
+ self.log.error("")
1893
+
1468
1894
 
1469
1895
  def setup_kubernetes_cloud( # noqa: PLR0913
1470
1896
  provider: str,
@@ -1516,3 +1942,92 @@ def setup_kubernetes_cloud( # noqa: PLR0913
1516
1942
  except Exception as e: # noqa: BLE001
1517
1943
  click.echo(f"Setup failed: {e}", err=True)
1518
1944
  raise click.Abort()
1945
+
1946
+
1947
+ def setup_kubernetes_cloud_resource( # noqa: PLR0913
1948
+ provider: str,
1949
+ region: str,
1950
+ cloud_name: Optional[str],
1951
+ cloud_id: Optional[str],
1952
+ cluster_name: str,
1953
+ resource_name: Optional[str],
1954
+ namespace: str = "anyscale-operator",
1955
+ project_id: Optional[str] = None,
1956
+ functional_verify: bool = False,
1957
+ yes: bool = False,
1958
+ values_file: Optional[str] = None,
1959
+ debug: bool = False,
1960
+ operator_chart: Optional[str] = None,
1961
+ ) -> None:
1962
+ """
1963
+ Set up cloud resources for an existing Anyscale cloud on a Kubernetes cluster.
1964
+
1965
+ This function sets up infrastructure and installs the operator without
1966
+ registering a new cloud.
1967
+
1968
+ Args:
1969
+ provider: Cloud provider (aws, gcp)
1970
+ region: Cloud region
1971
+ cloud_name: Name of existing Anyscale cloud
1972
+ cloud_id: ID of existing Anyscale cloud
1973
+ cluster_name: Kubernetes cluster name
1974
+ resource_name: Name for the cloud resource (optional, will be auto-generated if not provided)
1975
+ namespace: Namespace for Anyscale operator (default: anyscale-operator)
1976
+ project_id: GCP project ID (optional, for GCP)
1977
+ functional_verify: Whether to run functional verification
1978
+ yes: Skip confirmation prompts
1979
+ values_file: Optional path for Helm values file
1980
+ debug: Enable debug logging
1981
+ operator_chart: Optional path to operator chart (skips helm repo add/update)
1982
+ """
1983
+ cmd = KubernetesCloudSetupCommand(debug=debug)
1984
+
1985
+ # Preprocessing: Fetch full cloud info to ensure cloud exists and get both name and ID
1986
+ if not cloud_id and not cloud_name:
1987
+ click.echo("Either cloud_name or cloud_id must be provided", err=True)
1988
+ raise click.Abort()
1989
+
1990
+ if cloud_id and cloud_name:
1991
+ click.echo("Only one of cloud_name or cloud_id can be provided", err=True)
1992
+ raise click.Abort()
1993
+
1994
+ # Use get_cloud_id_and_name to validate cloud exists and get both ID and name
1995
+ try:
1996
+ from anyscale.cloud_utils import get_cloud_id_and_name
1997
+
1998
+ if cloud_id:
1999
+ cloud_id, cloud_name = get_cloud_id_and_name(
2000
+ cmd.cloud_controller.api_client, cloud_id=cloud_id
2001
+ )
2002
+ else:
2003
+ cloud_id, cloud_name = get_cloud_id_and_name(
2004
+ cmd.cloud_controller.api_client, cloud_name=cloud_name
2005
+ )
2006
+
2007
+ except Exception as e: # noqa: BLE001
2008
+ click.echo(f"Failed to fetch cloud information: {e}", err=True)
2009
+ raise click.Abort()
2010
+
2011
+ if not cloud_id or not cloud_name:
2012
+ click.echo("Could not find cloud with provided name or ID", err=True)
2013
+ raise click.Abort()
2014
+
2015
+ try:
2016
+ # Use the unified run method with cloud_id to indicate resource-only mode
2017
+ cmd.run(
2018
+ provider=provider,
2019
+ region=region,
2020
+ name=cloud_name,
2021
+ cluster_name=cluster_name,
2022
+ namespace=namespace,
2023
+ project_id=project_id,
2024
+ functional_verify=functional_verify,
2025
+ yes=yes,
2026
+ values_file=values_file,
2027
+ operator_chart=operator_chart,
2028
+ cloud_id=cloud_id,
2029
+ resource_name=resource_name,
2030
+ )
2031
+ except Exception as e: # noqa: BLE001
2032
+ click.echo(f"Setup failed: {e}", err=True)
2033
+ raise click.Abort()