anyscale 0.26.66__py3-none-any.whl → 0.26.67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/commands/cloud_commands.py +58 -0
- anyscale/commands/setup_k8s.py +1047 -0
- anyscale/controllers/cloud_controller.py +1 -0
- anyscale/controllers/kubernetes_verifier.py +8 -0
- anyscale/utils/cloudformation_utils.py +364 -0
- anyscale/version.py +1 -1
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/METADATA +1 -1
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/RECORD +13 -11
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/WHEEL +0 -0
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.66.dist-info → anyscale-0.26.67.dist-info}/top_level.txt +0 -0
anyscale/commands/setup_k8s.py (new file)
@@ -0,0 +1,1047 @@
"""
Kubernetes Cloud Setup Command

This module provides a streamlined command for setting up Anyscale on Kubernetes clusters.
It handles infrastructure provisioning, cloud registration, and operator installation.
"""

from dataclasses import dataclass
import json
import os
import re
import subprocess
from typing import Any, Dict, List, Optional

import click
import yaml

from anyscale.cli_logger import BlockLogger
from anyscale.client.openapi_client.models import (
    AWSConfig,
    CloudDeployment,
    CloudProviders,
    ComputeStack,
    KubernetesConfig as OpenAPIKubernetesConfig,
    ObjectStorage,
)
from anyscale.controllers.cloud_controller import CloudController
from anyscale.controllers.kubernetes_verifier import (
    KubernetesCloudDeploymentVerifier,
    KubernetesConfig,
)
from anyscale.shared_anyscale_utils.conf import ANYSCALE_CORS_ORIGIN, ANYSCALE_HOST


@dataclass
class ClusterInfo:
    """Information about the target Kubernetes cluster."""

    context: str
    namespace: str
    provider: str
    region: str
    cluster_name: str
    project_id: Optional[str] = None
    cluster_arn: Optional[str] = None
    oidc_provider: Optional[str] = None
    cluster_location: Optional[str] = None
    workload_identity_pool: Optional[str] = None
    cluster_version: Optional[str] = None


@dataclass
class InfrastructureResources:
    """Resources created during infrastructure setup."""

    bucket_name: str
    iam_role_arn: str
    region: str
    project_id: Optional[str] = None


class KubernetesCloudSetupCommand:
    """Command to setup Kubernetes cloud."""

    def __init__(self, logger: Optional[BlockLogger] = None, debug: bool = False):
        self.log = logger or BlockLogger()
        self.cloud_controller = CloudController(log=self.log)
        self.skip_confirmation = False
        self.debug = debug or os.environ.get("ANYSCALE_DEBUG") == "1"

    def run(  # noqa: PLR0913
        self,
        provider: str,
        region: str,
        name: str,
        cluster_name: str,
        namespace: str,
        project_id: Optional[str],
        functional_verify: bool,
        yes: bool,
        values_file: Optional[str] = None,
    ) -> None:
        """
        Main entry point for Kubernetes cloud setup.

        Args:
            provider: Cloud provider (aws, gcp)
            region: AWS/GCP region
            name: Name for the Anyscale cloud
            cluster_name: Kubernetes cluster name/context
            namespace: Namespace for the Anyscale operator
            project_id: GCP project ID (required for GCP)
            functional_verify: Whether to run functional verification
            yes: Skip confirmation prompts
            values_file: Optional custom path for Helm values file
        """
        self.log.open_block(
            "Setup", f"Setting up Kubernetes cloud '{name}' on {provider.upper()}"
        )

        # Set confirmation flag
        self.skip_confirmation = yes

        try:
            # Step 0: Check required CLI tools are installed
            self._check_required_tools(provider)

            # Step 1: Prompt for namespace BEFORE infrastructure setup
            # This is needed because the IAM role trust relationship depends on the namespace
            final_namespace = self._prompt_for_namespace(
                namespace, skip_confirmation=yes
            )

            # Step 2: Discover and validate cluster
            cluster_info = self._discover_cluster(
                cluster_name, final_namespace, provider, region, project_id
            )

            # Step 3: Set up cloud infrastructure
            infrastructure = self._setup_infrastructure(
                provider, region, name, cluster_info
            )

            # Step 4: Register cloud with Anyscale
            cloud_id = self._register_cloud(
                name, provider, region, infrastructure, cluster_info
            )

            # Step 5: Install Anyscale operator
            self._install_operator(
                cloud_id,
                provider,
                region,
                final_namespace,
                infrastructure,
                values_file,
            )

            # Step 6: Verify installation
            if functional_verify:
                self._verify_installation(cloud_id, final_namespace, cluster_info)

            self.log.close_block("Setup")
            self.log.info(f"Kubernetes cloud '{name}' setup completed successfully!")
        except Exception:  # noqa: BLE001
            self.log.close_block("Setup")
            raise

    def _debug(self, *msg: str) -> None:
        """Log debug messages only when debug mode is enabled."""
        if self.debug:
            self.log.debug(*msg)

    def _check_required_tools(self, provider: str) -> None:
        """Check that required CLI tools are installed."""
        # Common tools required for all providers
        required_tools = ["kubectl", "helm"]

        # Provider-specific tools
        if provider == "aws":
            required_tools.append("aws")
        elif provider == "gcp":
            required_tools.extend(["gcloud", "gsutil"])

        self._debug(f"Checking for required tools: {', '.join(required_tools)}")

        missing_tools = []
        for tool in required_tools:
            if not self._check_command_available(tool):
                missing_tools.append(tool)

        if missing_tools:
            error_msg = f"Missing required CLI tools: {', '.join(missing_tools)}\n\n"
            raise click.ClickException(error_msg.rstrip())

        self.log.info(
            f"Required CLI tools are installed ({', '.join(required_tools)})",
            block_label="Setup",
        )

    def _check_command_available(self, command: str) -> bool:
        """Check if a command is available in the system PATH."""
        try:
            result = subprocess.run(
                ["which", command], capture_output=True, text=True, check=False
            )
            return result.returncode == 0
        except Exception:  # noqa: BLE001
            return False

    def _discover_cluster(
        self,
        cluster_name: str,
        namespace: str,
        provider: str,
        region: str,
        project_id: Optional[str],  # noqa: ARG002
    ) -> ClusterInfo:
        """Discover and validate the target Kubernetes cluster using cloud provider APIs."""
        self.log.info(
            f"Discovering {provider.upper()} cluster: {cluster_name}",
            block_label="Setup",
        )

        if provider == "aws":
            return self._discover_aws_cluster(cluster_name, namespace, region)
        elif provider == "gcp":
            raise click.ClickException(
                "GCP support is not yet implemented. Please use AWS for now."
            )
        else:
            raise click.ClickException(f"Unsupported provider: {provider}")

    def _discover_aws_cluster(
        self, cluster_name: str, namespace: str, region: str
    ) -> ClusterInfo:
        """Discover AWS EKS cluster details and configure kubeconfig."""
        try:
            self._debug(f"Fetching EKS cluster info for {cluster_name} in {region}...")
            cluster_info = self._get_eks_cluster_info(cluster_name, region)
            self._debug(f"EKS Cluster ARN: {cluster_info.get('arn', 'Unknown')}")
            self._debug(
                f"EKS Cluster Version: {cluster_info.get('version', 'Unknown')}"
            )
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to get EKS cluster info: {e}")
            raise click.ClickException(
                f"Failed to discover EKS cluster {cluster_name}: {e}"
            )

        try:
            self._debug("Fetching OIDC provider information...")
            oidc_provider = self._get_eks_oidc_provider(cluster_name, region)
            self._debug(f"OIDC Provider: {oidc_provider}")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to get OIDC provider: {e}")
            raise click.ClickException(
                f"Failed to get OIDC provider for cluster {cluster_name}: {e}"
            )

        try:
            self._debug("Configuring kubeconfig for EKS cluster...")
            self._configure_aws_kubeconfig(cluster_name, region)
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to configure kubeconfig: {e}")
            raise click.ClickException(
                f"Failed to configure kubeconfig for EKS cluster: {e}"
            )

        try:
            self._debug("Verifying kubeconfig configuration...")
            self._verify_kubeconfig()
            current_context = self._get_current_kubectl_context()
            self.log.info(f"Cluster discovered: {current_context}", block_label="Setup")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to verify kubeconfig: {e}")
            raise click.ClickException(f"Failed to verify kubeconfig: {e}")

        return ClusterInfo(
            context=current_context,
            namespace=namespace,
            provider="aws",
            region=region,
            cluster_name=cluster_name,
            cluster_arn=cluster_info.get("arn"),
            oidc_provider=oidc_provider,
            cluster_version=cluster_info.get("version"),
        )

    def _setup_infrastructure(
        self, provider: str, region: str, name: str, cluster_info: ClusterInfo,
    ) -> InfrastructureResources:
        """Set up cloud infrastructure (S3/GCS bucket, IAM roles, etc.)."""
        self.log.info(
            f"Setting up {provider.upper()} infrastructure...", block_label="Setup"
        )

        if provider == "aws":
            return self._setup_aws_infrastructure(region, name, cluster_info)
        elif provider == "gcp":
            raise click.ClickException(
                "GCP support is not yet implemented. Please use AWS for now."
            )
        else:
            raise click.ClickException(f"Unsupported provider: {provider}")

    def _setup_aws_infrastructure(  # noqa: PLR0912
        self, region: str, name: str, cluster_info: ClusterInfo,
    ) -> InfrastructureResources:
        """Set up AWS infrastructure for Kubernetes using CloudFormation."""
        try:
            import boto3

            from anyscale.utils.cloudformation_utils import CloudFormationUtils
        except ImportError as e:
            self.log.error(f"Failed to import required modules: {e}")
            raise click.ClickException(f"Failed to import required modules: {e}")

        try:
            # Generate a unique cloud ID
            cloud_id = f"k8s-{name}-{os.urandom(4).hex()}"
            stack_name = cloud_id.replace("_", "-").lower()
            self._debug(f"Generated cloud ID: {cloud_id}")
            self._debug(f"CloudFormation stack name: {stack_name}")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to generate cloud ID: {e}")
            raise click.ClickException(f"Failed to generate cloud ID: {e}")

        try:
            # Generate CloudFormation template for Kubernetes setup with actual OIDC provider
            if not cluster_info.oidc_provider:
                raise click.ClickException(
                    "OIDC provider information not found. Please ensure the EKS cluster has OIDC provider enabled."
                )
            self._debug("Generating CloudFormation template...")
            self._debug(
                f"Using namespace: {cluster_info.namespace} with service account: anyscale-operator"
            )
            cfn_template_body = self._generate_aws_cloudformation_template(
                cloud_id, cluster_info.oidc_provider, cluster_info.namespace,
            )
            self._debug("CloudFormation template generated successfully")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to generate CloudFormation template: {e}")
            raise click.ClickException(
                f"Failed to generate CloudFormation template: {e}"
            )

        try:
            self._debug("Preparing CloudFormation parameters...")
            parameters = [{"ParameterKey": "CloudID", "ParameterValue": cloud_id}]
            self._debug(f"Prepared {len(parameters)} CloudFormation parameters")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to prepare CloudFormation parameters: {e}")
            raise click.ClickException(
                f"Failed to prepare CloudFormation parameters: {e}"
            )

        try:
            with self.log.indent():
                self.log.info(
                    "Creating CloudFormation stack (this may take a few minutes)...",
                    block_label="Setup",
                )
                boto3_session = boto3.Session(region_name=region)
                cfn_utils = CloudFormationUtils(self.log)
                cfn_utils.create_and_wait_for_stack(
                    stack_name=stack_name,
                    template_body=cfn_template_body,
                    parameters=parameters,
                    region=region,
                    boto3_session=boto3_session,
                    timeout_seconds=600,
                )
                self.log.info("CloudFormation stack created", block_label="Setup")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to create CloudFormation stack: {e}")
            raise click.ClickException(f"Failed to create CloudFormation stack: {e}")

        try:
            self._debug("Retrieving CloudFormation stack outputs...")
            stack_outputs = cfn_utils.get_stack_outputs(
                stack_name, region, boto3_session
            )
            bucket_name = stack_outputs.get("S3BucketName", f"anyscale-{cloud_id}")
            iam_role_arn = stack_outputs.get("AnyscaleCrossAccountIAMRoleArn")

            if not iam_role_arn:
                raise click.ClickException(
                    "Failed to get IAM role ARN from CloudFormation stack"
                )

            self._debug(f"S3 Bucket: {bucket_name}")
            self._debug(f"IAM Role ARN: {iam_role_arn}")
        except Exception as e:  # noqa: BLE001
            self.log.error(f"Failed to get CloudFormation outputs: {e}")
            raise click.ClickException(f"Failed to get CloudFormation outputs: {e}")

        return InfrastructureResources(
            bucket_name=bucket_name, iam_role_arn=iam_role_arn, region=region
        )

    def _generate_aws_cloudformation_template(
        self, cloud_id: str, oidc_provider_arn: str, namespace: str,
    ) -> str:
        """Generate CloudFormation template for AWS Kubernetes setup."""
        # Extract OIDC provider URL from ARN for the condition
        # ARN format: arn:aws:iam::ACCOUNT:oidc-provider/oidc.eks.REGION.amazonaws.com/id/XXXXXX
        # We need: oidc.eks.REGION.amazonaws.com/id/XXXXXX
        if "oidc-provider/" not in oidc_provider_arn:
            raise click.ClickException(
                f"Invalid OIDC provider ARN format: {oidc_provider_arn}"
            )
        oidc_provider_url = oidc_provider_arn.split("oidc-provider/")[-1]

        service_account_name = "anyscale-operator"

        # Use ANYSCALE_CORS_ORIGIN from shared config
        # This respects the ANYSCALE_HOST environment variable
        allowed_origin = ANYSCALE_CORS_ORIGIN

        template = {
            "AWSTemplateFormatVersion": "2010-09-09",
            "Description": f"Anyscale Kubernetes Cloud Infrastructure for {cloud_id}",
            "Parameters": {
                "CloudID": {
                    "Type": "String",
                    "Description": "Cloud ID for resource naming",
                }
            },
            "Resources": {
                "AnyscaleBucket": {
                    "Type": "AWS::S3::Bucket",
                    "Properties": {
                        "BucketName": {"Fn::Sub": "anyscale-${CloudID}"},
                        "VersioningConfiguration": {"Status": "Enabled"},
                        "PublicAccessBlockConfiguration": {
                            "BlockPublicAcls": True,
                            "BlockPublicPolicy": True,
                            "IgnorePublicAcls": True,
                            "RestrictPublicBuckets": True,
                        },
                        "CorsConfiguration": {
                            "CorsRules": [
                                {
                                    "AllowedHeaders": ["*"],
                                    "AllowedMethods": [
                                        "GET",
                                        "PUT",
                                        "POST",
                                        "HEAD",
                                        "DELETE",
                                    ],
                                    "AllowedOrigins": [allowed_origin],
                                    "MaxAge": 3600,
                                }
                            ]
                        },
                    },
                },
                "AnyscaleOperatorRole": {
                    "Type": "AWS::IAM::Role",
                    "Properties": {
                        "RoleName": {"Fn::Sub": "${CloudID}-anyscale-operator-role"},
                        "AssumeRolePolicyDocument": {
                            "Version": "2012-10-17",
                            "Statement": [
                                {
                                    "Effect": "Allow",
                                    "Principal": {"Federated": oidc_provider_arn},
                                    "Action": "sts:AssumeRoleWithWebIdentity",
                                    "Condition": {
                                        "StringEquals": {
                                            f"{oidc_provider_url}:sub": f"system:serviceaccount:{namespace}:{service_account_name}"
                                        }
                                    },
                                }
                            ],
                        },
                        "Policies": [
                            {
                                "PolicyName": "AnyscaleS3AccessPolicy",
                                "PolicyDocument": {
                                    "Version": "2012-10-17",
                                    "Statement": [
                                        {
                                            "Effect": "Allow",
                                            "Action": [
                                                "s3:GetObject",
                                                "s3:PutObject",
                                                "s3:DeleteObject",
                                                "s3:ListBucket",
                                            ],
                                            "Resource": [
                                                {
                                                    "Fn::GetAtt": [
                                                        "AnyscaleBucket",
                                                        "Arn",
                                                    ]
                                                },
                                                {"Fn::Sub": "${AnyscaleBucket.Arn}/*"},
                                            ],
                                        }
                                    ],
                                },
                            }
                        ],
                    },
                },
            },
            "Outputs": {
                "S3BucketName": {
                    "Value": {"Ref": "AnyscaleBucket"},
                    "Description": "Name of the S3 bucket",
                },
                "AnyscaleCrossAccountIAMRoleArn": {
                    "Value": {"Fn::GetAtt": ["AnyscaleOperatorRole", "Arn"]},
                    "Description": "ARN of the Anyscale operator IAM role",
                },
            },
        }

        return json.dumps(template, indent=2)

    def _get_eks_cluster_info(self, cluster_name: str, region: str) -> Dict[str, Any]:
        """Get EKS cluster information using AWS CLI."""
        try:
            result = subprocess.run(
                [
                    "aws",
                    "eks",
                    "describe-cluster",
                    "--name",
                    cluster_name,
                    "--region",
                    region,
                ],
                capture_output=True,
                text=True,
                check=True,
            )
            cluster_data = json.loads(result.stdout)
            return cluster_data.get("cluster", {})
        except subprocess.CalledProcessError as e:
            raise click.ClickException(f"Failed to get EKS cluster info: {e.stderr}")

    def _get_eks_availability_zones(self, cluster_name: str, region: str) -> List[str]:
        """Get availability zones where the EKS cluster's subnets are located."""
        try:
            cluster_info = self._get_eks_cluster_info(cluster_name, region)
            subnet_ids = cluster_info.get("resourcesVpcConfig", {}).get("subnetIds", [])

            if not subnet_ids:
                self._debug(
                    "No subnets found in cluster info, falling back to default zones"
                )
                return [region + "a", region + "b", region + "c"]

            # Get subnet details to find their availability zones
            result = subprocess.run(
                [
                    "aws",
                    "ec2",
                    "describe-subnets",
                    "--subnet-ids",
                    *subnet_ids,
                    "--region",
                    region,
                    "--query",
                    "Subnets[*].AvailabilityZone",
                    "--output",
                    "json",
                ],
                capture_output=True,
                text=True,
                check=True,
            )

            zones = json.loads(result.stdout)
            # Remove duplicates and sort
            unique_zones = sorted(set(zones))

            if unique_zones:
                self._debug(f"Discovered availability zones: {', '.join(unique_zones)}")
                return unique_zones
            else:
                self._debug(
                    "No availability zones found, falling back to default zones"
                )
                return [region + "a", region + "b", region + "c"]

        except Exception as e:  # noqa: BLE001
            self._debug(f"Failed to get availability zones: {e}, using default zones")
            return [region + "a", region + "b", region + "c"]

    def _get_eks_oidc_provider(self, cluster_name: str, region: str) -> str:
        """Get EKS OIDC provider URL for IRSA."""
        cluster_info = self._get_eks_cluster_info(cluster_name, region)
        identity = cluster_info.get("identity", {})
        oidc_issuer = identity.get("oidc", {}).get("issuer", "")

        if not oidc_issuer:
            raise click.ClickException(
                "Could not find OIDC issuer for EKS cluster. IRSA setup requires OIDC provider."
            )

        # Extract OIDC provider ARN
        # OIDC issuer URL format: https://oidc.eks.region.amazonaws.com/id/EXAMPLED539D4633E53CE8D
        if "oidc.eks." in oidc_issuer and ".amazonaws.com/id/" in oidc_issuer:
            oidc_id = oidc_issuer.split("/id/")[-1]
            account_id = self._get_aws_account_id()
            oidc_provider_arn = f"arn:aws:iam::{account_id}:oidc-provider/oidc.eks.{region}.amazonaws.com/id/{oidc_id}"
            return oidc_provider_arn

        raise click.ClickException(
            f"Could not parse OIDC provider from issuer URL: {oidc_issuer}"
        )

    def _get_aws_account_id(self) -> str:
        """Get AWS account ID."""
        try:
            result = subprocess.run(
                [
                    "aws",
                    "sts",
                    "get-caller-identity",
                    "--query",
                    "Account",
                    "--output",
                    "text",
                ],
                capture_output=True,
                text=True,
                check=True,
            )
            return result.stdout.strip()
        except subprocess.CalledProcessError as e:
            raise click.ClickException(f"Failed to get AWS account ID: {e.stderr}")

    def _configure_aws_kubeconfig(self, cluster_name: str, region: str) -> None:
        """Configure kubeconfig for AWS EKS cluster."""
        self.log.info(f"Configuring kubeconfig for EKS cluster: {cluster_name}")

        try:
            subprocess.run(
                [
                    "aws",
                    "eks",
                    "update-kubeconfig",
                    "--region",
                    region,
                    "--name",
                    cluster_name,
                ],
                capture_output=True,
                text=True,
                check=True,
            )
            self.log.info("EKS kubeconfig configured successfully")
        except subprocess.CalledProcessError as e:
            raise click.ClickException(
                f"Failed to configure EKS kubeconfig: {e.stderr}"
            )

    def _verify_kubeconfig(self) -> None:
        """Verify that kubeconfig is working correctly."""
        self.log.info("Verifying kubeconfig configuration...")

        try:
            subprocess.run(
                ["kubectl", "cluster-info"], capture_output=True, text=True, check=True
            )
            self.log.info("Kubeconfig verification successful")
        except subprocess.CalledProcessError as e:
            raise click.ClickException(f"Kubeconfig verification failed: {e.stderr}")

    def _get_current_kubectl_context(self) -> str:
        """Get the current kubectl context."""
        try:
            result = subprocess.run(
                ["kubectl", "config", "current-context"],
                capture_output=True,
                text=True,
                check=True,
            )
            return result.stdout.strip()
        except subprocess.CalledProcessError as e:
            raise click.ClickException(
                f"Failed to get current kubectl context: {e.stderr}"
            )

    def _register_cloud(  # noqa: PLR0912
        self,
        name: str,
        provider: str,
        region: str,
        infrastructure: InfrastructureResources,
        cluster_info: ClusterInfo,
    ) -> str:
        """Register the cloud with Anyscale."""
        self.log.info("Registering cloud with Anyscale...", block_label="Setup")

        if provider == "aws":
            # Dynamically determine availability zones from the EKS cluster
            zones = self._get_eks_availability_zones(cluster_info.cluster_name, region)

            cloud_deployment = CloudDeployment(
                name=name,
                provider=CloudProviders.AWS,
                region=region,
                compute_stack=ComputeStack.K8S,
                object_storage=ObjectStorage(
                    bucket_name=infrastructure.bucket_name, region=region
                ),
                aws_config=AWSConfig(),
                kubernetes_config=OpenAPIKubernetesConfig(
                    anyscale_operator_iam_identity=infrastructure.iam_role_arn,
                    zones=zones,
                ),
            )
        else:
            raise click.ClickException(
                "GCP support is not yet implemented. Please use AWS for now."
            )

        # Register the cloud
        try:
            self._debug("Cloud deployment details:")
            self._debug(f"  Name: {cloud_deployment.name}")
            self._debug(f"  Provider: {cloud_deployment.provider}")
            self._debug(f"  Region: {cloud_deployment.region}")
            self._debug(f"  Compute Stack: {cloud_deployment.compute_stack}")
            self._debug(f"  Bucket Name: {cloud_deployment.object_storage.bucket_name}")
            self._debug(
                f"  IAM Identity: {cloud_deployment.kubernetes_config.anyscale_operator_iam_identity}"
            )
            if cloud_deployment.aws_config:
                self._debug("  AWS Config:")
                self._debug(
                    f"    IAM Role ID: {cloud_deployment.aws_config.anyscale_iam_role_id}"
                )

            # Temporarily suppress cloud controller logging to avoid Helm command output
            original_log_info = self.cloud_controller.log.info
            self.cloud_controller.log.info = lambda *_args, **_kwargs: None

            try:
                if provider == "aws":
                    self.log.info("Calling register_aws_cloud...")
                    self.cloud_controller.register_aws_cloud(
                        name=name,
                        cloud_resource=cloud_deployment,
                        functional_verify=None,
                        yes=True,
                        skip_verifications=True,
                        auto_add_user=True,
                    )
                else:
                    raise click.ClickException(
                        "GCP support is not yet implemented. Please use AWS for now."
                    )
            finally:
                # Restore the original log.info method
                self.cloud_controller.log.info = original_log_info

            self._debug("Cloud registration completed, fetching cloud ID...")
            clouds = (
                self.cloud_controller.api_client.list_clouds_api_v2_clouds_get().results
            )
            cloud = next((c for c in clouds if c.name == name), None)
            if not cloud:
                raise click.ClickException("Failed to find registered cloud")

            cloud_id = getattr(cloud, "id", None) or getattr(cloud, "cloud_id", None)
            if not cloud_id:
                raise click.ClickException(
                    "Failed to get cloud ID from registered cloud"
                )

            self.log.info(f"Cloud registered with ID: {cloud_id}", block_label="Setup")

            return cloud_id

        except Exception as e:  # noqa: BLE001
            self.log.error(f"Cloud registration failed with error: {e}")
            self.log.error(f"Error type: {type(e).__name__}")
            if hasattr(e, "response"):
                self.log.error(f"Response details: {getattr(e, 'response', 'N/A')}")
            if hasattr(e, "args"):
                self.log.error(f"Error args: {e.args}")
            import traceback

            self.log.error(f"Full traceback: {traceback.format_exc()}")
            raise click.ClickException(f"Failed to register cloud: {e}")

    def _install_operator(  # noqa: PLR0913
        self,
        cloud_id: str,
        provider: str,
        region: str,
        namespace: str,
        infrastructure: InfrastructureResources,
        values_file: Optional[str] = None,
    ) -> None:
        """Install the Anyscale operator using Helm."""
        self.log.info("Installing Anyscale operator...", block_label="Setup")

        # Get cloud resources to get the cloud resource ID
        cloud_resources = self.cloud_controller.get_decorated_cloud_resources(cloud_id)

        if not cloud_resources:
            raise click.ClickException("No cloud resources found")

        cloud_resource_id = cloud_resources[0].cloud_resource_id

        release_name = "anyscale-operator"

        # Generate Helm command and extract --set-string flags from it
        self._debug("Generating Helm command to extract parameters...")
        helm_command = self.cloud_controller._generate_helm_upgrade_command(  # noqa: SLF001
            provider=provider,
            cloud_deployment_id=cloud_resource_id,
            region=region,
            operator_iam_identity=infrastructure.iam_role_arn,
        )

        set_string_values = self._extract_set_string_values(helm_command)
        self._debug(f"Extracted {len(set_string_values)} --set-string parameters")

        values_file_path = self._generate_helm_values_file(
            provider=provider,
            cloud_deployment_id=cloud_resource_id,
            region=region,
            namespace=namespace,
            infrastructure=infrastructure,
            custom_path=values_file,
            additional_values=set_string_values,
        )

        # Build a simple Helm command that only uses the values file
        self._debug("Generating Helm command...")
        helm_command = (
            f"helm upgrade {release_name} anyscale/anyscale-operator "
            f"--values {values_file_path} "
            f"--namespace {namespace} "
            f"--create-namespace "
            f"--wait "
            f"-i"
        )

        self._execute_helm_command(helm_command)

    def _extract_set_string_values(self, helm_command: str) -> Dict[str, str]:
        """
        Extract all --set-string key=value pairs from a Helm command.

        Args:
            helm_command: The Helm command string to parse

        Returns:
            Dictionary of key-value pairs from --set-string flags
        """
        import re

        set_string_values = {}

        # Pattern to match --set-string key=value
        pattern = r"--set-string\s+(\S+?)=(\S+)"

        matches = re.findall(pattern, helm_command)
        for key, value in matches:
            set_string_values[key] = value

        return set_string_values

    def _prompt_for_namespace(
        self, default_namespace: str, skip_confirmation: bool = False
    ) -> str:
        """Prompt user for namespace confirmation."""
        final_namespace = default_namespace or "anyscale-operator"

        if skip_confirmation:
            self.log.info(f"Using namespace: {final_namespace}", block_label="Setup")
            return final_namespace

        self.log.info("Configuring Kubernetes namespace...")

        self.log.info(
            f"Enter the namespace to use for the Anyscale operator (default: {final_namespace}):"
        )
        final_namespace = click.prompt("", default=final_namespace, show_default=True)

        # Validate namespace (Kubernetes DNS-1123 label requirements)
        # Must be lowercase alphanumeric or hyphens, start and end with alphanumeric, max 63 chars

        if not final_namespace:
            raise click.ClickException("Namespace cannot be empty")
        if len(final_namespace) > 63:
            raise click.ClickException("Namespace must be 63 characters or less")
        if not re.match(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$", final_namespace):
            raise click.ClickException(
                "Namespace must consist of lowercase alphanumeric characters or hyphens, "
                "and must start and end with an alphanumeric character"
            )

        self.log.info(f"Using namespace: {final_namespace}")

        return final_namespace

    def _generate_helm_values_file(  # noqa: PLR0913
        self,
        provider: str,
        cloud_deployment_id: str,
        region: str,
        namespace: str,
        infrastructure: InfrastructureResources,
        custom_path: Optional[str] = None,
        additional_values: Optional[Dict[str, str]] = None,
    ) -> str:
        """Generate Helm values file and save it locally."""
        self.log.info("Generating Helm values file...")

        # Create values dictionary starting with base values
        values: Dict[str, Any] = {
            "cloudProvider": provider,
            "cloudDeploymentId": cloud_deployment_id,
            "region": region,
            "operatorIamIdentity": infrastructure.iam_role_arn,
            "ingress-nginx": {"enabled": True},
        }

        if additional_values:
            for key, value in additional_values.items():
                if key not in values:
                    values[key] = value

        # Add control plane URL from ANYSCALE_HOST environment variable
        if ANYSCALE_HOST:
            values["controlPlaneURL"] = ANYSCALE_HOST
            self._debug(f"Using control plane URL: {ANYSCALE_HOST}")

        if custom_path:
            values_file_path = custom_path
        else:
            # Create filename with timestamp
            import datetime

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"anyscale-helm-values-{provider}-{namespace}-{timestamp}.yaml"
            values_file_path = os.path.join(os.getcwd(), filename)

        with open(values_file_path, "w") as f:
            yaml.dump(values, f, default_flow_style=False, sort_keys=False)

        self.log.info(f"Generated Helm values file: {values_file_path}")

        return values_file_path

    def _execute_helm_command(self, helm_command: str) -> None:
        """Execute the helm command."""
        # Convert multi-line command to single line and execute
        single_line_command = helm_command.replace(" \\\n", " ").replace("\n", " ")

        self.log.info(f"Executing: {single_line_command}")

        try:
            subprocess.run(
                single_line_command,
                shell=True,
                check=True,
                capture_output=True,
                text=True,
            )
            self.log.info("Helm installation completed successfully")
        except subprocess.CalledProcessError as e:
            self.log.error(f"Helm installation failed: {e.stderr}")
            raise click.ClickException(
                f"Failed to install Anyscale operator: {e.stderr}"
            )

    def _verify_installation(
        self, cloud_id: str, namespace: str, cluster_info: ClusterInfo
    ) -> None:
        """Verify the Kubernetes installation."""
        self.log.info("Verifying installation...")

        # Get the cloud deployment
        cloud_resources = self.cloud_controller.get_cloud_resources(cloud_id)

        if not cloud_resources:
            raise click.ClickException("No cloud resources found for verification")

        cloud_deployment = cloud_resources[0]

        # Use the existing Kubernetes verifier
        verifier = KubernetesCloudDeploymentVerifier(
            self.log, self.cloud_controller.api_client
        )

        # Set up kubectl config for verification using the discovered context
        verifier.k8s_config = KubernetesConfig(
            context=cluster_info.context,  # Use the discovered context to avoid re-prompting
            operator_namespace=namespace,
        )

        # Sleep to avoid race condition where operator has not loaded its IAM identity
        import time

        time.sleep(5)

        # Run verification
        success = verifier.verify(cloud_deployment)

        if success:
            self.log.info("Verification completed successfully")
        else:
            self.log.error("Verification failed - please check the logs above")
            raise click.ClickException("Installation verification failed")


def setup_kubernetes_cloud(  # noqa: PLR0913
    provider: str,
    region: str,
    name: str,
    cluster_name: str,
    namespace: str = "anyscale-operator",
    project_id: Optional[str] = None,
    functional_verify: bool = False,
    yes: bool = False,
    values_file: Optional[str] = None,
    debug: bool = False,
) -> None:
    """
    Set up Anyscale on a Kubernetes cluster.

    This function can be called from multiple CLI commands and provides
    the core K8s setup functionality.

    Args:
        provider: Cloud provider (aws, gcp)
        region: Cloud region
        name: Name for the Anyscale cloud
        cluster_name: Kubernetes cluster name
        namespace: Namespace for Anyscale operator (default: anyscale-operator)
        project_id: GCP project ID (optional, for future GCP support)
        functional_verify: Whether to run functional verification
        yes: Skip confirmation prompts
        values_file: Optional path for Helm values file
        debug: Enable debug logging
    """
    cmd = KubernetesCloudSetupCommand(debug=debug)

    try:
        cmd.run(
            provider=provider,
            region=region,
            name=name,
            cluster_name=cluster_name,
            namespace=namespace,
            project_id=project_id,
            functional_verify=functional_verify,
            yes=yes,
            values_file=values_file,
        )
    except Exception as e:  # noqa: BLE001
        click.echo(f"Setup failed: {e}", err=True)
        raise click.Abort()