anyscale 0.26.67__py3-none-any.whl → 0.26.68__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/client/README.md +20 -0
- anyscale/client/openapi_client/__init__.py +15 -0
- anyscale/client/openapi_client/api/default_api.py +656 -0
- anyscale/client/openapi_client/models/__init__.py +15 -0
- anyscale/client/openapi_client/models/lineage_artifact.py +383 -0
- anyscale/client/openapi_client/models/lineage_artifact_sort_field.py +101 -0
- anyscale/client/openapi_client/models/lineage_artifact_type.py +100 -0
- anyscale/client/openapi_client/models/lineage_direction.py +101 -0
- anyscale/client/openapi_client/models/lineage_graph.py +179 -0
- anyscale/client/openapi_client/models/lineage_graph_node.py +439 -0
- anyscale/client/openapi_client/models/lineage_node_type.py +100 -0
- anyscale/client/openapi_client/models/lineage_workload.py +355 -0
- anyscale/client/openapi_client/models/lineage_workload_sort_field.py +101 -0
- anyscale/client/openapi_client/models/lineage_workload_type.py +101 -0
- anyscale/client/openapi_client/models/lineageartifact_list_response.py +147 -0
- anyscale/client/openapi_client/models/lineageartifact_response.py +121 -0
- anyscale/client/openapi_client/models/lineagegraph_response.py +121 -0
- anyscale/client/openapi_client/models/lineageworkload_list_response.py +147 -0
- anyscale/client/openapi_client/models/lineageworkload_response.py +121 -0
- anyscale/commands/setup_k8s.py +460 -40
- anyscale/controllers/cloud_controller.py +10 -10
- anyscale/controllers/kubernetes_verifier.py +57 -11
- anyscale/version.py +1 -1
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/METADATA +1 -1
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/RECORD +30 -15
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/WHEEL +0 -0
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/top_level.txt +0 -0
anyscale/commands/setup_k8s.py
CHANGED
```diff
@@ -42,11 +42,7 @@ class ClusterInfo:
     region: str
     cluster_name: str
     project_id: Optional[str] = None
-    cluster_arn: Optional[str] = None
     oidc_provider: Optional[str] = None
-    cluster_location: Optional[str] = None
-    workload_identity_pool: Optional[str] = None
-    cluster_version: Optional[str] = None
 
 
 @dataclass
@@ -194,7 +190,7 @@ class KubernetesCloudSetupCommand:
         namespace: str,
         provider: str,
         region: str,
-        project_id: Optional[str],
+        project_id: Optional[str],
     ) -> ClusterInfo:
         """Discover and validate the target Kubernetes cluster using cloud provider APIs."""
         self.log.info(
@@ -205,8 +201,12 @@ class KubernetesCloudSetupCommand:
         if provider == "aws":
             return self._discover_aws_cluster(cluster_name, namespace, region)
         elif provider == "gcp":
-
-
+            if not project_id:
+                raise click.ClickException(
+                    "GCP project ID is required. Please provide --project-id"
+                )
+            return self._discover_gcp_cluster(
+                cluster_name, namespace, region, project_id
             )
         else:
             raise click.ClickException(f"Unsupported provider: {provider}")
@@ -215,18 +215,6 @@ class KubernetesCloudSetupCommand:
         self, cluster_name: str, namespace: str, region: str
     ) -> ClusterInfo:
         """Discover AWS EKS cluster details and configure kubeconfig."""
-        try:
-            self._debug(f"Fetching EKS cluster info for {cluster_name} in {region}...")
-            cluster_info = self._get_eks_cluster_info(cluster_name, region)
-            self._debug(f"EKS Cluster ARN: {cluster_info.get('arn', 'Unknown')}")
-            self._debug(
-                f"EKS Cluster Version: {cluster_info.get('version', 'Unknown')}"
-            )
-        except Exception as e:  # noqa: BLE001
-            self.log.error(f"Failed to get EKS cluster info: {e}")
-            raise click.ClickException(
-                f"Failed to discover EKS cluster {cluster_name}: {e}"
-            )
 
         try:
             self._debug("Fetching OIDC provider information...")
@@ -262,9 +250,39 @@ class KubernetesCloudSetupCommand:
             provider="aws",
             region=region,
             cluster_name=cluster_name,
-            cluster_arn=cluster_info.get("arn"),
             oidc_provider=oidc_provider,
-
+        )
+
+    def _discover_gcp_cluster(
+        self, cluster_name: str, namespace: str, region: str, project_id: str
+    ) -> ClusterInfo:
+        """Discover GCP GKE cluster details and configure kubeconfig."""
+
+        try:
+            self._debug("Configuring kubeconfig for GKE cluster...")
+            self._configure_gcp_kubeconfig(cluster_name, region, project_id)
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to configure kubeconfig: {e}")
+            raise click.ClickException(
+                f"Failed to configure kubeconfig for GKE cluster: {e}"
+            )
+
+        try:
+            self._debug("Verifying kubeconfig configuration...")
+            self._verify_kubeconfig()
+            current_context = self._get_current_kubectl_context()
+            self.log.info(f"Cluster discovered: {current_context}", block_label="Setup")
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to verify kubeconfig: {e}")
+            raise click.ClickException(f"Failed to verify kubeconfig: {e}")
+
+        return ClusterInfo(
+            context=current_context,
+            namespace=namespace,
+            provider="gcp",
+            region=region,
+            cluster_name=cluster_name,
+            project_id=project_id,
         )
 
     def _setup_infrastructure(
@@ -278,9 +296,7 @@ class KubernetesCloudSetupCommand:
         if provider == "aws":
            return self._setup_aws_infrastructure(region, name, cluster_info)
         elif provider == "gcp":
-
-                "GCP support is not yet implemented. Please use AWS for now."
-            )
+            return self._setup_gcp_infrastructure(region, name, cluster_info)
         else:
             raise click.ClickException(f"Unsupported provider: {provider}")
 
```
```diff
@@ -502,6 +518,344 @@ class KubernetesCloudSetupCommand:
 
         return json.dumps(template, indent=2)
 
+    def _setup_gcp_infrastructure(  # noqa: PLR0912
+        self, region: str, name: str, cluster_info: ClusterInfo,
+    ) -> InfrastructureResources:
+        """Set up GCP infrastructure for Kubernetes using GCP Python SDK.
+
+        Note: Deployment Manager is deprecated so it is unused here.
+        Infrastructure Manager was tried but did not work well, so we rely
+        on the GCP Python SDK instead.
+        """
+        try:
+            from anyscale.utils.gcp_utils import get_google_cloud_client_factory
+        except ImportError as e:
+            self.log.error(f"Failed to import required modules: {e}")
+            raise click.ClickException(f"Failed to import required modules: {e}")
+
+        try:
+            # Generate a unique cloud ID
+            cloud_id = f"k8s-{name}-{os.urandom(4).hex()}"
+            deployment_name = cloud_id.replace("_", "-").lower()
+            self._debug(f"Generated cloud ID: {cloud_id}")
+            self._debug(f"Infrastructure Manager deployment name: {deployment_name}")
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to generate cloud ID: {e}")
+            raise click.ClickException(f"Failed to generate cloud ID: {e}")
+
+        try:
+            # Get Google Cloud client factory
+            factory = get_google_cloud_client_factory(self.log, cluster_info.project_id)
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to initialize GCP client: {e}")
+            raise click.ClickException(f"Failed to initialize GCP client: {e}")
+
+        try:
+            with self.log.indent():
+                self.log.warning(
+                    "NOTE: GCP resources (bucket and service account) created by this command are not managed by Anyscale.",
+                )
+                self.log.warning(
+                    "You will need to manually delete these resources when the cloud is no longer needed.",
+                )
+                self.log.info(
+                    "Creating GCP resources (bucket, service account, IAM bindings)...",
+                )
+
+                # Calculate resource names
+                # Service account name: anyscale-operator-<random 8 chars>
+                # Max length for GCP service account is 30 characters
+                random_suffix = os.urandom(4).hex()  # 8 hex chars
+                anyscale_service_account_name = f"anyscale-operator-{random_suffix}"
+                bucket_name = f"anyscale-{cloud_id.replace('_', '-').lower()}"
+
+                # Create GCS bucket
+                self._debug(f"Creating GCS bucket: {bucket_name}")
+                storage_client = factory.storage.Client()
+                bucket = storage_client.bucket(bucket_name)
+                bucket.location = region
+                bucket.storage_class = "REGIONAL"
+                bucket.iam_configuration.uniform_bucket_level_access_enabled = True
+                bucket.iam_configuration.public_access_prevention = "enforced"
+                bucket.versioning_enabled = True
+                bucket.labels = {"anyscale-cloud-id": cloud_id.replace("-", "_")}
+
+                # Set CORS
+                # Use ANYSCALE_CORS_ORIGIN from shared config
+                # This respects the ANYSCALE_HOST environment variable
+                allowed_origin = ANYSCALE_CORS_ORIGIN
+                bucket.cors = [
+                    {
+                        "origin": [allowed_origin],
+                        "responseHeader": ["*"],
+                        "method": ["GET", "PUT", "POST", "HEAD", "DELETE"],
+                        "maxAgeSeconds": 3600,
+                    }
+                ]
+
+                storage_client.create_bucket(bucket, location=region)
+                self.log.info(f"Created GCS bucket: {bucket_name}", block_label="Setup")
+
+                # Create service account
+                self._debug(
+                    f"Creating service account: {anyscale_service_account_name}"
+                )
+                iam_client = factory.build("iam", "v1")
+                service_account_body = {
+                    "accountId": anyscale_service_account_name,
+                    "serviceAccount": {
+                        "displayName": f"{cloud_id} Anyscale operator service account",
+                        "description": "Service account for Anyscale Kubernetes operator",
+                    },
+                }
+
+                service_account = (
+                    iam_client.projects()
+                    .serviceAccounts()
+                    .create(
+                        name=f"projects/{cluster_info.project_id}",
+                        body=service_account_body,
+                    )
+                    .execute()
+                )
+
+                service_account_email = service_account["email"]
+                self.log.info(
+                    f"Created service account: {service_account_email}",
+                    block_label="Setup",
+                )
+
+                # Wait for service account to propagate through GCP systems
+                import time
+
+                self._debug("Waiting 10 seconds for service account to propagate...")
+                time.sleep(10)
+
+                # Grant Workload Identity binding
+                self._debug("Setting up Workload Identity binding")
+
+                # The K8s service account needs:
+                # 1. workloadIdentityUser role - to impersonate the GCP service account
+                # 2. serviceAccountTokenCreator - to generate tokens (for getOpenIdToken)
+
+                policy_body = {
+                    "policy": {
+                        "bindings": [
+                            {
+                                "role": "roles/iam.workloadIdentityUser",
+                                "members": [
+                                    f"serviceAccount:{cluster_info.project_id}.svc.id.goog[{cluster_info.namespace}/anyscale-operator]"
+                                ],
+                            },
+                            {
+                                "role": "roles/iam.serviceAccountTokenCreator",
+                                "members": [f"serviceAccount:{service_account_email}"],
+                            },
+                        ]
+                    }
+                }
+
+                iam_client.projects().serviceAccounts().setIamPolicy(
+                    resource=f"projects/{cluster_info.project_id}/serviceAccounts/{service_account_email}",
+                    body=policy_body,
+                ).execute()
+
+                self.log.info(
+                    "Configured Workload Identity binding", block_label="Setup"
+                )
+
+                # Grant storage admin role to service account for the bucket
+                # Note: There's often a propagation delay after service account creation
+                # We need to retry with exponential backoff
+                self._debug("Granting storage permissions")
+
+                import time
+
+                max_retries = 5
+                retry_delay = 2  # Start with 2 seconds
+
+                for attempt in range(max_retries):
+                    try:
+                        bucket_policy = bucket.get_iam_policy(
+                            requested_policy_version=3
+                        )
+                        bucket_policy.bindings.append(
+                            {
+                                "role": "roles/storage.admin",
+                                "members": {f"serviceAccount:{service_account_email}"},
+                            }
+                        )
+                        bucket.set_iam_policy(bucket_policy)
+                        break  # Success!
+                    except Exception as e:  # noqa: BLE001
+                        if "does not exist" in str(e) and attempt < max_retries - 1:
+                            self._debug(
+                                f"Service account not yet propagated, retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})"
+                            )
+                            time.sleep(retry_delay)
+                            retry_delay *= 2  # Exponential backoff
+                        else:
+                            raise  # Re-raise if it's not a propagation issue or we're out of retries
+
+                self.log.info(
+                    "Granted storage permissions to service account",
+                    block_label="Setup",
+                )
+
+                self.log.info("GCP resources created successfully", block_label="Setup")
+                self.log.warning(
+                    f"REMINDER: To clean up when no longer needed, delete GCS bucket '{bucket_name}' and service account '{service_account_email}'"
+                )
+        except Exception as e:  # noqa: BLE001
+            self.log.error(f"Failed to create GCP resources: {e}")
+            raise click.ClickException(f"Failed to create GCP resources: {e}")
+
+        # Resources were created in the try block above
+        # bucket_name and service_account_email are already set
+        self._debug(f"GCS Bucket: {bucket_name}")
+        self._debug(f"Service Account Email: {service_account_email}")
+
+        return InfrastructureResources(
+            bucket_name=bucket_name,
+            iam_role_arn=service_account_email,  # For GCP, we use service account email
+            region=region,
+            project_id=cluster_info.project_id,
+        )
+
+    def _get_gke_cluster_info(
+        self, cluster_name: str, region: str, project_id: str
+    ) -> Dict[str, Any]:
+        """Get GKE cluster information using gcloud CLI."""
+        try:
+            # Try regional cluster first
+            result = subprocess.run(
+                [
+                    "gcloud",
+                    "container",
+                    "clusters",
+                    "describe",
+                    cluster_name,
+                    f"--region={region}",
+                    f"--project={project_id}",
+                    "--format=json",
+                ],
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            if result.returncode == 0:
+                return json.loads(result.stdout)
+
+            # Try zonal cluster
+            # Assuming zone 'a' if regional fails
+            zone = f"{region}-a"
+            result = subprocess.run(
+                [
+                    "gcloud",
+                    "container",
+                    "clusters",
+                    "describe",
+                    cluster_name,
+                    f"--zone={zone}",
+                    f"--project={project_id}",
+                    "--format=json",
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            return json.loads(result.stdout)
+        except subprocess.CalledProcessError as e:
+            raise click.ClickException(f"Failed to get GKE cluster info: {e.stderr}")
+        except json.JSONDecodeError as e:
+            raise click.ClickException(f"Failed to parse GKE cluster info: {e}")
+
+    def _get_gke_zones(
+        self, cluster_name: str, region: str, project_id: str
+    ) -> List[str]:
+        """Get zones where the GKE cluster's node pools are located."""
+        try:
+            cluster_info = self._get_gke_cluster_info(cluster_name, region, project_id)
+
+            # Extract zones from node pools
+            zones = []
+            node_pools = cluster_info.get("nodePools", [])
+
+            for pool in node_pools:
+                # For zonal clusters, each node pool has locations
+                pool_locations = pool.get("locations", [])
+                zones.extend(pool_locations)
+
+            # If no zones found from node pools, try cluster-level locations
+            if not zones:
+                cluster_locations = cluster_info.get("locations", [])
+                if cluster_locations:
+                    zones = cluster_locations
+
+            # Remove duplicates and sort
+            if zones:
+                unique_zones = sorted(set(zones))
+                self._debug(f"Discovered zones: {', '.join(unique_zones)}")
+                return unique_zones
+            else:
+                # Fallback to default zones
+                self._debug(
+                    "No zones found in cluster info, falling back to default zones"
+                )
+                return [region + "-a", region + "-b", region + "-c"]
+
+        except Exception as e:  # noqa: BLE001
+            self._debug(f"Failed to get zones: {e}, using default zones")
+            return [region + "-a", region + "-b", region + "-c"]
+
+    def _configure_gcp_kubeconfig(
+        self, cluster_name: str, region: str, project_id: str
+    ) -> None:
+        """Configure kubeconfig for GCP GKE cluster."""
+        self.log.info(f"Configuring kubeconfig for GKE cluster: {cluster_name}")
+
+        try:
+            # Try regional cluster first
+            result = subprocess.run(
+                [
+                    "gcloud",
+                    "container",
+                    "clusters",
+                    "get-credentials",
+                    cluster_name,
+                    f"--region={region}",
+                    f"--project={project_id}",
+                ],
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            if result.returncode == 0:
+                self.log.info("GKE kubeconfig configured successfully")
+                return
+
+            # Try zonal cluster
+            zone = f"{region}-a"
+            subprocess.run(
+                [
+                    "gcloud",
+                    "container",
+                    "clusters",
+                    "get-credentials",
+                    cluster_name,
+                    f"--zone={zone}",
+                    f"--project={project_id}",
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            self.log.info("GKE kubeconfig configured successfully")
+        except subprocess.CalledProcessError as e:
+            raise click.ClickException(
+                f"Failed to configure GKE kubeconfig: {e.stderr}"
+            )
+
     def _get_eks_cluster_info(self, cluster_name: str, region: str) -> Dict[str, Any]:
         """Get EKS cluster information using AWS CLI."""
         try:
```
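
The storage-permission grant in `_setup_gcp_infrastructure` above retries around GCP's service-account propagation delay using exponential backoff. Below is a minimal, standalone sketch of that retry pattern; the helper name and the `is_retryable` predicate are illustrative and not part of the Anyscale SDK.

```python
import time
from typing import Callable, TypeVar

T = TypeVar("T")


def retry_with_backoff(
    fn: Callable[[], T],
    is_retryable: Callable[[Exception], bool],
    max_retries: int = 5,
    base_delay: float = 2.0,
) -> T:
    """Call fn(), retrying transient failures with exponential backoff (2s, 4s, 8s, ...)."""
    delay = base_delay
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception as e:  # noqa: BLE001
            # Only retry errors the caller marks as transient (e.g. "does not exist"
            # while a newly created service account propagates), and only while
            # attempts remain; otherwise surface the original exception.
            if is_retryable(e) and attempt < max_retries - 1:
                time.sleep(delay)
                delay *= 2
            else:
                raise
    raise RuntimeError("retry_with_backoff requires max_retries >= 1")
```

In the diff, the retried call corresponds to fetching and re-setting the bucket IAM policy, and the retryable condition to the `"does not exist" in str(e)` check.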
```diff
@@ -698,10 +1052,32 @@ class KubernetesCloudSetupCommand:
                     zones=zones,
                 ),
             )
-
-
-
+        elif provider == "gcp":
+            assert infrastructure.project_id, "Project ID is required for GCP"
+
+            from anyscale.client.openapi_client.models import GCPConfig
+
+            # Dynamically determine zones from the GKE cluster
+            zones = self._get_gke_zones(
+                cluster_info.cluster_name, region, infrastructure.project_id
+            )
+
+            cloud_deployment = CloudDeployment(
+                name=name,
+                provider=CloudProviders.GCP,
+                region=region,
+                compute_stack=ComputeStack.K8S,
+                object_storage=ObjectStorage(
+                    bucket_name=infrastructure.bucket_name, region=region
+                ),
+                gcp_config=GCPConfig(project_id=infrastructure.project_id,),
+                kubernetes_config=OpenAPIKubernetesConfig(
+                    anyscale_operator_iam_identity=infrastructure.iam_role_arn,
+                    zones=zones,
+                ),
             )
+        else:
+            raise click.ClickException(f"Unsupported provider: {provider}")
 
         # Register the cloud
         try:
@@ -735,10 +1111,18 @@ class KubernetesCloudSetupCommand:
                     skip_verifications=True,
                     auto_add_user=True,
                 )
-
-
-
+            elif provider == "gcp":
+                self.log.info("Calling register_gcp_cloud...")
+                self.cloud_controller.register_gcp_cloud(
+                    name=name,
+                    cloud_resource=cloud_deployment,
+                    functional_verify=None,
+                    yes=True,
+                    skip_verifications=True,
+                    auto_add_user=True,
                 )
+            else:
+                raise click.ClickException(f"Unsupported provider: {provider}")
         finally:
             # Restore the original log.info method
             self.cloud_controller.log.info = original_log_info
@@ -817,6 +1201,10 @@ class KubernetesCloudSetupCommand:
             additional_values=set_string_values,
         )
 
+        # Add Helm repo before installing
+        self._debug("Adding Anyscale Helm repository...")
+        self._add_helm_repo()
+
         # Build a simple Helm command that only uses the values file
         self._debug("Generating Helm command...")
         helm_command = (
@@ -830,6 +1218,41 @@ class KubernetesCloudSetupCommand:
 
         self._execute_helm_command(helm_command)
 
+    def _add_helm_repo(self) -> None:
+        """Add and update the Anyscale Helm repository."""
+        try:
+            # Add the Anyscale Helm repository
+            self.log.info("Adding Anyscale Helm repository...", block_label="Setup")
+            subprocess.run(
+                [
+                    "helm",
+                    "repo",
+                    "add",
+                    "anyscale",
+                    "https://anyscale.github.io/helm-charts",
+                ],
+                capture_output=True,
+                text=True,
+                check=False,  # Don't fail if repo already exists
+            )
+
+            # Update the Helm repository
+            self.log.info("Updating Helm repositories...", block_label="Setup")
+            subprocess.run(
+                ["helm", "repo", "update", "anyscale"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            self.log.info(
+                "Helm repository configured successfully", block_label="Setup"
+            )
+        except subprocess.CalledProcessError as e:
+            self.log.error(f"Failed to configure Helm repository: {e.stderr}")
+            raise click.ClickException(
+                f"Failed to configure Helm repository: {e.stderr}"
+            )
+
     def _extract_set_string_values(self, helm_command: str) -> Dict[str, str]:
         """
         Extract all --set-string key=value pairs from a Helm command.
@@ -902,10 +1325,12 @@ class KubernetesCloudSetupCommand:
 
         # Create values dictionary starting with base values
         values: Dict[str, Any] = {
-            "
-
-
-
+            "global": {
+                "cloudDeploymentId": cloud_deployment_id,
+                "cloudProvider": provider,
+                "region": region,
+                "auth": {"iamIdentity": infrastructure.iam_role_arn,},
+            },
             "ingress-nginx": {"enabled": True},
         }
 
```
|
|
917
1342
|
# Add control plane URL from ANYSCALE_HOST environment variable
|
918
1343
|
if ANYSCALE_HOST:
|
919
1344
|
values["controlPlaneURL"] = ANYSCALE_HOST
|
920
|
-
self.
|
1345
|
+
self.log.info(f"Using control plane URL: {ANYSCALE_HOST}")
|
921
1346
|
|
922
1347
|
if custom_path:
|
923
1348
|
values_file_path = custom_path
|
@@ -983,11 +1408,6 @@ class KubernetesCloudSetupCommand:
|
|
983
1408
|
operator_namespace=namespace,
|
984
1409
|
)
|
985
1410
|
|
986
|
-
# Sleep to avoid race condition where operator has not loaded its IAM identity
|
987
|
-
import time
|
988
|
-
|
989
|
-
time.sleep(5)
|
990
|
-
|
991
1411
|
# Run verification
|
992
1412
|
success = verifier.verify(cloud_deployment)
|
993
1413
|
|

anyscale/controllers/cloud_controller.py
CHANGED
```diff
@@ -4193,46 +4193,46 @@ class CloudController(BaseController):
         """
         command_parts = [
             "helm upgrade <release-name> anyscale/anyscale-operator",
-            f" --set-string cloudDeploymentId={cloud_deployment_id}",
-            f" --set-string cloudProvider={provider}",
+            f" --set-string global.cloudDeploymentId={cloud_deployment_id}",
+            f" --set-string global.cloudProvider={provider}",
         ]
 
         # Add region for most providers (not for generic)
         if region and provider != "generic":
-            command_parts.append(f" --set-string region={region}")
+            command_parts.append(f" --set-string global.region={region}")
 
         # Add provider-specific parameters
         if provider == "gcp" and operator_iam_identity:
             command_parts.append(
-                f" --set-string
+                f" --set-string global.auth.iamIdentity={operator_iam_identity}"
             )
         elif provider == "azure":
             if operator_iam_identity:
                 command_parts.append(
-                    f" --set-string
+                    f" --set-string global.auth.iamIdentity={operator_iam_identity}"
                 )
             if anyscale_cli_token:
                 command_parts.append(
-                    f" --set-string anyscaleCliToken={anyscale_cli_token}"
+                    f" --set-string global.auth.anyscaleCliToken={anyscale_cli_token}"
                 )
             else:
                 command_parts.append(
-                    " --set-string anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
+                    " --set-string global.auth.anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
                 )
         elif provider == "generic":
             if anyscale_cli_token:
                 command_parts.append(
-                    f" --set-string anyscaleCliToken={anyscale_cli_token}"
+                    f" --set-string global.auth.anyscaleCliToken={anyscale_cli_token}"
                 )
             else:
                 command_parts.append(
-                    " --set-string anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
+                    " --set-string global.auth.anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
                 )
 
         # Add common parameters
         command_parts.extend(
             [
-                " --set-string
+                " --set-string workloads.serviceAccount.name=anyscale-operator",
                 " --namespace <namespace>",
                 " --create-namespace",
                 " --wait",
```