anyscale 0.26.67__py3-none-any.whl → 0.26.68__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (30)
  1. anyscale/client/README.md +20 -0
  2. anyscale/client/openapi_client/__init__.py +15 -0
  3. anyscale/client/openapi_client/api/default_api.py +656 -0
  4. anyscale/client/openapi_client/models/__init__.py +15 -0
  5. anyscale/client/openapi_client/models/lineage_artifact.py +383 -0
  6. anyscale/client/openapi_client/models/lineage_artifact_sort_field.py +101 -0
  7. anyscale/client/openapi_client/models/lineage_artifact_type.py +100 -0
  8. anyscale/client/openapi_client/models/lineage_direction.py +101 -0
  9. anyscale/client/openapi_client/models/lineage_graph.py +179 -0
  10. anyscale/client/openapi_client/models/lineage_graph_node.py +439 -0
  11. anyscale/client/openapi_client/models/lineage_node_type.py +100 -0
  12. anyscale/client/openapi_client/models/lineage_workload.py +355 -0
  13. anyscale/client/openapi_client/models/lineage_workload_sort_field.py +101 -0
  14. anyscale/client/openapi_client/models/lineage_workload_type.py +101 -0
  15. anyscale/client/openapi_client/models/lineageartifact_list_response.py +147 -0
  16. anyscale/client/openapi_client/models/lineageartifact_response.py +121 -0
  17. anyscale/client/openapi_client/models/lineagegraph_response.py +121 -0
  18. anyscale/client/openapi_client/models/lineageworkload_list_response.py +147 -0
  19. anyscale/client/openapi_client/models/lineageworkload_response.py +121 -0
  20. anyscale/commands/setup_k8s.py +460 -40
  21. anyscale/controllers/cloud_controller.py +10 -10
  22. anyscale/controllers/kubernetes_verifier.py +57 -11
  23. anyscale/version.py +1 -1
  24. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/METADATA +1 -1
  25. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/RECORD +30 -15
  26. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/WHEEL +0 -0
  27. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/entry_points.txt +0 -0
  28. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/licenses/LICENSE +0 -0
  29. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/licenses/NOTICE +0 -0
  30. {anyscale-0.26.67.dist-info → anyscale-0.26.68.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,7 @@ class ClusterInfo:
42
42
  region: str
43
43
  cluster_name: str
44
44
  project_id: Optional[str] = None
45
- cluster_arn: Optional[str] = None
46
45
  oidc_provider: Optional[str] = None
47
- cluster_location: Optional[str] = None
48
- workload_identity_pool: Optional[str] = None
49
- cluster_version: Optional[str] = None
50
46
 
51
47
 
52
48
  @dataclass
@@ -194,7 +190,7 @@ class KubernetesCloudSetupCommand:
194
190
  namespace: str,
195
191
  provider: str,
196
192
  region: str,
197
- project_id: Optional[str], # noqa: ARG002
193
+ project_id: Optional[str],
198
194
  ) -> ClusterInfo:
199
195
  """Discover and validate the target Kubernetes cluster using cloud provider APIs."""
200
196
  self.log.info(
@@ -205,8 +201,12 @@ class KubernetesCloudSetupCommand:
205
201
  if provider == "aws":
206
202
  return self._discover_aws_cluster(cluster_name, namespace, region)
207
203
  elif provider == "gcp":
208
- raise click.ClickException(
209
- "GCP support is not yet implemented. Please use AWS for now."
204
+ if not project_id:
205
+ raise click.ClickException(
206
+ "GCP project ID is required. Please provide --project-id"
207
+ )
208
+ return self._discover_gcp_cluster(
209
+ cluster_name, namespace, region, project_id
210
210
  )
211
211
  else:
212
212
  raise click.ClickException(f"Unsupported provider: {provider}")
@@ -215,18 +215,6 @@ class KubernetesCloudSetupCommand:
215
215
  self, cluster_name: str, namespace: str, region: str
216
216
  ) -> ClusterInfo:
217
217
  """Discover AWS EKS cluster details and configure kubeconfig."""
218
- try:
219
- self._debug(f"Fetching EKS cluster info for {cluster_name} in {region}...")
220
- cluster_info = self._get_eks_cluster_info(cluster_name, region)
221
- self._debug(f"EKS Cluster ARN: {cluster_info.get('arn', 'Unknown')}")
222
- self._debug(
223
- f"EKS Cluster Version: {cluster_info.get('version', 'Unknown')}"
224
- )
225
- except Exception as e: # noqa: BLE001
226
- self.log.error(f"Failed to get EKS cluster info: {e}")
227
- raise click.ClickException(
228
- f"Failed to discover EKS cluster {cluster_name}: {e}"
229
- )
230
218
 
231
219
  try:
232
220
  self._debug("Fetching OIDC provider information...")
@@ -262,9 +250,39 @@ class KubernetesCloudSetupCommand:
262
250
  provider="aws",
263
251
  region=region,
264
252
  cluster_name=cluster_name,
265
- cluster_arn=cluster_info.get("arn"),
266
253
  oidc_provider=oidc_provider,
267
- cluster_version=cluster_info.get("version"),
254
+ )
255
+
256
+ def _discover_gcp_cluster(
257
+ self, cluster_name: str, namespace: str, region: str, project_id: str
258
+ ) -> ClusterInfo:
259
+ """Discover GCP GKE cluster details and configure kubeconfig."""
260
+
261
+ try:
262
+ self._debug("Configuring kubeconfig for GKE cluster...")
263
+ self._configure_gcp_kubeconfig(cluster_name, region, project_id)
264
+ except Exception as e: # noqa: BLE001
265
+ self.log.error(f"Failed to configure kubeconfig: {e}")
266
+ raise click.ClickException(
267
+ f"Failed to configure kubeconfig for GKE cluster: {e}"
268
+ )
269
+
270
+ try:
271
+ self._debug("Verifying kubeconfig configuration...")
272
+ self._verify_kubeconfig()
273
+ current_context = self._get_current_kubectl_context()
274
+ self.log.info(f"Cluster discovered: {current_context}", block_label="Setup")
275
+ except Exception as e: # noqa: BLE001
276
+ self.log.error(f"Failed to verify kubeconfig: {e}")
277
+ raise click.ClickException(f"Failed to verify kubeconfig: {e}")
278
+
279
+ return ClusterInfo(
280
+ context=current_context,
281
+ namespace=namespace,
282
+ provider="gcp",
283
+ region=region,
284
+ cluster_name=cluster_name,
285
+ project_id=project_id,
268
286
  )
269
287
 
270
288
  def _setup_infrastructure(
@@ -278,9 +296,7 @@ class KubernetesCloudSetupCommand:
278
296
  if provider == "aws":
279
297
  return self._setup_aws_infrastructure(region, name, cluster_info)
280
298
  elif provider == "gcp":
281
- raise click.ClickException(
282
- "GCP support is not yet implemented. Please use AWS for now."
283
- )
299
+ return self._setup_gcp_infrastructure(region, name, cluster_info)
284
300
  else:
285
301
  raise click.ClickException(f"Unsupported provider: {provider}")
286
302
 
@@ -502,6 +518,344 @@ class KubernetesCloudSetupCommand:
502
518
 
503
519
  return json.dumps(template, indent=2)
504
520
 
521
+ def _setup_gcp_infrastructure( # noqa: PLR0912
522
+ self, region: str, name: str, cluster_info: ClusterInfo,
523
+ ) -> InfrastructureResources:
524
+ """Set up GCP infrastructure for Kubernetes using GCP Python SDK.
525
+
526
+ Note: Deployment Manager is deprecated so it is unused here.
527
+ Infrastructure Manager was tried but did not work well, so we rely
528
+ on the GCP Python SDK instead.
529
+ """
530
+ try:
531
+ from anyscale.utils.gcp_utils import get_google_cloud_client_factory
532
+ except ImportError as e:
533
+ self.log.error(f"Failed to import required modules: {e}")
534
+ raise click.ClickException(f"Failed to import required modules: {e}")
535
+
536
+ try:
537
+ # Generate a unique cloud ID
538
+ cloud_id = f"k8s-{name}-{os.urandom(4).hex()}"
539
+ deployment_name = cloud_id.replace("_", "-").lower()
540
+ self._debug(f"Generated cloud ID: {cloud_id}")
541
+ self._debug(f"Infrastructure Manager deployment name: {deployment_name}")
542
+ except Exception as e: # noqa: BLE001
543
+ self.log.error(f"Failed to generate cloud ID: {e}")
544
+ raise click.ClickException(f"Failed to generate cloud ID: {e}")
545
+
546
+ try:
547
+ # Get Google Cloud client factory
548
+ factory = get_google_cloud_client_factory(self.log, cluster_info.project_id)
549
+ except Exception as e: # noqa: BLE001
550
+ self.log.error(f"Failed to initialize GCP client: {e}")
551
+ raise click.ClickException(f"Failed to initialize GCP client: {e}")
552
+
553
+ try:
554
+ with self.log.indent():
555
+ self.log.warning(
556
+ "NOTE: GCP resources (bucket and service account) created by this command are not managed by Anyscale.",
557
+ )
558
+ self.log.warning(
559
+ "You will need to manually delete these resources when the cloud is no longer needed.",
560
+ )
561
+ self.log.info(
562
+ "Creating GCP resources (bucket, service account, IAM bindings)...",
563
+ )
564
+
565
+ # Calculate resource names
566
+ # Service account name: anyscale-operator-<random 8 chars>
567
+ # Max length for GCP service account is 30 characters
568
+ random_suffix = os.urandom(4).hex() # 8 hex chars
569
+ anyscale_service_account_name = f"anyscale-operator-{random_suffix}"
570
+ bucket_name = f"anyscale-{cloud_id.replace('_', '-').lower()}"
571
+
572
+ # Create GCS bucket
573
+ self._debug(f"Creating GCS bucket: {bucket_name}")
574
+ storage_client = factory.storage.Client()
575
+ bucket = storage_client.bucket(bucket_name)
576
+ bucket.location = region
577
+ bucket.storage_class = "REGIONAL"
578
+ bucket.iam_configuration.uniform_bucket_level_access_enabled = True
579
+ bucket.iam_configuration.public_access_prevention = "enforced"
580
+ bucket.versioning_enabled = True
581
+ bucket.labels = {"anyscale-cloud-id": cloud_id.replace("-", "_")}
582
+
583
+ # Set CORS
584
+ # Use ANYSCALE_CORS_ORIGIN from shared config
585
+ # This respects the ANYSCALE_HOST environment variable
586
+ allowed_origin = ANYSCALE_CORS_ORIGIN
587
+ bucket.cors = [
588
+ {
589
+ "origin": [allowed_origin],
590
+ "responseHeader": ["*"],
591
+ "method": ["GET", "PUT", "POST", "HEAD", "DELETE"],
592
+ "maxAgeSeconds": 3600,
593
+ }
594
+ ]
595
+
596
+ storage_client.create_bucket(bucket, location=region)
597
+ self.log.info(f"Created GCS bucket: {bucket_name}", block_label="Setup")
598
+
599
+ # Create service account
600
+ self._debug(
601
+ f"Creating service account: {anyscale_service_account_name}"
602
+ )
603
+ iam_client = factory.build("iam", "v1")
604
+ service_account_body = {
605
+ "accountId": anyscale_service_account_name,
606
+ "serviceAccount": {
607
+ "displayName": f"{cloud_id} Anyscale operator service account",
608
+ "description": "Service account for Anyscale Kubernetes operator",
609
+ },
610
+ }
611
+
612
+ service_account = (
613
+ iam_client.projects()
614
+ .serviceAccounts()
615
+ .create(
616
+ name=f"projects/{cluster_info.project_id}",
617
+ body=service_account_body,
618
+ )
619
+ .execute()
620
+ )
621
+
622
+ service_account_email = service_account["email"]
623
+ self.log.info(
624
+ f"Created service account: {service_account_email}",
625
+ block_label="Setup",
626
+ )
627
+
628
+ # Wait for service account to propagate through GCP systems
629
+ import time
630
+
631
+ self._debug("Waiting 10 seconds for service account to propagate...")
632
+ time.sleep(10)
633
+
634
+ # Grant Workload Identity binding
635
+ self._debug("Setting up Workload Identity binding")
636
+
637
+ # The K8s service account needs:
638
+ # 1. workloadIdentityUser role - to impersonate the GCP service account
639
+ # 2. serviceAccountTokenCreator - to generate tokens (for getOpenIdToken)
640
+
641
+ policy_body = {
642
+ "policy": {
643
+ "bindings": [
644
+ {
645
+ "role": "roles/iam.workloadIdentityUser",
646
+ "members": [
647
+ f"serviceAccount:{cluster_info.project_id}.svc.id.goog[{cluster_info.namespace}/anyscale-operator]"
648
+ ],
649
+ },
650
+ {
651
+ "role": "roles/iam.serviceAccountTokenCreator",
652
+ "members": [f"serviceAccount:{service_account_email}"],
653
+ },
654
+ ]
655
+ }
656
+ }
657
+
658
+ iam_client.projects().serviceAccounts().setIamPolicy(
659
+ resource=f"projects/{cluster_info.project_id}/serviceAccounts/{service_account_email}",
660
+ body=policy_body,
661
+ ).execute()
662
+
663
+ self.log.info(
664
+ "Configured Workload Identity binding", block_label="Setup"
665
+ )
666
+
667
+ # Grant storage admin role to service account for the bucket
668
+ # Note: There's often a propagation delay after service account creation
669
+ # We need to retry with exponential backoff
670
+ self._debug("Granting storage permissions")
671
+
672
+ import time
673
+
674
+ max_retries = 5
675
+ retry_delay = 2 # Start with 2 seconds
676
+
677
+ for attempt in range(max_retries):
678
+ try:
679
+ bucket_policy = bucket.get_iam_policy(
680
+ requested_policy_version=3
681
+ )
682
+ bucket_policy.bindings.append(
683
+ {
684
+ "role": "roles/storage.admin",
685
+ "members": {f"serviceAccount:{service_account_email}"},
686
+ }
687
+ )
688
+ bucket.set_iam_policy(bucket_policy)
689
+ break # Success!
690
+ except Exception as e: # noqa: BLE001
691
+ if "does not exist" in str(e) and attempt < max_retries - 1:
692
+ self._debug(
693
+ f"Service account not yet propagated, retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})"
694
+ )
695
+ time.sleep(retry_delay)
696
+ retry_delay *= 2 # Exponential backoff
697
+ else:
698
+ raise # Re-raise if it's not a propagation issue or we're out of retries
699
+
700
+ self.log.info(
701
+ "Granted storage permissions to service account",
702
+ block_label="Setup",
703
+ )
704
+
705
+ self.log.info("GCP resources created successfully", block_label="Setup")
706
+ self.log.warning(
707
+ f"REMINDER: To clean up when no longer needed, delete GCS bucket '{bucket_name}' and service account '{service_account_email}'"
708
+ )
709
+ except Exception as e: # noqa: BLE001
710
+ self.log.error(f"Failed to create GCP resources: {e}")
711
+ raise click.ClickException(f"Failed to create GCP resources: {e}")
712
+
713
+ # Resources were created in the try block above
714
+ # bucket_name and service_account_email are already set
715
+ self._debug(f"GCS Bucket: {bucket_name}")
716
+ self._debug(f"Service Account Email: {service_account_email}")
717
+
718
+ return InfrastructureResources(
719
+ bucket_name=bucket_name,
720
+ iam_role_arn=service_account_email, # For GCP, we use service account email
721
+ region=region,
722
+ project_id=cluster_info.project_id,
723
+ )
724
+
725
+ def _get_gke_cluster_info(
726
+ self, cluster_name: str, region: str, project_id: str
727
+ ) -> Dict[str, Any]:
728
+ """Get GKE cluster information using gcloud CLI."""
729
+ try:
730
+ # Try regional cluster first
731
+ result = subprocess.run(
732
+ [
733
+ "gcloud",
734
+ "container",
735
+ "clusters",
736
+ "describe",
737
+ cluster_name,
738
+ f"--region={region}",
739
+ f"--project={project_id}",
740
+ "--format=json",
741
+ ],
742
+ capture_output=True,
743
+ text=True,
744
+ check=False,
745
+ )
746
+ if result.returncode == 0:
747
+ return json.loads(result.stdout)
748
+
749
+ # Try zonal cluster
750
+ # Assuming zone 'a' if regional fails
751
+ zone = f"{region}-a"
752
+ result = subprocess.run(
753
+ [
754
+ "gcloud",
755
+ "container",
756
+ "clusters",
757
+ "describe",
758
+ cluster_name,
759
+ f"--zone={zone}",
760
+ f"--project={project_id}",
761
+ "--format=json",
762
+ ],
763
+ capture_output=True,
764
+ text=True,
765
+ check=True,
766
+ )
767
+ return json.loads(result.stdout)
768
+ except subprocess.CalledProcessError as e:
769
+ raise click.ClickException(f"Failed to get GKE cluster info: {e.stderr}")
770
+ except json.JSONDecodeError as e:
771
+ raise click.ClickException(f"Failed to parse GKE cluster info: {e}")
772
+
773
+ def _get_gke_zones(
774
+ self, cluster_name: str, region: str, project_id: str
775
+ ) -> List[str]:
776
+ """Get zones where the GKE cluster's node pools are located."""
777
+ try:
778
+ cluster_info = self._get_gke_cluster_info(cluster_name, region, project_id)
779
+
780
+ # Extract zones from node pools
781
+ zones = []
782
+ node_pools = cluster_info.get("nodePools", [])
783
+
784
+ for pool in node_pools:
785
+ # For zonal clusters, each node pool has locations
786
+ pool_locations = pool.get("locations", [])
787
+ zones.extend(pool_locations)
788
+
789
+ # If no zones found from node pools, try cluster-level locations
790
+ if not zones:
791
+ cluster_locations = cluster_info.get("locations", [])
792
+ if cluster_locations:
793
+ zones = cluster_locations
794
+
795
+ # Remove duplicates and sort
796
+ if zones:
797
+ unique_zones = sorted(set(zones))
798
+ self._debug(f"Discovered zones: {', '.join(unique_zones)}")
799
+ return unique_zones
800
+ else:
801
+ # Fallback to default zones
802
+ self._debug(
803
+ "No zones found in cluster info, falling back to default zones"
804
+ )
805
+ return [region + "-a", region + "-b", region + "-c"]
806
+
807
+ except Exception as e: # noqa: BLE001
808
+ self._debug(f"Failed to get zones: {e}, using default zones")
809
+ return [region + "-a", region + "-b", region + "-c"]
810
+
811
+ def _configure_gcp_kubeconfig(
812
+ self, cluster_name: str, region: str, project_id: str
813
+ ) -> None:
814
+ """Configure kubeconfig for GCP GKE cluster."""
815
+ self.log.info(f"Configuring kubeconfig for GKE cluster: {cluster_name}")
816
+
817
+ try:
818
+ # Try regional cluster first
819
+ result = subprocess.run(
820
+ [
821
+ "gcloud",
822
+ "container",
823
+ "clusters",
824
+ "get-credentials",
825
+ cluster_name,
826
+ f"--region={region}",
827
+ f"--project={project_id}",
828
+ ],
829
+ capture_output=True,
830
+ text=True,
831
+ check=False,
832
+ )
833
+ if result.returncode == 0:
834
+ self.log.info("GKE kubeconfig configured successfully")
835
+ return
836
+
837
+ # Try zonal cluster
838
+ zone = f"{region}-a"
839
+ subprocess.run(
840
+ [
841
+ "gcloud",
842
+ "container",
843
+ "clusters",
844
+ "get-credentials",
845
+ cluster_name,
846
+ f"--zone={zone}",
847
+ f"--project={project_id}",
848
+ ],
849
+ capture_output=True,
850
+ text=True,
851
+ check=True,
852
+ )
853
+ self.log.info("GKE kubeconfig configured successfully")
854
+ except subprocess.CalledProcessError as e:
855
+ raise click.ClickException(
856
+ f"Failed to configure GKE kubeconfig: {e.stderr}"
857
+ )
858
+
505
859
  def _get_eks_cluster_info(self, cluster_name: str, region: str) -> Dict[str, Any]:
506
860
  """Get EKS cluster information using AWS CLI."""
507
861
  try:
@@ -698,10 +1052,32 @@ class KubernetesCloudSetupCommand:
698
1052
  zones=zones,
699
1053
  ),
700
1054
  )
701
- else:
702
- raise click.ClickException(
703
- "GCP support is not yet implemented. Please use AWS for now."
1055
+ elif provider == "gcp":
1056
+ assert infrastructure.project_id, "Project ID is required for GCP"
1057
+
1058
+ from anyscale.client.openapi_client.models import GCPConfig
1059
+
1060
+ # Dynamically determine zones from the GKE cluster
1061
+ zones = self._get_gke_zones(
1062
+ cluster_info.cluster_name, region, infrastructure.project_id
1063
+ )
1064
+
1065
+ cloud_deployment = CloudDeployment(
1066
+ name=name,
1067
+ provider=CloudProviders.GCP,
1068
+ region=region,
1069
+ compute_stack=ComputeStack.K8S,
1070
+ object_storage=ObjectStorage(
1071
+ bucket_name=infrastructure.bucket_name, region=region
1072
+ ),
1073
+ gcp_config=GCPConfig(project_id=infrastructure.project_id,),
1074
+ kubernetes_config=OpenAPIKubernetesConfig(
1075
+ anyscale_operator_iam_identity=infrastructure.iam_role_arn,
1076
+ zones=zones,
1077
+ ),
704
1078
  )
1079
+ else:
1080
+ raise click.ClickException(f"Unsupported provider: {provider}")
705
1081
 
706
1082
  # Register the cloud
707
1083
  try:
@@ -735,10 +1111,18 @@ class KubernetesCloudSetupCommand:
735
1111
  skip_verifications=True,
736
1112
  auto_add_user=True,
737
1113
  )
738
- else:
739
- raise click.ClickException(
740
- "GCP support is not yet implemented. Please use AWS for now."
1114
+ elif provider == "gcp":
1115
+ self.log.info("Calling register_gcp_cloud...")
1116
+ self.cloud_controller.register_gcp_cloud(
1117
+ name=name,
1118
+ cloud_resource=cloud_deployment,
1119
+ functional_verify=None,
1120
+ yes=True,
1121
+ skip_verifications=True,
1122
+ auto_add_user=True,
741
1123
  )
1124
+ else:
1125
+ raise click.ClickException(f"Unsupported provider: {provider}")
742
1126
  finally:
743
1127
  # Restore the original log.info method
744
1128
  self.cloud_controller.log.info = original_log_info
@@ -817,6 +1201,10 @@ class KubernetesCloudSetupCommand:
817
1201
  additional_values=set_string_values,
818
1202
  )
819
1203
 
1204
+ # Add Helm repo before installing
1205
+ self._debug("Adding Anyscale Helm repository...")
1206
+ self._add_helm_repo()
1207
+
820
1208
  # Build a simple Helm command that only uses the values file
821
1209
  self._debug("Generating Helm command...")
822
1210
  helm_command = (
@@ -830,6 +1218,41 @@ class KubernetesCloudSetupCommand:
830
1218
 
831
1219
  self._execute_helm_command(helm_command)
832
1220
 
1221
+ def _add_helm_repo(self) -> None:
1222
+ """Add and update the Anyscale Helm repository."""
1223
+ try:
1224
+ # Add the Anyscale Helm repository
1225
+ self.log.info("Adding Anyscale Helm repository...", block_label="Setup")
1226
+ subprocess.run(
1227
+ [
1228
+ "helm",
1229
+ "repo",
1230
+ "add",
1231
+ "anyscale",
1232
+ "https://anyscale.github.io/helm-charts",
1233
+ ],
1234
+ capture_output=True,
1235
+ text=True,
1236
+ check=False, # Don't fail if repo already exists
1237
+ )
1238
+
1239
+ # Update the Helm repository
1240
+ self.log.info("Updating Helm repositories...", block_label="Setup")
1241
+ subprocess.run(
1242
+ ["helm", "repo", "update", "anyscale"],
1243
+ capture_output=True,
1244
+ text=True,
1245
+ check=True,
1246
+ )
1247
+ self.log.info(
1248
+ "Helm repository configured successfully", block_label="Setup"
1249
+ )
1250
+ except subprocess.CalledProcessError as e:
1251
+ self.log.error(f"Failed to configure Helm repository: {e.stderr}")
1252
+ raise click.ClickException(
1253
+ f"Failed to configure Helm repository: {e.stderr}"
1254
+ )
1255
+
833
1256
  def _extract_set_string_values(self, helm_command: str) -> Dict[str, str]:
834
1257
  """
835
1258
  Extract all --set-string key=value pairs from a Helm command.
@@ -902,10 +1325,12 @@ class KubernetesCloudSetupCommand:
902
1325
 
903
1326
  # Create values dictionary starting with base values
904
1327
  values: Dict[str, Any] = {
905
- "cloudProvider": provider,
906
- "cloudDeploymentId": cloud_deployment_id,
907
- "region": region,
908
- "operatorIamIdentity": infrastructure.iam_role_arn,
1328
+ "global": {
1329
+ "cloudDeploymentId": cloud_deployment_id,
1330
+ "cloudProvider": provider,
1331
+ "region": region,
1332
+ "auth": {"iamIdentity": infrastructure.iam_role_arn,},
1333
+ },
909
1334
  "ingress-nginx": {"enabled": True},
910
1335
  }
911
1336
 
@@ -917,7 +1342,7 @@ class KubernetesCloudSetupCommand:
917
1342
  # Add control plane URL from ANYSCALE_HOST environment variable
918
1343
  if ANYSCALE_HOST:
919
1344
  values["controlPlaneURL"] = ANYSCALE_HOST
920
- self._debug(f"Using control plane URL: {ANYSCALE_HOST}")
1345
+ self.log.info(f"Using control plane URL: {ANYSCALE_HOST}")
921
1346
 
922
1347
  if custom_path:
923
1348
  values_file_path = custom_path
@@ -983,11 +1408,6 @@ class KubernetesCloudSetupCommand:
983
1408
  operator_namespace=namespace,
984
1409
  )
985
1410
 
986
- # Sleep to avoid race condition where operator has not loaded its IAM identity
987
- import time
988
-
989
- time.sleep(5)
990
-
991
1411
  # Run verification
992
1412
  success = verifier.verify(cloud_deployment)
993
1413
 
@@ -4193,46 +4193,46 @@ class CloudController(BaseController):
4193
4193
  """
4194
4194
  command_parts = [
4195
4195
  "helm upgrade <release-name> anyscale/anyscale-operator",
4196
- f" --set-string cloudDeploymentId={cloud_deployment_id}",
4197
- f" --set-string cloudProvider={provider}",
4196
+ f" --set-string global.cloudDeploymentId={cloud_deployment_id}",
4197
+ f" --set-string global.cloudProvider={provider}",
4198
4198
  ]
4199
4199
 
4200
4200
  # Add region for most providers (not for generic)
4201
4201
  if region and provider != "generic":
4202
- command_parts.append(f" --set-string region={region}")
4202
+ command_parts.append(f" --set-string global.region={region}")
4203
4203
 
4204
4204
  # Add provider-specific parameters
4205
4205
  if provider == "gcp" and operator_iam_identity:
4206
4206
  command_parts.append(
4207
- f" --set-string operatorIamIdentity={operator_iam_identity}"
4207
+ f" --set-string global.auth.iamIdentity={operator_iam_identity}"
4208
4208
  )
4209
4209
  elif provider == "azure":
4210
4210
  if operator_iam_identity:
4211
4211
  command_parts.append(
4212
- f" --set-string operatorIamIdentity={operator_iam_identity}"
4212
+ f" --set-string global.auth.iamIdentity={operator_iam_identity}"
4213
4213
  )
4214
4214
  if anyscale_cli_token:
4215
4215
  command_parts.append(
4216
- f" --set-string anyscaleCliToken={anyscale_cli_token}"
4216
+ f" --set-string global.auth.anyscaleCliToken={anyscale_cli_token}"
4217
4217
  )
4218
4218
  else:
4219
4219
  command_parts.append(
4220
- " --set-string anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
4220
+ " --set-string global.auth.anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
4221
4221
  )
4222
4222
  elif provider == "generic":
4223
4223
  if anyscale_cli_token:
4224
4224
  command_parts.append(
4225
- f" --set-string anyscaleCliToken={anyscale_cli_token}"
4225
+ f" --set-string global.auth.anyscaleCliToken={anyscale_cli_token}"
4226
4226
  )
4227
4227
  else:
4228
4228
  command_parts.append(
4229
- " --set-string anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
4229
+ " --set-string global.auth.anyscaleCliToken=$ANYSCALE_CLI_TOKEN"
4230
4230
  )
4231
4231
 
4232
4232
  # Add common parameters
4233
4233
  command_parts.extend(
4234
4234
  [
4235
- " --set-string workloadServiceAccountName=anyscale-operator",
4235
+ " --set-string workloads.serviceAccount.name=anyscale-operator",
4236
4236
  " --namespace <namespace>",
4237
4237
  " --create-namespace",
4238
4238
  " --wait",