skypilot-nightly 1.0.0.dev20250910__py3-none-any.whl → 1.0.0.dev20250913__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. More details are linked from the original release page.

Files changed (105)
  1. sky/__init__.py +4 -2
  2. sky/adaptors/seeweb.py +103 -0
  3. sky/authentication.py +38 -0
  4. sky/backends/backend_utils.py +148 -30
  5. sky/backends/cloud_vm_ray_backend.py +606 -223
  6. sky/catalog/__init__.py +7 -0
  7. sky/catalog/aws_catalog.py +4 -0
  8. sky/catalog/common.py +18 -0
  9. sky/catalog/data_fetchers/fetch_aws.py +13 -37
  10. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  11. sky/catalog/seeweb_catalog.py +184 -0
  12. sky/client/cli/command.py +2 -71
  13. sky/client/sdk_async.py +5 -2
  14. sky/clouds/__init__.py +2 -0
  15. sky/clouds/aws.py +23 -5
  16. sky/clouds/cloud.py +8 -0
  17. sky/clouds/kubernetes.py +2 -0
  18. sky/clouds/seeweb.py +463 -0
  19. sky/core.py +46 -12
  20. sky/dashboard/out/404.html +1 -1
  21. sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_buildManifest.js +1 -1
  22. sky/dashboard/out/_next/static/chunks/1141-159df2d4c441a9d1.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/3294.03e02ae73455f48e.js +6 -0
  25. sky/dashboard/out/_next/static/chunks/3785.0fa442e16dd3f00e.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +51 -0
  27. sky/dashboard/out/_next/static/chunks/6856-e0754534b3015377.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/6990-11c8e9b982e8ffec.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +6 -0
  30. sky/dashboard/out/_next/static/chunks/{webpack-1d7e11230da3ca89.js → webpack-d1e29b3aa66bf4cf.js} +1 -1
  31. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  32. sky/dashboard/out/clusters/[cluster].html +1 -1
  33. sky/dashboard/out/clusters.html +1 -1
  34. sky/dashboard/out/config.html +1 -1
  35. sky/dashboard/out/index.html +1 -1
  36. sky/dashboard/out/infra/[context].html +1 -1
  37. sky/dashboard/out/infra.html +1 -1
  38. sky/dashboard/out/jobs/[job].html +1 -1
  39. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  40. sky/dashboard/out/jobs.html +1 -1
  41. sky/dashboard/out/users.html +1 -1
  42. sky/dashboard/out/volumes.html +1 -1
  43. sky/dashboard/out/workspace/new.html +1 -1
  44. sky/dashboard/out/workspaces/[name].html +1 -1
  45. sky/dashboard/out/workspaces.html +1 -1
  46. sky/exceptions.py +5 -0
  47. sky/global_user_state.py +75 -26
  48. sky/jobs/client/sdk_async.py +4 -2
  49. sky/jobs/controller.py +4 -2
  50. sky/jobs/recovery_strategy.py +1 -1
  51. sky/jobs/state.py +26 -16
  52. sky/jobs/utils.py +67 -24
  53. sky/logs/agent.py +10 -2
  54. sky/provision/__init__.py +1 -0
  55. sky/provision/kubernetes/config.py +7 -2
  56. sky/provision/kubernetes/instance.py +84 -41
  57. sky/provision/kubernetes/utils.py +14 -3
  58. sky/provision/seeweb/__init__.py +11 -0
  59. sky/provision/seeweb/config.py +13 -0
  60. sky/provision/seeweb/instance.py +806 -0
  61. sky/provision/vast/instance.py +1 -1
  62. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  63. sky/schemas/generated/jobsv1_pb2.py +86 -0
  64. sky/schemas/generated/jobsv1_pb2.pyi +252 -0
  65. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  66. sky/server/config.py +14 -5
  67. sky/server/metrics.py +41 -8
  68. sky/server/requests/executor.py +41 -4
  69. sky/server/server.py +1 -0
  70. sky/server/uvicorn.py +11 -5
  71. sky/setup_files/dependencies.py +8 -1
  72. sky/skylet/constants.py +14 -8
  73. sky/skylet/job_lib.py +128 -10
  74. sky/skylet/log_lib.py +14 -3
  75. sky/skylet/log_lib.pyi +9 -0
  76. sky/skylet/services.py +203 -0
  77. sky/skylet/skylet.py +4 -0
  78. sky/task.py +62 -0
  79. sky/templates/kubernetes-ray.yml.j2 +120 -3
  80. sky/templates/seeweb-ray.yml.j2 +108 -0
  81. sky/utils/accelerator_registry.py +3 -1
  82. sky/utils/command_runner.py +35 -11
  83. sky/utils/command_runner.pyi +22 -0
  84. sky/utils/context_utils.py +15 -2
  85. sky/utils/controller_utils.py +11 -5
  86. sky/utils/db/migration_utils.py +1 -1
  87. sky/utils/git.py +559 -1
  88. sky/utils/resource_checker.py +8 -7
  89. sky/workspaces/core.py +57 -21
  90. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/METADATA +40 -35
  91. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/RECORD +96 -85
  92. sky/client/cli/git.py +0 -549
  93. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  96. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  98. sky/dashboard/out/_next/static/chunks/6856-6e2bc8a6fd0867af.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  101. sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_ssgManifest.js +0 -0
  102. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/WHEEL +0 -0
  103. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/entry_points.txt +0 -0
  104. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/licenses/LICENSE +0 -0
  105. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/top_level.txt +0 -0
sky/client/cli/command.py CHANGED
@@ -59,7 +59,6 @@ from sky import task as task_lib
59
59
  from sky.adaptors import common as adaptors_common
60
60
  from sky.client import sdk
61
61
  from sky.client.cli import flags
62
- from sky.client.cli import git
63
62
  from sky.data import storage_utils
64
63
  from sky.provision.kubernetes import constants as kubernetes_constants
65
64
  from sky.provision.kubernetes import utils as kubernetes_utils
@@ -79,7 +78,6 @@ from sky.utils import controller_utils
79
78
  from sky.utils import dag_utils
80
79
  from sky.utils import directory_utils
81
80
  from sky.utils import env_options
82
- from sky.utils import git as git_utils
83
81
  from sky.utils import infra_utils
84
82
  from sky.utils import log_utils
85
83
  from sky.utils import registry
@@ -783,8 +781,8 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
783
781
 
784
782
  # Update the workdir config from the command line parameters.
785
783
  # And update the envs and secrets from the workdir.
786
- _update_task_workdir(task, workdir, git_url, git_ref)
787
- _update_task_workdir_and_secrets_from_workdir(task)
784
+ task.update_workdir(workdir, git_url, git_ref)
785
+ task.update_envs_and_secrets_from_workdir()
788
786
 
789
787
  # job launch specific.
790
788
  if job_recovery is not None:
@@ -799,73 +797,6 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
799
797
  return task
800
798
 
801
799
 
802
- def _update_task_workdir(task: task_lib.Task, workdir: Optional[str],
803
- git_url: Optional[str], git_ref: Optional[str]):
804
- """Updates the task workdir.
805
-
806
- Args:
807
- task: The task to update.
808
- workdir: The workdir to update.
809
- git_url: The git url to update.
810
- git_ref: The git ref to update.
811
- """
812
- if task.workdir is None or isinstance(task.workdir, str):
813
- if workdir is not None:
814
- task.workdir = workdir
815
- return
816
- if git_url is not None:
817
- task.workdir = {}
818
- task.workdir['url'] = git_url
819
- if git_ref is not None:
820
- task.workdir['ref'] = git_ref
821
- return
822
- return
823
- if git_url is not None:
824
- task.workdir['url'] = git_url
825
- if git_ref is not None:
826
- task.workdir['ref'] = git_ref
827
- return
828
-
829
-
830
- def _update_task_workdir_and_secrets_from_workdir(task: task_lib.Task):
831
- """Updates the task secrets from the workdir.
832
-
833
- Args:
834
- task: The task to update.
835
- """
836
- if task.workdir is None:
837
- return
838
- if not isinstance(task.workdir, dict):
839
- return
840
- url = task.workdir['url']
841
- ref = task.workdir.get('ref', '')
842
- token = os.environ.get(git_utils.GIT_TOKEN_ENV_VAR)
843
- ssh_key_path = os.environ.get(git_utils.GIT_SSH_KEY_PATH_ENV_VAR)
844
- try:
845
- git_repo = git.GitRepo(url, ref, token, ssh_key_path)
846
- clone_info = git_repo.get_repo_clone_info()
847
- if clone_info is None:
848
- return
849
- task.envs[git_utils.GIT_URL_ENV_VAR] = clone_info.url
850
- if ref:
851
- ref_type = git_repo.get_ref_type()
852
- if ref_type == git.GitRefType.COMMIT:
853
- task.envs[git_utils.GIT_COMMIT_HASH_ENV_VAR] = ref
854
- elif ref_type == git.GitRefType.BRANCH:
855
- task.envs[git_utils.GIT_BRANCH_ENV_VAR] = ref
856
- elif ref_type == git.GitRefType.TAG:
857
- task.envs[git_utils.GIT_TAG_ENV_VAR] = ref
858
- if clone_info.token is None and clone_info.ssh_key is None:
859
- return
860
- if clone_info.token is not None:
861
- task.secrets[git_utils.GIT_TOKEN_ENV_VAR] = clone_info.token
862
- if clone_info.ssh_key is not None:
863
- task.secrets[git_utils.GIT_SSH_KEY_ENV_VAR] = clone_info.ssh_key
864
- except exceptions.GitError as e:
865
- with ux_utils.print_exception_no_traceback():
866
- raise ValueError(f'{str(e)}') from None
867
-
868
-
869
800
  class _NaturalOrderGroup(click.Group):
870
801
  """Lists commands in the order defined in this script.
871
802
 
sky/client/sdk_async.py CHANGED
@@ -456,6 +456,7 @@ async def download_logs(cluster_name: str,
456
456
  async def start(
457
457
  cluster_name: str,
458
458
  idle_minutes_to_autostop: Optional[int] = None,
459
+ wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
459
460
  retry_until_up: bool = False,
460
461
  down: bool = False, # pylint: disable=redefined-outer-name
461
462
  force: bool = False,
@@ -464,7 +465,8 @@ async def start(
464
465
  """Async version of start() that restarts a cluster."""
465
466
  request_id = await context_utils.to_thread(sdk.start, cluster_name,
466
467
  idle_minutes_to_autostop,
467
- retry_until_up, down, force)
468
+ wait_for, retry_until_up, down,
469
+ force)
468
470
  if stream_logs is not None:
469
471
  return await _stream_and_get(request_id, stream_logs)
470
472
  else:
@@ -504,13 +506,14 @@ async def stop(
504
506
  async def autostop(
505
507
  cluster_name: str,
506
508
  idle_minutes: int,
509
+ wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
507
510
  down: bool = False, # pylint: disable=redefined-outer-name
508
511
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
509
512
  ) -> None:
510
513
  """Async version of autostop() that schedules an autostop/autodown for a
511
514
  cluster."""
512
515
  request_id = await context_utils.to_thread(sdk.autostop, cluster_name,
513
- idle_minutes, down)
516
+ idle_minutes, wait_for, down)
514
517
  if stream_logs is not None:
515
518
  return await _stream_and_get(request_id, stream_logs)
516
519
  else:
sky/clouds/__init__.py CHANGED
@@ -28,6 +28,7 @@ from sky.clouds.oci import OCI
28
28
  from sky.clouds.paperspace import Paperspace
29
29
  from sky.clouds.runpod import RunPod
30
30
  from sky.clouds.scp import SCP
31
+ from sky.clouds.seeweb import Seeweb
31
32
  from sky.clouds.ssh import SSH
32
33
  from sky.clouds.vast import Vast
33
34
  from sky.clouds.vsphere import Vsphere
@@ -58,6 +59,7 @@ __all__ = [
58
59
  'Fluidstack',
59
60
  'Nebius',
60
61
  'Hyperbolic',
62
+ 'Seeweb',
61
63
  # Utility functions
62
64
  'cloud_in_iterable',
63
65
  ]
sky/clouds/aws.py CHANGED
@@ -39,9 +39,11 @@ logger = sky_logging.init_logger(__name__)
39
39
 
40
40
  # Image ID tags
41
41
  _DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu'
42
+ _DEFAULT_CPU_ARM64_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-arm64'
42
43
  # For GPU-related package version,
43
44
  # see sky/catalog/images/provisioners/cuda.sh
44
45
  _DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu'
46
+ _DEFAULT_GPU_ARM64_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-arm64'
45
47
  _DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-ubuntu-2004'
46
48
  _DEFAULT_NEURON_IMAGE_ID = 'skypilot:neuron-ubuntu-2204'
47
49
 
@@ -364,13 +366,22 @@ class AWS(clouds.Cloud):
364
366
  @classmethod
365
367
  def _get_default_ami(cls, region_name: str, instance_type: str) -> str:
366
368
  acc = cls.get_accelerators_from_instance_type(instance_type)
367
- image_id = catalog.get_image_id_from_tag(_DEFAULT_CPU_IMAGE_ID,
368
- region_name,
369
- clouds='aws')
370
- if acc is not None:
371
- image_id = catalog.get_image_id_from_tag(_DEFAULT_GPU_IMAGE_ID,
369
+ arch = cls.get_arch_from_instance_type(instance_type)
370
+ if arch == constants.ARM64_ARCH:
371
+ image_id = catalog.get_image_id_from_tag(
372
+ _DEFAULT_CPU_ARM64_IMAGE_ID, region_name, clouds='aws')
373
+ else:
374
+ image_id = catalog.get_image_id_from_tag(_DEFAULT_CPU_IMAGE_ID,
372
375
  region_name,
373
376
  clouds='aws')
377
+ if acc is not None:
378
+ if arch == constants.ARM64_ARCH:
379
+ image_id = catalog.get_image_id_from_tag(
380
+ _DEFAULT_GPU_ARM64_IMAGE_ID, region_name, clouds='aws')
381
+ else:
382
+ image_id = catalog.get_image_id_from_tag(_DEFAULT_GPU_IMAGE_ID,
383
+ region_name,
384
+ clouds='aws')
374
385
  assert len(acc) == 1, acc
375
386
  acc_name = list(acc.keys())[0]
376
387
  if acc_name == 'K80':
@@ -573,6 +584,13 @@ class AWS(clouds.Cloud):
573
584
  return catalog.get_accelerators_from_instance_type(instance_type,
574
585
  clouds='aws')
575
586
 
587
+ @classmethod
588
+ def get_arch_from_instance_type(
589
+ cls,
590
+ instance_type: str,
591
+ ) -> Optional[str]:
592
+ return catalog.get_arch_from_instance_type(instance_type, clouds='aws')
593
+
576
594
  @classmethod
577
595
  def get_vcpus_mem_from_instance_type(
578
596
  cls,
sky/clouds/cloud.py CHANGED
@@ -340,6 +340,14 @@ class Cloud:
340
340
  """Returns {acc: acc_count} held by 'instance_type', if any."""
341
341
  raise NotImplementedError
342
342
 
343
+ @classmethod
344
+ def get_arch_from_instance_type(
345
+ cls,
346
+ instance_type: str,
347
+ ) -> Optional[str]:
348
+ """Returns the arch of the instance type, if any."""
349
+ raise NotImplementedError
350
+
343
351
  @classmethod
344
352
  def get_default_instance_type(cls,
345
353
  cpus: Optional[str] = None,
sky/clouds/kubernetes.py CHANGED
@@ -841,6 +841,8 @@ class Kubernetes(clouds.Cloud):
841
841
  from_instance_type(default_instance_type))
842
842
 
843
843
  gpu_task_cpus = k8s_instance_type.cpus
844
+ if resources.cpus is None:
845
+ gpu_task_cpus = gpu_task_cpus * acc_count
844
846
  # Special handling to bump up memory multiplier for GPU instances
845
847
  gpu_task_memory = (float(resources.memory.strip('+')) if
846
848
  resources.memory is not None else gpu_task_cpus *
sky/clouds/seeweb.py ADDED
@@ -0,0 +1,463 @@
1
+ """Seeweb Cloud
2
+
3
+ History:
4
+ @ Aug 6, 2025: Initial version of the integration.
5
+ - Francesco Massa
6
+ - Marco Cristofanilli (marco.cATseeweb.it)
7
+
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import typing
13
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
14
+
15
+ from sky import catalog
16
+ from sky import clouds
17
+ from sky.adaptors import seeweb as seeweb_adaptor
18
+ from sky.provision import seeweb as seeweb_provision
19
+ from sky.utils import registry
20
+ from sky.utils import resources_utils
21
+ from sky.utils import ux_utils
22
+
23
+ if typing.TYPE_CHECKING:
24
+ from sky import resources as resources_lib
25
+ from sky.utils import status_lib
26
+ from sky.utils import volume as volume_lib
27
+
28
+ # ---------- key file path -----------------
29
+ _SEEWEB_KEY_FILE = '~/.seeweb_cloud/seeweb_keys'
30
+ # (content: ini-like)
31
+ # api_key = <TOKEN>
32
+
33
+
34
+ @registry.CLOUD_REGISTRY.register
35
+ class Seeweb(clouds.Cloud):
36
+ """Seeweb GPU Cloud."""
37
+
38
+ _REPR = 'Seeweb'
39
+ # Define unsupported features to provide clear error messages
40
+ # This helps users understand what Seeweb can and cannot do
41
+ _CLOUD_UNSUPPORTED_FEATURES = {
42
+ clouds.CloudImplementationFeatures.MULTI_NODE:
43
+ ('Multi-node not supported. '
44
+ 'Seeweb does not support multi-node clusters.'),
45
+ clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
46
+ ('Custom disk tiers not supported. '
47
+ 'Seeweb does not support custom disk tiers.'),
48
+ clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
49
+ ('Storage mounting not supported. '
50
+ 'Seeweb does not support storage mounting.'),
51
+ clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
52
+ ('High availability controllers not supported. '
53
+ 'Seeweb does not support high availability controllers.'),
54
+ clouds.CloudImplementationFeatures.SPOT_INSTANCE:
55
+ ('Spot instances not supported. '
56
+ 'Seeweb does not support spot instances.'),
57
+ clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
58
+ ('Disk cloning not supported. '
59
+ 'Seeweb does not support disk cloning.'),
60
+ clouds.CloudImplementationFeatures.DOCKER_IMAGE:
61
+ ('Docker images not supported. '
62
+ 'Seeweb does not support Docker images.'),
63
+ clouds.CloudImplementationFeatures.IMAGE_ID:
64
+ ('Custom image IDs not supported. '
65
+ 'Seeweb does not support custom image IDs.'),
66
+ clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
67
+ ('Custom network tiers not supported. '
68
+ 'Seeweb does not support custom network tiers.'),
69
+ clouds.CloudImplementationFeatures.HOST_CONTROLLERS:
70
+ ('Host controllers not supported. '
71
+ 'Seeweb does not support host controllers.'),
72
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
73
+ ('Custom multi-network not supported. '
74
+ 'Seeweb does not support custom multi-network.'),
75
+ }
76
+ _MAX_CLUSTER_NAME_LEN_LIMIT = 120
77
+ _regions: List[clouds.Region] = []
78
+
79
+ PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
80
+ STATUS_VERSION = clouds.StatusVersion.SKYPILOT
81
+
82
+ # Enable port support with updatable version
83
+ OPEN_PORTS_VERSION = clouds.OpenPortsVersion.UPDATABLE
84
+
85
+ @classmethod
86
+ def _unsupported_features_for_resources(
87
+ cls, resources: 'resources_lib.Resources'
88
+ ) -> Dict[clouds.CloudImplementationFeatures, str]:
89
+ return cls._CLOUD_UNSUPPORTED_FEATURES
90
+
91
+ @classmethod
92
+ def max_cluster_name_length(cls) -> Optional[int]:
93
+ return cls._MAX_CLUSTER_NAME_LEN_LIMIT
94
+
95
+ @classmethod
96
+ def regions(cls) -> List['clouds.Region']:
97
+ """Return available regions for Seeweb."""
98
+ # Get regions from the catalog system
99
+ # This reads from the CSV files generated by fetch_seeweb.py
100
+ regions = catalog.regions(clouds='seeweb')
101
+ return regions
102
+
103
+ @classmethod
104
+ def regions_with_offering(
105
+ cls,
106
+ instance_type: str,
107
+ accelerators: Optional[Dict[str, int]],
108
+ use_spot: bool,
109
+ region: Optional[str],
110
+ zone: Optional[str],
111
+ ) -> List[clouds.Region]:
112
+ assert zone is None, 'Seeweb does not support zones.'
113
+ del zone
114
+ if use_spot:
115
+ return []
116
+
117
+ # Get regions from catalog based on instance type
118
+ # This will read the CSV and return only regions
119
+ # where the instance type exists
120
+ regions = catalog.get_region_zones_for_instance_type(
121
+ instance_type, use_spot, 'seeweb')
122
+
123
+ if region is not None:
124
+ regions = [r for r in regions if r.name == region]
125
+
126
+ return regions
127
+
128
+ @classmethod
129
+ def zones_provision_loop(
130
+ cls,
131
+ *,
132
+ region: str,
133
+ num_nodes: int,
134
+ instance_type: str,
135
+ accelerators: Optional[Dict[str, int]] = None,
136
+ use_spot: bool = False,
137
+ ) -> Iterator[None]:
138
+ del num_nodes
139
+ regions = cls.regions_with_offering(instance_type,
140
+ accelerators,
141
+ use_spot,
142
+ region=region,
143
+ zone=None)
144
+ for r in regions:
145
+ assert r.zones is None, r
146
+ yield r.zones
147
+
148
+ @classmethod
149
+ def get_zone_shell_cmd(cls) -> Optional[str]:
150
+ """Seeweb doesn't support zones."""
151
+ return None
152
+
153
+ def instance_type_to_hourly_cost(
154
+ self,
155
+ instance_type: str,
156
+ use_spot: bool,
157
+ region: Optional[str],
158
+ zone: Optional[str],
159
+ ) -> float:
160
+ cost = catalog.get_hourly_cost(instance_type,
161
+ use_spot=use_spot,
162
+ region=region,
163
+ zone=zone,
164
+ clouds='seeweb')
165
+ return cost
166
+
167
+ def accelerators_to_hourly_cost(
168
+ self,
169
+ accelerators: Dict[str, int],
170
+ use_spot: bool,
171
+ region: Optional[str],
172
+ zone: Optional[str],
173
+ ) -> float:
174
+
175
+ return 0.0
176
+
177
+ def get_egress_cost(self, num_gigabytes: float):
178
+ return 0.0
179
+
180
+ def make_deploy_resources_variables(
181
+ self,
182
+ resources: 'resources_lib.Resources',
183
+ cluster_name: resources_utils.ClusterName,
184
+ region: 'clouds.Region',
185
+ zones: Optional[List['clouds.Zone']],
186
+ num_nodes: int,
187
+ dryrun: bool = False,
188
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
189
+ ) -> Dict[str, Any]:
190
+ """Create deployment variables for Seeweb."""
191
+
192
+ # Note: Spot instances and multi-node are automatically handled by
193
+ # the framework via _CLOUD_UNSUPPORTED_FEATURES
194
+
195
+ resources = resources.assert_launchable()
196
+
197
+ acc_dict = self.get_accelerators_from_instance_type(
198
+ resources.instance_type)
199
+
200
+ # Standard custom_resources string for Ray
201
+ custom_resources = resources_utils.make_ray_custom_resources_str(
202
+ acc_dict)
203
+
204
+ # Seeweb-specific GPU configuration for the provisioner
205
+ # This tells the provisioner how to configure GPU resources
206
+ seeweb_gpu_config = None
207
+ if resources.accelerators:
208
+ # If the instance has accelerators, prepare GPU configuration
209
+ accelerator_name = list(resources.accelerators.keys())[0]
210
+ accelerator_count = resources.accelerators[accelerator_name]
211
+ seeweb_gpu_config = {
212
+ 'gpu': accelerator_count,
213
+ 'gpu_label': accelerator_name,
214
+ }
215
+
216
+ # Seeweb uses pre-configured images based on instance type
217
+ # Determine image based on whether the instance type name contains "GPU"
218
+ if resources.instance_type and 'GPU' in resources.instance_type.upper():
219
+ # GPU instance - use image with NVIDIA drivers
220
+ if resources.instance_type in ['ECS1GPU10', 'ECS2GPU10']:
221
+ # H200 GPU instance - use UEFI image with NVIDIA drivers
222
+ image_id = 'ubuntu-2204-uefi-nvidia-driver'
223
+ else:
224
+ # Other GPU instance - use standard image with NVIDIA drivers
225
+ image_id = 'ubuntu-2204-nvidia-driver'
226
+ else:
227
+ # CPU-only instance - use standard Ubuntu image
228
+ image_id = 'ubuntu-2204'
229
+
230
+ result = {
231
+ 'instance_type': resources.instance_type,
232
+ 'region': region.name,
233
+ 'cluster_name': cluster_name,
234
+ 'custom_resources': custom_resources,
235
+ 'seeweb_gpu_config': seeweb_gpu_config,
236
+ 'image_id': image_id,
237
+ }
238
+ return result
239
+
240
+ @classmethod
241
+ def get_vcpus_mem_from_instance_type(
242
+ cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]:
243
+ result = catalog.get_vcpus_mem_from_instance_type(instance_type,
244
+ clouds='seeweb')
245
+ return result
246
+
247
+ @classmethod
248
+ def get_accelerators_from_instance_type(
249
+ cls,
250
+ instance_type: str,
251
+ ) -> Optional[Dict[str, Union[int, float]]]:
252
+ result = catalog.get_accelerators_from_instance_type(instance_type,
253
+ clouds='seeweb')
254
+ return result
255
+
256
+ @classmethod
257
+ def get_default_instance_type(
258
+ cls,
259
+ cpus: Optional[str] = None,
260
+ memory: Optional[str] = None,
261
+ disk_tier: Optional[resources_utils.DiskTier] = None,
262
+ region: Optional[str] = None,
263
+ zone: Optional[str] = None,
264
+ ) -> Optional[str]:
265
+ result = catalog.get_default_instance_type(cpus=cpus,
266
+ memory=memory,
267
+ disk_tier=disk_tier,
268
+ clouds='seeweb')
269
+ return result
270
+
271
+ def _get_feasible_launchable_resources(
272
+ self, resources: 'resources_lib.Resources'
273
+ ) -> 'resources_utils.FeasibleResources':
274
+ """Get feasible resources for Seeweb."""
275
+ if resources.use_spot:
276
+ return resources_utils.FeasibleResources(
277
+ [], [], 'Spot instances not supported on Seeweb')
278
+
279
+ if resources.accelerators and len(resources.accelerators) > 1:
280
+ return resources_utils.FeasibleResources(
281
+ [], [], 'Multiple accelerator types not supported on Seeweb')
282
+
283
+ # If no instance_type is specified, try to get a default one
284
+ if not resources.instance_type:
285
+ # If accelerators are specified, try to find instance
286
+ # type forthat accelerator
287
+ if resources.accelerators:
288
+ # Get the first accelerator
289
+ # (we already checked there's only one)
290
+ acc_name, acc_count = list(resources.accelerators.items())[0]
291
+
292
+ # Use catalog to find instance type for this accelerator
293
+ # This leverages the catalog system to find suitable instances
294
+ (
295
+ instance_types,
296
+ fuzzy_candidates,
297
+ ) = catalog.get_instance_type_for_accelerator(
298
+ acc_name=acc_name,
299
+ acc_count=acc_count,
300
+ cpus=resources.cpus,
301
+ memory=resources.memory,
302
+ use_spot=resources.use_spot,
303
+ region=resources.region,
304
+ zone=resources.zone,
305
+ clouds='seeweb',
306
+ )
307
+
308
+ if instance_types and len(instance_types) > 0:
309
+ # Use the first (cheapest) instance type
310
+ selected_instance_type = instance_types[0]
311
+ resources = resources.copy(
312
+ instance_type=selected_instance_type)
313
+ else:
314
+ return resources_utils.FeasibleResources(
315
+ [],
316
+ fuzzy_candidates,
317
+ f'No instance type found for accelerator'
318
+ f'{acc_name}:{acc_count} on Seeweb',
319
+ )
320
+ else:
321
+ # No accelerators specified, use default instance type
322
+ default_instance_type = self.get_default_instance_type(
323
+ cpus=resources.cpus,
324
+ memory=resources.memory,
325
+ region=resources.region,
326
+ zone=resources.zone,
327
+ )
328
+
329
+ if default_instance_type:
330
+ # Create new resources with the default instance type
331
+ resources = resources.copy(
332
+ instance_type=default_instance_type)
333
+ else:
334
+ return resources_utils.FeasibleResources(
335
+ [],
336
+ [],
337
+ f'No suitable instance type found for'
338
+ f'cpus={resources.cpus}, memory={resources.memory}',
339
+ )
340
+
341
+ # Check if instance type exists
342
+ if resources.instance_type:
343
+ exists = catalog.instance_type_exists(resources.instance_type,
344
+ clouds='seeweb')
345
+ if not exists:
346
+ return resources_utils.FeasibleResources(
347
+ [],
348
+ [],
349
+ f'Instance type {resources.instance_type}'
350
+ f' not available on Seeweb',
351
+ )
352
+
353
+ # Set the cloud if not already set
354
+ if not resources.cloud:
355
+ resources = resources.copy(cloud=self)
356
+
357
+ # Return the resources as feasible
358
+ return resources_utils.FeasibleResources([resources], [], None)
359
+
360
+ @classmethod
361
+ def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
362
+ """Check Seeweb compute credentials."""
363
+ try:
364
+ result = seeweb_adaptor.check_compute_credentials()
365
+ return result, None
366
+ except Exception as e: # pylint: disable=broad-except
367
+ return False, str(e)
368
+
369
+ @classmethod
370
+ def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
371
+ """Check Seeweb storage credentials."""
372
+ try:
373
+ result = seeweb_adaptor.check_storage_credentials()
374
+ return result, None
375
+ except Exception as e: # pylint: disable=broad-except
376
+ return False, str(e)
377
+
378
+ @classmethod
379
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
380
+ # Seeweb doesn't have user identity concept
381
+ return None
382
+
383
+ @classmethod
384
+ def query_status(
385
+ cls,
386
+ name: str,
387
+ tag_filters: Dict[str, str],
388
+ region: Optional[str],
389
+ zone: Optional[str],
390
+ **kwargs,
391
+ ) -> List['status_lib.ClusterStatus']:
392
+ """Query the status of Seeweb cluster instances."""
393
+ cluster_name_on_cloud = name
394
+
395
+ result = seeweb_provision.instance.query_instances(
396
+ cluster_name=name,
397
+ cluster_name_on_cloud=cluster_name_on_cloud,
398
+ provider_config={},
399
+ non_terminated_only=True)
400
+ # Convert Dict[str, Tuple[Optional[ClusterStatus],
401
+ # Optional[str]]] to List[ClusterStatus]
402
+ return [status for status, _ in result.values() if status is not None]
403
+
404
+ def get_credential_file_mounts(self) -> Dict[str, str]:
405
+ """Returns the credential files to mount."""
406
+ # Mount the Seeweb API key file to the remote instance
407
+ # This allows the provisioner to authenticate with Seeweb API
408
+ result = {
409
+ _SEEWEB_KEY_FILE: _SEEWEB_KEY_FILE,
410
+ }
411
+ return result
412
+
413
+ def instance_type_exists(self, instance_type: str) -> bool:
414
+ """Returns whether the instance type exists for Seeweb."""
415
+ result = catalog.instance_type_exists(instance_type, clouds='seeweb')
416
+ return result
417
+
418
+ @classmethod
419
+ def get_image_size(cls, image_id: str, region: Optional[str]) -> float:
420
+ """Seeweb doesn't support custom images."""
421
+ del image_id, region
422
+ with ux_utils.print_exception_no_traceback():
423
+ raise ValueError(f'Custom images are not supported on {cls._REPR}. '
424
+ 'Seeweb clusters use pre-configured images only.')
425
+
426
+ # Image-related methods (not supported)
427
+ @classmethod
428
+ def create_image_from_cluster(
429
+ cls,
430
+ cluster_name: resources_utils.ClusterName,
431
+ region: Optional[str],
432
+ zone: Optional[str],
433
+ ) -> str:
434
+ del cluster_name, region, zone # unused
435
+ with ux_utils.print_exception_no_traceback():
436
+ raise ValueError(
437
+ f'Creating images from clusters is not supported on'
438
+ f' {cls._REPR}. Seeweb does not support custom'
439
+ f' image creation.')
440
+
441
+ @classmethod
442
+ def maybe_move_image(
443
+ cls,
444
+ image_id: str,
445
+ source_region: str,
446
+ target_region: str,
447
+ source_zone: Optional[str],
448
+ target_zone: Optional[str],
449
+ ) -> str:
450
+ del image_id, source_region, target_region, source_zone, target_zone
451
+ with ux_utils.print_exception_no_traceback():
452
+ raise ValueError(
453
+ f'Moving images between regions is not supported on'
454
+ f' {cls._REPR}. '
455
+ 'Seeweb does not support custom images.')
456
+
457
+ @classmethod
458
+ def delete_image(cls, image_id: str, region: Optional[str]) -> None:
459
+ del image_id, region
460
+ with ux_utils.print_exception_no_traceback():
461
+ raise ValueError(
462
+ f'Deleting images is not supported on {cls._REPR}. '
463
+ 'Seeweb does not support custom image management.')