skypilot-nightly 1.0.0.dev20251027__py3-none-any.whl → 1.0.0.dev20251101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (114)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/coreweave.py +278 -0
  4. sky/backends/backend_utils.py +9 -6
  5. sky/backends/cloud_vm_ray_backend.py +2 -3
  6. sky/check.py +25 -13
  7. sky/client/cli/command.py +52 -24
  8. sky/cloud_stores.py +73 -0
  9. sky/clouds/aws.py +59 -11
  10. sky/core.py +7 -5
  11. sky/dashboard/out/404.html +1 -1
  12. sky/dashboard/out/_next/static/{YP5Vc3ROcDnTGta0XAhcs → 8ixeA0NVQJN8HUdijid8b}/_buildManifest.js +1 -1
  13. sky/dashboard/out/_next/static/chunks/{1141-d5204f35a3388bf4.js → 1141-c3c10e2c6ed71a8f.js} +1 -1
  14. sky/dashboard/out/_next/static/chunks/2755.d6dc6d530fed0b61.js +26 -0
  15. sky/dashboard/out/_next/static/chunks/3294.87a13fba0058865b.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/{3785.538eb23a098fc304.js → 3785.170be320e0060eaf.js} +1 -1
  17. sky/dashboard/out/_next/static/chunks/4282-49b2065b7336e496.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/7615-80aa7b09f45a86d2.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/8969-4ed9236db997b42b.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/9360.10a3aac7aad5e3aa.js +31 -0
  21. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ac4a217f17b087cb.js +16 -0
  22. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-fbf2907ce2bb67e2.js → [cluster]-1704039ccaf997cf.js} +1 -1
  23. sky/dashboard/out/_next/static/chunks/pages/{jobs-0dc34cf9a8710a9f.js → jobs-7eee823559e5cf9f.js} +1 -1
  24. sky/dashboard/out/_next/static/chunks/pages/{users-96d6b8bb2dec055f.js → users-2b172f13f8538a7a.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-fb1b4d3bfb047cad.js → [name]-bbfe5860c93470fd.js} +1 -1
  26. sky/dashboard/out/_next/static/chunks/pages/{workspaces-6fc994fa1ee6c6bf.js → workspaces-1891376c08050940.js} +1 -1
  27. sky/dashboard/out/_next/static/chunks/{webpack-585d805f693dbceb.js → webpack-e38d5319cd10a3a0.js} +1 -1
  28. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  29. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  30. sky/dashboard/out/clusters/[cluster].html +1 -1
  31. sky/dashboard/out/clusters.html +1 -1
  32. sky/dashboard/out/config.html +1 -1
  33. sky/dashboard/out/index.html +1 -1
  34. sky/dashboard/out/infra/[context].html +1 -1
  35. sky/dashboard/out/infra.html +1 -1
  36. sky/dashboard/out/jobs/[job].html +1 -1
  37. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  38. sky/dashboard/out/jobs.html +1 -1
  39. sky/dashboard/out/users.html +1 -1
  40. sky/dashboard/out/volumes.html +1 -1
  41. sky/dashboard/out/workspace/new.html +1 -1
  42. sky/dashboard/out/workspaces/[name].html +1 -1
  43. sky/dashboard/out/workspaces.html +1 -1
  44. sky/data/data_utils.py +92 -1
  45. sky/data/mounting_utils.py +71 -2
  46. sky/data/storage.py +166 -9
  47. sky/global_user_state.py +14 -18
  48. sky/jobs/constants.py +2 -0
  49. sky/jobs/controller.py +62 -67
  50. sky/jobs/file_content_utils.py +80 -0
  51. sky/jobs/log_gc.py +201 -0
  52. sky/jobs/scheduler.py +15 -2
  53. sky/jobs/server/core.py +85 -13
  54. sky/jobs/server/server.py +14 -13
  55. sky/jobs/server/utils.py +28 -10
  56. sky/jobs/state.py +216 -40
  57. sky/jobs/utils.py +65 -28
  58. sky/metrics/utils.py +18 -0
  59. sky/optimizer.py +1 -1
  60. sky/provision/kubernetes/instance.py +88 -19
  61. sky/provision/kubernetes/volume.py +2 -2
  62. sky/schemas/api/responses.py +3 -5
  63. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  64. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  65. sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
  66. sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
  67. sky/serve/replica_managers.py +2 -2
  68. sky/serve/serve_utils.py +9 -2
  69. sky/serve/server/server.py +8 -7
  70. sky/server/common.py +21 -15
  71. sky/server/constants.py +1 -1
  72. sky/server/daemons.py +23 -17
  73. sky/server/requests/executor.py +7 -3
  74. sky/server/requests/payloads.py +2 -0
  75. sky/server/requests/request_names.py +80 -0
  76. sky/server/requests/requests.py +137 -102
  77. sky/server/requests/serializers/decoders.py +0 -6
  78. sky/server/requests/serializers/encoders.py +33 -6
  79. sky/server/server.py +105 -36
  80. sky/server/stream_utils.py +56 -13
  81. sky/setup_files/dependencies.py +2 -0
  82. sky/skylet/constants.py +6 -1
  83. sky/skylet/events.py +7 -0
  84. sky/skylet/services.py +18 -7
  85. sky/ssh_node_pools/server.py +5 -4
  86. sky/task.py +14 -42
  87. sky/templates/kubernetes-ray.yml.j2 +1 -1
  88. sky/templates/nebius-ray.yml.j2 +1 -0
  89. sky/templates/websocket_proxy.py +140 -12
  90. sky/users/permission.py +4 -1
  91. sky/utils/cli_utils/status_utils.py +8 -2
  92. sky/utils/context_utils.py +13 -1
  93. sky/utils/db/migration_utils.py +1 -1
  94. sky/utils/resource_checker.py +4 -1
  95. sky/utils/resources_utils.py +53 -29
  96. sky/utils/schemas.py +23 -4
  97. sky/volumes/server/server.py +4 -3
  98. sky/workspaces/server.py +7 -6
  99. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/METADATA +53 -37
  100. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/RECORD +106 -100
  101. sky/dashboard/out/_next/static/chunks/2755.227c84f5adf75c6b.js +0 -26
  102. sky/dashboard/out/_next/static/chunks/3015-2dcace420c8939f4.js +0 -1
  103. sky/dashboard/out/_next/static/chunks/3294.6d5054a953a818cb.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/4282-d2f3ef2fbf78e347.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/8969-0389e2cb52412db3.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/9360.07d78b8552bc9d17.js +0 -31
  107. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c815b90e296b8075.js +0 -16
  108. sky/dashboard/out/_next/static/css/4c052b4444e52a58.css +0 -3
  109. /sky/dashboard/out/_next/static/{YP5Vc3ROcDnTGta0XAhcs → 8ixeA0NVQJN8HUdijid8b}/_ssgManifest.js +0 -0
  110. /sky/dashboard/out/_next/static/chunks/pages/{_app-513d332313670f2a.js → _app-bde01e4a2beec258.js} +0 -0
  111. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/WHEEL +0 -0
  112. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/entry_points.txt +0 -0
  113. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/licenses/LICENSE +0 -0
  114. {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -7,7 +7,7 @@ import urllib.request
7
7
  from sky.utils import directory_utils
8
8
 
9
9
  # Replaced with the current commit when building the wheels.
10
- _SKYPILOT_COMMIT_SHA = 'b7eec54b8f52d7f72ff5b3908ba7f2d66b8bca6a'
10
+ _SKYPILOT_COMMIT_SHA = '2205c51a08e8dc517375ba0f653557257c6f6751'
11
11
 
12
12
 
13
13
  def _get_git_commit():
@@ -37,7 +37,7 @@ def _get_git_commit():
37
37
 
38
38
 
39
39
  __commit__ = _get_git_commit()
40
- __version__ = '1.0.0.dev20251027'
40
+ __version__ = '1.0.0.dev20251101'
41
41
  __root_dir__ = directory_utils.get_sky_dir()
42
42
 
43
43
 
sky/adaptors/aws.py CHANGED
@@ -34,6 +34,7 @@ import time
34
34
  import typing
35
35
  from typing import Callable, Literal, Optional, TypeVar
36
36
 
37
+ from sky import skypilot_config
37
38
  from sky.adaptors import common
38
39
  from sky.utils import annotations
39
40
  from sky.utils import common_utils
@@ -119,12 +120,27 @@ def _create_aws_object(creation_fn_or_cls: Callable[[], T],
119
120
  f'{common_utils.format_exception(e)}.')
120
121
 
121
122
 
123
def get_workspace_profile() -> Optional[str]:
    """Return the AWS profile name stored in the workspace config.

    Reads the ``aws`` cloud section of the workspace configuration and
    returns the value of its ``profile`` key, or ``None`` when that key is
    not present.
    """
    aws_workspace_config = skypilot_config.get_workspace_cloud('aws')
    return aws_workspace_config.get('profile', None)
126
+
127
+
122
128
  # The LRU cache needs to be thread-local to avoid multiple threads sharing the
123
129
  # same session object, which is not guaranteed to be thread-safe.
124
130
  @_thread_local_lru_cache()
125
- def session(check_credentials: bool = True):
126
- """Create an AWS session."""
127
- s = _create_aws_object(boto3.session.Session, 'session')
131
+ def session(check_credentials: bool = True, profile: Optional[str] = None):
132
+ """Create an AWS session.
133
+
134
+ Args:
135
+ check_credentials: Whether to check if credentials are available.
136
+ profile: AWS profile name to use. If None, uses default credentials.
137
+ """
138
+ if profile is not None:
139
+ logger.debug(f'Using AWS profile \'{profile}\'.')
140
+ s = _create_aws_object(
141
+ lambda: boto3.session.Session(profile_name=profile), 'session')
142
+ else:
143
+ s = _create_aws_object(boto3.session.Session, 'session')
128
144
  if check_credentials and s.get_credentials() is None:
129
145
  # s.get_credentials() can be None if there are actually no credentials,
130
146
  # or if we fail to get credentials from IMDS (e.g. due to throttling).
@@ -180,13 +196,14 @@ def resource(service_name: str, **kwargs):
180
196
  kwargs['config'] = config
181
197
 
182
198
  check_credentials = kwargs.pop('check_credentials', True)
199
+ profile = get_workspace_profile()
183
200
 
184
201
  # Need to use the client retrieved from the per-thread session to avoid
185
202
  # thread-safety issues (Directly creating the client with boto3.resource()
186
203
  # is not thread-safe). Reference: https://stackoverflow.com/a/59635814
187
204
  return _create_aws_object(
188
- lambda: session(check_credentials=check_credentials).resource(
189
- service_name, **kwargs), 'resource')
205
+ lambda: session(check_credentials=check_credentials, profile=profile).
206
+ resource(service_name, **kwargs), 'resource')
190
207
 
191
208
 
192
209
  # New typing overloads can be added as needed.
@@ -221,14 +238,15 @@ def client(service_name: str, **kwargs):
221
238
  _assert_kwargs_builtin_type(kwargs)
222
239
 
223
240
  check_credentials = kwargs.pop('check_credentials', True)
241
+ profile = get_workspace_profile()
224
242
 
225
243
  # Need to use the client retrieved from the per-thread session to avoid
226
244
  # thread-safety issues (Directly creating the client with boto3.client() is
227
245
  # not thread-safe). Reference: https://stackoverflow.com/a/59635814
228
246
 
229
247
  return _create_aws_object(
230
- lambda: session(check_credentials=check_credentials).client(
231
- service_name, **kwargs), 'client')
248
+ lambda: session(check_credentials=check_credentials, profile=profile).
249
+ client(service_name, **kwargs), 'client')
232
250
 
233
251
 
234
252
  @common.load_lazy_modules(modules=_LAZY_MODULES)
@@ -0,0 +1,278 @@
1
+ """CoreWeave cloud adaptor."""
2
+
3
+ import configparser
4
+ import contextlib
5
+ import os
6
+ import threading
7
+ from typing import Dict, Optional, Tuple
8
+
9
+ from sky import exceptions
10
+ from sky import sky_logging
11
+ from sky.adaptors import common
12
+ from sky.clouds import cloud
13
+ from sky.utils import annotations
14
+ from sky.utils import ux_utils
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
# Name of the AWS-style profile that holds the CoreWeave settings, and the
# dedicated credentials/config files that profile lives in.
COREWEAVE_PROFILE_NAME = 'cw'
COREWEAVE_CREDENTIALS_PATH = '~/.coreweave/cw.credentials'
COREWEAVE_CONFIG_PATH = '~/.coreweave/cw.config'
# Display name used when reporting on this storage provider.
NAME = 'CoreWeave'
DEFAULT_REGION = 'US-EAST-01A'
# Endpoint used when the config file does not supply an ``endpoint_url``.
_DEFAULT_ENDPOINT = 'https://cwobject.com'
# Prefix for each line of the multi-line credential hints.
_INDENT_PREFIX = ' '

# Adjacent string literals are concatenated verbatim, so the space separating
# the two sentences has to be written explicitly.
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for CoreWeave. '
                         'Try pip install "skypilot[coreweave]"')
28
+
29
# boto3 and botocore are wrapped in LazyImport objects; the shared message is
# passed along for use when the import fails.
boto3 = common.LazyImport(
    'boto3',
    import_error_message=_IMPORT_ERROR_MESSAGE,
)
botocore = common.LazyImport(
    'botocore',
    import_error_message=_IMPORT_ERROR_MESSAGE,
)

# Modules handed to ``common.load_lazy_modules`` by functions in this file.
_LAZY_MODULES = (boto3, botocore)
# Reentrant lock that serializes boto3 session creation (see ``session()``).
_session_creation_lock = threading.RLock()
35
+
36
+
37
@contextlib.contextmanager
def _load_cw_credentials_env():
    """Temporarily point the AWS SDK at the CoreWeave credential files.

    While the context is active, ``AWS_SHARED_CREDENTIALS_FILE`` and
    ``AWS_CONFIG_FILE`` are set to the CoreWeave paths. On exit, whether the
    body finished normally or raised, each variable is restored to the value
    it had on entry, or removed if it was unset on entry.

    NOTE(review): this mutates the process-wide ``os.environ``; callers that
    need isolation between threads must provide their own synchronization.
    """
    overrides = {
        'AWS_SHARED_CREDENTIALS_FILE': COREWEAVE_CREDENTIALS_PATH,
        'AWS_CONFIG_FILE': COREWEAVE_CONFIG_PATH,
    }
    # Snapshot before overriding so the exact prior state can be restored.
    previous = {name: os.environ.get(name) for name in overrides}
    os.environ.update(overrides)
    try:
        yield
    finally:
        for name, old_value in previous.items():
            if old_value is None:
                # pop() instead of del: the variable may already have been
                # removed inside the context, and a KeyError raised here
                # would mask the exception that is propagating.
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value
55
+
56
+
57
def get_coreweave_credentials(boto3_session):
    """Fetch the CoreWeave credentials held by a boto3 session.

    The lookup runs with the AWS credential/config environment variables
    pointed at the CoreWeave files.

    Args:
        boto3_session: The boto3 session object to query.

    Returns:
        The frozen credentials object produced by the session's credentials
        (``get_frozen_credentials()``).

    Raises:
        ValueError: If the session reports no credentials.
    """
    with _load_cw_credentials_env():
        found = boto3_session.get_credentials()
        if found is not None:
            return found.get_frozen_credentials()
        with ux_utils.print_exception_no_traceback():
            raise ValueError('CoreWeave credentials not found. Run '
                             '`sky check` to verify credentials are '
                             'correctly set up.')
74
+
75
+
76
@annotations.lru_cache(scope='global')
def session():
    """Return a boto3 session bound to the CoreWeave profile.

    Session construction is serialized with a reentrant lock because creating
    a boto3 session is not thread-safe
    (https://github.com/boto/boto3/issues/1592). The resulting session object
    is itself thread-safe, which is why the result can be cached.
    """
    # Lock first, then redirect the AWS env vars to the CoreWeave files for
    # the duration of the constructor call.
    with _session_creation_lock, _load_cw_credentials_env():
        return boto3.session.Session(profile_name=COREWEAVE_PROFILE_NAME)
89
+
90
+
91
@annotations.lru_cache(scope='global')
def resource(resource_name: str, **kwargs):
    """Create a CoreWeave resource.

    Args:
        resource_name: CoreWeave resource name (e.g., 's3').
        kwargs: Other options forwarded to the session's ``resource()`` call.
    """
    # The resource is built from the cached session() object rather than via
    # boto3.resource() directly, which is not thread-safe.
    # Reference: https://stackoverflow.com/a/59635814
    cw_session = session()
    frozen_creds = get_coreweave_credentials(cw_session)
    endpoint_url = get_endpoint()
    s3_config = botocore.config.Config(s3={'addressing_style': 'virtual'})

    return cw_session.resource(resource_name,
                               endpoint_url=endpoint_url,
                               aws_access_key_id=frozen_creds.access_key,
                               aws_secret_access_key=frozen_creds.secret_key,
                               region_name='auto',
                               config=s3_config,
                               **kwargs)
116
+
117
+
118
@annotations.lru_cache(scope='global')
def client(service_name: str):
    """Create a CoreWeave client for a given service.

    Args:
        service_name: CoreWeave service name (e.g., 's3').
    """
    # The client is built from the cached session() object rather than via
    # boto3.client() directly, which is not thread-safe.
    # Reference: https://stackoverflow.com/a/59635814
    cw_session = session()
    frozen_creds = get_coreweave_credentials(cw_session)
    endpoint_url = get_endpoint()
    s3_config = botocore.config.Config(s3={'addressing_style': 'virtual'})

    return cw_session.client(service_name,
                             endpoint_url=endpoint_url,
                             aws_access_key_id=frozen_creds.access_key,
                             aws_secret_access_key=frozen_creds.secret_key,
                             region_name='auto',
                             config=s3_config)
142
+
143
+
144
@common.load_lazy_modules(_LAZY_MODULES)
def botocore_exceptions():
    """Return the ``botocore.exceptions`` module.

    The import happens inside the function so that botocore is only imported
    when this function is called.
    """
    # pylint: disable=import-outside-toplevel
    import botocore.exceptions as boto_exceptions
    return boto_exceptions
150
+
151
+
152
def get_endpoint():
    """Read the CoreWeave endpoint URL from COREWEAVE_CONFIG_PATH.

    The config file is an AWS-style config file with format:
        [profile cw]
        endpoint_url = https://cwobject.com
        s3 =
            addressing_style = virtual

    Returns:
        str: The ``endpoint_url`` of the ``[profile <name>]`` section with
        surrounding whitespace removed. The default endpoint is returned when
        the file does not exist, cannot be read or parsed (a warning is
        logged), lacks the section or option, or has a blank value.
    """
    config_path = os.path.expanduser(COREWEAVE_CONFIG_PATH)
    if not os.path.isfile(config_path):
        return _DEFAULT_ENDPOINT

    # Interpolation is disabled: with the default BasicInterpolation a literal
    # '%' in the URL (e.g. percent-encoding) makes get() raise, which would
    # silently discard a valid user-configured endpoint.
    parser = configparser.ConfigParser(interpolation=None)
    try:
        parser.read(config_path, encoding='utf-8')
        # fallback='' covers both a missing section and a missing option.
        endpoint = parser.get(f'profile {COREWEAVE_PROFILE_NAME}',
                              'endpoint_url',
                              fallback='')
    except (configparser.Error, OSError, UnicodeDecodeError) as e:
        logger.warning(f'Failed to parse CoreWeave config file: {e}. '
                       f'Using default endpoint: {_DEFAULT_ENDPOINT}')
        return _DEFAULT_ENDPOINT

    # A blank value is treated like a missing one instead of being returned
    # as an empty endpoint URL.
    return endpoint.strip() or _DEFAULT_ENDPOINT
184
+
185
+
186
def check_credentials(
        cloud_capability: cloud.CloudCapability) -> Tuple[bool, Optional[str]]:
    """Check CoreWeave credentials for the requested capability.

    Args:
        cloud_capability: The capability to check.

    Returns:
        The result of ``check_storage_credentials()`` when the capability is
        STORAGE.

    Raises:
        exceptions.NotSupportedError: For any capability other than STORAGE.
    """
    is_storage = cloud_capability == cloud.CloudCapability.STORAGE
    if not is_storage:
        raise exceptions.NotSupportedError(
            f'{NAME} does not support {cloud_capability}.')
    return check_storage_credentials()
193
+
194
+
195
def check_storage_credentials() -> Tuple[bool, Optional[str]]:
    """Check whether the CoreWeave Object Storage profile is configured.

    Returns:
        ``(True, None)`` when the profile is found in both the credentials
        file and the config file. Otherwise ``(False, hint)``, where ``hint``
        names each file that lacks the profile and lists the commands to set
        it up.
    """
    cred_ok = coreweave_profile_in_cred()
    config_ok = coreweave_profile_in_config()
    if cred_ok and config_ok:
        return True, None

    # Collect the problem statements and the remediation commands separately,
    # then stitch them together in a single pass at the end.
    problems = []
    commands = []
    if not cred_ok:
        problems.append(f'[{COREWEAVE_PROFILE_NAME}] profile is not set in '
                        f'{COREWEAVE_CREDENTIALS_PATH}.')
        commands.append(f'\n{_INDENT_PREFIX} $ pip install boto3')
        commands.append(f'\n{_INDENT_PREFIX} $ AWS_SHARED_CREDENTIALS_FILE='
                        f'{COREWEAVE_CREDENTIALS_PATH} aws configure --profile '
                        f'{COREWEAVE_PROFILE_NAME}')
    if not config_ok:
        problems.append(f'[{COREWEAVE_PROFILE_NAME}] profile is not set in '
                        f'{COREWEAVE_CONFIG_PATH}.')
        commands.append(f'\n{_INDENT_PREFIX} $ AWS_CONFIG_FILE='
                        f'{COREWEAVE_CONFIG_PATH} aws configure set endpoint_url'
                        f' <ENDPOINT_URL> --profile '
                        f'{COREWEAVE_PROFILE_NAME}')
        commands.append(f'\n{_INDENT_PREFIX} $ AWS_CONFIG_FILE='
                        f'{COREWEAVE_CONFIG_PATH} aws configure set '
                        f's3.addressing_style virtual --profile '
                        f'{COREWEAVE_PROFILE_NAME}')

    pieces = [
        ' Additionally, '.join(problems),
        ' Run the following commands:',
        *commands,
        f'\n{_INDENT_PREFIX}For more info: ',
        'https://docs.coreweave.com/docs/products/storage/object-storage/get-started-caios',  # pylint: disable=line-too-long
    ]
    return False, ''.join(pieces)
239
+
240
+
241
def coreweave_profile_in_config() -> bool:
    """Report whether the CoreWeave profile header appears in the config file.

    Returns:
        True if any line of the file at COREWEAVE_CONFIG_PATH contains
        ``[profile <COREWEAVE_PROFILE_NAME>]``; False if no line does or the
        file does not exist.
    """
    config_file = os.path.expanduser(COREWEAVE_CONFIG_PATH)
    if not os.path.isfile(config_file):
        return False
    header = f'[profile {COREWEAVE_PROFILE_NAME}]'
    with open(config_file, 'r', encoding='utf-8') as handle:
        # any() stops reading at the first matching line.
        return any(header in row for row in handle)
252
+
253
+
254
def coreweave_profile_in_cred() -> bool:
    """Report whether the CoreWeave profile header appears in the credentials.

    Returns:
        True if any line of the file at COREWEAVE_CREDENTIALS_PATH contains
        ``[<COREWEAVE_PROFILE_NAME>]``; False if no line does or the file
        does not exist.
    """
    credentials_file = os.path.expanduser(COREWEAVE_CREDENTIALS_PATH)
    if not os.path.isfile(credentials_file):
        return False
    header = f'[{COREWEAVE_PROFILE_NAME}]'
    with open(credentials_file, 'r', encoding='utf-8') as handle:
        # any() stops reading at the first matching line.
        return any(header in row for row in handle)
265
+
266
+
267
def get_credential_file_mounts() -> Dict[str, str]:
    """Return the credential file mounts for CoreWeave.

    Returns:
        Dict[str, str]: A source-path to destination-path mapping in which the
        credentials file and the config file each map to the same path.
    """
    mounted_paths = (COREWEAVE_CREDENTIALS_PATH, COREWEAVE_CONFIG_PATH)
    return {path: path for path in mounted_paths}
@@ -3157,6 +3157,7 @@ def get_clusters(
3157
3157
  all_users: bool = True,
3158
3158
  include_credentials: bool = False,
3159
3159
  summary_response: bool = False,
3160
+ include_handle: bool = True,
3160
3161
  # Internal only:
3161
3162
  # pylint: disable=invalid-name
3162
3163
  _include_is_managed: bool = False,
@@ -3240,13 +3241,13 @@ def get_clusters(
3240
3241
  """Add resource str to record"""
3241
3242
  for record in _get_records_with_handle(records):
3242
3243
  handle = record['handle']
3243
- record[
3244
- 'resources_str'] = resources_utils.get_readable_resources_repr(
3245
- handle, simplify=True)
3246
- record[
3247
- 'resources_str_full'] = resources_utils.get_readable_resources_repr(
3248
- handle, simplify=False)
3244
+ resource_str_simple, resource_str_full = (
3245
+ resources_utils.get_readable_resources_repr(
3246
+ handle, simplified_only=summary_response))
3247
+ record['resources_str'] = resource_str_simple
3249
3248
  if not summary_response:
3249
+ assert resource_str_full is not None
3250
+ record['resources_str_full'] = resource_str_full
3250
3251
  record['cluster_name_on_cloud'] = handle.cluster_name_on_cloud
3251
3252
 
3252
3253
  def _update_records_with_credentials(
@@ -3313,6 +3314,8 @@ def get_clusters(
3313
3314
  record['accelerators'] = (
3314
3315
  f'{handle.launched_resources.accelerators}'
3315
3316
  if handle.launched_resources.accelerators else None)
3317
+ if not include_handle:
3318
+ record.pop('handle', None)
3316
3319
 
3317
3320
  # Add handle info to the records
3318
3321
  _update_records_with_handle_info(records)
@@ -2369,9 +2369,8 @@ class RetryingVmProvisioner(object):
2369
2369
  for (resource, exception) in resource_exceptions.items():
2370
2370
  table.add_row([
2371
2371
  resource.infra.formatted_str(),
2372
- resources_utils.format_resource(resource,
2373
- simplify=True),
2374
- exception
2372
+ resources_utils.format_resource(
2373
+ resource, simplified_only=True)[0], exception
2375
2374
  ])
2376
2375
  # Set the max width of REASON column to 80 to avoid the table
2377
2376
  # being wrapped in a unreadable way.
sky/check.py CHANGED
@@ -14,6 +14,7 @@ from sky import global_user_state
14
14
  from sky import sky_logging
15
15
  from sky import skypilot_config
16
16
  from sky.adaptors import cloudflare
17
+ from sky.adaptors import coreweave
17
18
  from sky.clouds import cloud as sky_cloud
18
19
  from sky.skylet import constants
19
20
  from sky.utils import common_utils
@@ -33,7 +34,8 @@ def _get_workspace_allowed_clouds(workspace: str) -> List[str]:
33
34
  # clouds. Also validate names with get_cloud_tuple.
34
35
  config_allowed_cloud_names = skypilot_config.get_nested(
35
36
  ('allowed_clouds',),
36
- [repr(c) for c in registry.CLOUD_REGISTRY.values()] + [cloudflare.NAME])
37
+ [repr(c) for c in registry.CLOUD_REGISTRY.values()] +
38
+ [cloudflare.NAME, coreweave.NAME])
37
39
  # filter out the clouds that are disabled in the workspace config
38
40
  workspace_disabled_clouds = []
39
41
  for cloud in config_allowed_cloud_names:
@@ -81,7 +83,7 @@ def check_capabilities(
81
83
 
82
84
  def get_all_clouds() -> Tuple[str, ...]:
83
85
  return tuple([repr(c) for c in registry.CLOUD_REGISTRY.values()] +
84
- [cloudflare.NAME])
86
+ [cloudflare.NAME, coreweave.NAME])
85
87
 
86
88
  def _execute_check_logic_for_workspace(
87
89
  current_workspace_name: str,
@@ -121,9 +123,12 @@ def check_capabilities(
121
123
  cloud_name: str
122
124
  ) -> Tuple[str, Union[sky_clouds.Cloud, ModuleType]]:
123
125
  # Validates cloud_name and returns a tuple of the cloud's name and
124
- # the cloud object. Includes special handling for Cloudflare.
126
+ # the cloud object. Includes special handling for Cloudflare and
127
+ # CoreWeave.
125
128
  if cloud_name.lower().startswith('cloudflare'):
126
129
  return cloudflare.NAME, cloudflare
130
+ elif cloud_name.lower().startswith('coreweave'):
131
+ return coreweave.NAME, coreweave
127
132
  else:
128
133
  cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud_name)
129
134
  assert cloud_obj is not None, f'Cloud {cloud_name!r} not found'
@@ -219,23 +224,24 @@ def check_capabilities(
219
224
  # allowed_clouds in config.yaml, it will be disabled.
220
225
  all_enabled_clouds: Set[str] = set()
221
226
  for capability in capabilities:
222
- # Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and
223
- # should not be inserted into the DB (otherwise `sky launch` and
224
- # other code would error out when it's trying to look it up in the
225
- # registry).
227
+ # Cloudflare and CoreWeave are not real clouds in
228
+ # registry.CLOUD_REGISTRY, and should not be inserted into the DB
229
+ # (otherwise `sky launch` and other code would error out when it's
230
+ # trying to look it up in the registry).
226
231
  enabled_clouds_set = {
227
232
  cloud for cloud, capabilities in enabled_clouds.items()
228
- if capability in capabilities and
229
- not cloud.startswith('Cloudflare')
233
+ if capability in capabilities and not cloud.startswith(
234
+ 'Cloudflare') and not cloud.startswith('CoreWeave')
230
235
  }
231
236
  disabled_clouds_set = {
232
237
  cloud for cloud, capabilities in disabled_clouds.items()
233
- if capability in capabilities and
234
- not cloud.startswith('Cloudflare')
238
+ if capability in capabilities and not cloud.startswith(
239
+ 'Cloudflare') and not cloud.startswith('CoreWeave')
235
240
  }
236
241
  config_allowed_clouds_set = {
237
242
  cloud for cloud in config_allowed_cloud_names
238
- if not cloud.startswith('Cloudflare')
243
+ if not cloud.startswith('Cloudflare') and
244
+ not cloud.startswith('CoreWeave')
239
245
  }
240
246
  previously_enabled_clouds_set = {
241
247
  repr(cloud)
@@ -430,6 +436,12 @@ def get_cloud_credential_file_mounts(
430
436
  if r2_is_enabled:
431
437
  r2_credential_mounts = cloudflare.get_credential_file_mounts()
432
438
  file_mounts.update(r2_credential_mounts)
439
+
440
+ # Similarly, handle CoreWeave storage credentials
441
+ coreweave_is_enabled, _ = coreweave.check_storage_credentials()
442
+ if coreweave_is_enabled:
443
+ coreweave_credential_mounts = coreweave.get_credential_file_mounts()
444
+ file_mounts.update(coreweave_credential_mounts)
433
445
  return file_mounts
434
446
 
435
447
 
@@ -494,7 +506,7 @@ def _print_checked_cloud(
494
506
  style_str = f'{colorama.Fore.GREEN}{colorama.Style.NORMAL}'
495
507
  status_msg = 'enabled'
496
508
  capability_string = f'[{", ".join(enabled_capabilities)}]'
497
- if verbose and cloud is not cloudflare:
509
+ if verbose and cloud is not cloudflare and cloud is not coreweave:
498
510
  activated_account = cloud.get_active_user_identity_str()
499
511
  if isinstance(cloud_tuple[1], (sky_clouds.SSH, sky_clouds.Kubernetes)):
500
512
  detail_string = _format_context_details(cloud_tuple[1],
sky/client/cli/command.py CHANGED
@@ -1383,7 +1383,26 @@ def _handle_jobs_queue_request(
1383
1383
  try:
1384
1384
  if not is_called_by_user:
1385
1385
  usage_lib.messages.usage.set_internal()
1386
- result = sdk.stream_and_get(request_id)
1386
+ # Call both stream_and_get functions in parallel
1387
+ def get_jobs_queue_result():
1388
+ return sdk.stream_and_get(request_id)
1389
+
1390
+ def get_pool_status_result():
1391
+ if pool_status_request_id is not None:
1392
+ try:
1393
+ return sdk.stream_and_get(pool_status_request_id)
1394
+ except Exception: # pylint: disable=broad-except
1395
+ # If getting pool status fails, just continue without it
1396
+ return None
1397
+ return None
1398
+
1399
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
1400
+ jobs_future = executor.submit(get_jobs_queue_result)
1401
+ pool_status_future = executor.submit(get_pool_status_result)
1402
+
1403
+ result = jobs_future.result()
1404
+ pool_status_result = pool_status_future.result()
1405
+
1387
1406
  if isinstance(result, tuple):
1388
1407
  managed_jobs_, total, status_counts, _ = result
1389
1408
  if only_in_progress:
@@ -1400,13 +1419,6 @@ def _handle_jobs_queue_request(
1400
1419
  managed_jobs_ = result
1401
1420
  num_in_progress_jobs = len(
1402
1421
  set(job['job_id'] for job in managed_jobs_))
1403
- # Try to get pool status if request was made
1404
- if pool_status_request_id is not None:
1405
- try:
1406
- pool_status_result = sdk.stream_and_get(pool_status_request_id)
1407
- except Exception: # pylint: disable=broad-except
1408
- # If getting pool status fails, just continue without it
1409
- pool_status_result = None
1410
1422
  except exceptions.ClusterNotUpError as e:
1411
1423
  controller_status = e.cluster_status
1412
1424
  msg = str(e)
@@ -3452,7 +3464,7 @@ def _down_or_stop_clusters(
3452
3464
  click.echo(f' {name} ({first})')
3453
3465
 
3454
3466
  if failures:
3455
- raise click.ClickException('Cluster(s) failed. See details above.')
3467
+ click.echo('Cluster(s) failed. See details above.')
3456
3468
 
3457
3469
 
3458
3470
  @cli.command(cls=_DocumentedCodeCommand)
@@ -4253,6 +4265,10 @@ def volumes():
4253
4265
  pass
4254
4266
 
4255
4267
 
4268
+ # Add 'volume' as an alias for 'volumes'
4269
+ cli.add_command(volumes, name='volume')
4270
+
4271
+
4256
4272
  @volumes.command('apply', cls=_DocumentedCodeCommand)
4257
4273
  @flags.config_option(expose_value=False)
4258
4274
  @click.argument('entrypoint',
@@ -4661,7 +4677,8 @@ def jobs_launch(
4661
4677
  else:
4662
4678
  # TODO(tian): This can be very long. Considering have a "group id"
4663
4679
  # and query all job ids with the same group id.
4664
- job_ids_str = ','.join(map(str, job_ids))
4680
+ # Sort job ids to ensure consistent ordering.
4681
+ job_ids_str = ','.join(map(str, sorted(job_ids)))
4665
4682
  click.secho(
4666
4683
  f'Jobs submitted with IDs: {colorama.Fore.CYAN}'
4667
4684
  f'{job_ids_str}{colorama.Style.RESET_ALL}.'
@@ -4775,19 +4792,28 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool,
4775
4792
  fields = fields + _USER_NAME_FIELD
4776
4793
  if verbose:
4777
4794
  fields = fields + _USER_HASH_FIELD
4778
- managed_jobs_request_id = managed_jobs.queue(
4779
- refresh=refresh,
4780
- skip_finished=skip_finished,
4781
- all_users=all_users,
4782
- limit=max_num_jobs_to_show,
4783
- fields=fields)
4784
- # Try to get pool status for worker information
4785
- pool_status_request_id = None
4786
- try:
4787
- pool_status_request_id = managed_jobs.pool_status(pool_names=None)
4788
- except Exception: # pylint: disable=broad-except
4789
- # If pool_status fails, we'll just skip the worker information
4790
- pass
4795
+ # Call both managed_jobs.queue and managed_jobs.pool_status in parallel
4796
+ def get_managed_jobs_queue():
4797
+ return managed_jobs.queue(refresh=refresh,
4798
+ skip_finished=skip_finished,
4799
+ all_users=all_users,
4800
+ limit=max_num_jobs_to_show,
4801
+ fields=fields)
4802
+
4803
+ def get_pool_status():
4804
+ try:
4805
+ return managed_jobs.pool_status(pool_names=None)
4806
+ except Exception: # pylint: disable=broad-except
4807
+ # If pool_status fails, we'll just skip the worker information
4808
+ return None
4809
+
4810
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
4811
+ managed_jobs_future = executor.submit(get_managed_jobs_queue)
4812
+ pool_status_future = executor.submit(get_pool_status)
4813
+
4814
+ managed_jobs_request_id = managed_jobs_future.result()
4815
+ pool_status_request_id = pool_status_future.result()
4816
+
4791
4817
  num_jobs, msg = _handle_jobs_queue_request(
4792
4818
  managed_jobs_request_id,
4793
4819
  pool_status_request_id=pool_status_request_id,
@@ -6364,7 +6390,9 @@ INT_OR_NONE = IntOrNone()
6364
6390
  is_flag=True,
6365
6391
  default=False,
6366
6392
  required=False,
6367
- help='Show requests of all statuses.')
6393
+ help=('Show requests of all statuses, including finished ones '
6394
+ '(SUCCEEDED, FAILED, CANCELLED). By default, only active '
6395
+ 'requests (PENDING, RUNNING) are shown.'))
6368
6396
  @click.option(
6369
6397
  '--limit',
6370
6398
  '-l',