skypilot-nightly 1.0.0.dev20250324__py3-none-any.whl → 1.0.0.dev20250326__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '633e16611f2f858dc27c9eae2f410811e0bc714c'
8
+ _SKYPILOT_COMMIT_SHA = '4f8721ab403668198ffd61297874c4c365e642b7'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250324'
38
+ __version__ = '1.0.0.dev20250326'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -3632,8 +3632,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3632
3632
  # should be higher priority than the cluster requests, and we should
3633
3633
  # release the lock from other requests.
3634
3634
  exclude_request_to_kill = 'sky.down' if terminate else 'sky.stop'
3635
- requests_lib.kill_cluster_requests(handle.cluster_name,
3636
- exclude_request_to_kill)
3635
+ try:
3636
+ # TODO(zhwu): we should get rid of this when it is being called
3637
+ # internally without involving an API server, e.g., when a
3638
+ # controller is trying to terminate a cluster.
3639
+ requests_lib.kill_cluster_requests(handle.cluster_name,
3640
+ exclude_request_to_kill)
3641
+ except Exception as e: # pylint: disable=broad-except
3642
+ # We allow the failure to kill other launch requests, because
3643
+ # it is not critical to the cluster teardown.
3644
+ logger.warning(
3645
+ 'Failed to kill other launch requests for the '
3646
+ f'cluster {handle.cluster_name}: '
3647
+ f'{common_utils.format_exception(e, use_bracket=True)}')
3637
3648
  try:
3638
3649
  with filelock.FileLock(
3639
3650
  lock_path,
@@ -4030,8 +4041,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4030
4041
  # the cluster is terminated/stopped. Otherwise, it will be quite
4031
4042
  # confusing to see the cluster restarted immediately after it is
4032
4043
  # terminated/stopped, when there is a pending launch request.
4033
- requests_lib.kill_cluster_requests(handle.cluster_name,
4034
- exclude_request_to_kill)
4044
+ try:
4045
+ # TODO(zhwu): we should get rid of this when it is being called
4046
+ # internally without involving an API server, e.g., when a
4047
+ # controller is trying to terminate a cluster.
4048
+ requests_lib.kill_cluster_requests(handle.cluster_name,
4049
+ exclude_request_to_kill)
4050
+ except Exception as e: # pylint: disable=broad-except
4051
+ # We allow the failure to kill other launch requests, because
4052
+ # it is not critical to the cluster teardown.
4053
+ logger.warning(
4054
+ 'Failed to kill other launch requests for the '
4055
+ f'cluster {handle.cluster_name}: '
4056
+ f'{common_utils.format_exception(e, use_bracket=True)}')
4035
4057
  cluster_status_fetched = False
4036
4058
  if refresh_cluster_status:
4037
4059
  try:
@@ -4358,7 +4380,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4358
4380
  # If cluster_yaml is None, the cluster is guaranteed to be terminated,
4359
4381
  # so we don't need to do the double check.
4360
4382
  if handle.cluster_yaml is not None:
4361
- _detect_abnormal_non_terminated_nodes(handle)
4383
+ try:
4384
+ _detect_abnormal_non_terminated_nodes(handle)
4385
+ except exceptions.ClusterStatusFetchingError as e:
4386
+ if purge:
4387
+ msg = common_utils.format_exception(e, use_bracket=True)
4388
+ logger.warning(
4389
+ 'Failed abnormal non-terminated nodes cleanup. '
4390
+ 'Skipping and cleaning up as purge is set. '
4391
+ f'Details: {msg}')
4392
+ logger.debug(f'Full exception details: {msg}',
4393
+ exc_info=True)
4394
+ else:
4395
+ raise
4362
4396
 
4363
4397
  if not terminate or remove_from_db:
4364
4398
  global_user_state.remove_cluster(handle.cluster_name,
@@ -182,13 +182,7 @@ VM_MINIMAL_PERMISSIONS = [
182
182
 
183
183
  STORAGE_MINIMAL_PERMISSIONS = [
184
184
  'storage.buckets.create',
185
- 'storage.buckets.get',
186
185
  'storage.buckets.delete',
187
- 'storage.objects.create',
188
- 'storage.objects.update',
189
- 'storage.objects.delete',
190
- 'storage.objects.get',
191
- 'storage.objects.list',
192
186
  ]
193
187
 
194
188
  # Permissions implied by GCP built-in roles. We hardcode these here, as we
@@ -396,7 +396,6 @@ pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
396
396
 
397
397
 
398
398
  def create_table(cursor, conn):
399
- del conn
400
399
  # Enable WAL mode to avoid locking issues.
401
400
  # See: issue #1441 and PR #1509
402
401
  # https://github.com/microsoft/WSL/issues/2395
@@ -428,6 +427,9 @@ def create_table(cursor, conn):
428
427
  {COL_USER_ID} TEXT,
429
428
  {COL_STATUS_MSG} TEXT)""")
430
429
 
430
+ db_utils.add_column_to_table(cursor, conn, REQUEST_TABLE, COL_STATUS_MSG,
431
+ 'TEXT')
432
+
431
433
 
432
434
  _DB = None
433
435
 
sky/utils/rich_utils.py CHANGED
@@ -144,15 +144,28 @@ class _RevertibleStatus:
144
144
  return _statuses[self.status_type]
145
145
 
146
146
  def __exit__(self, exc_type, exc_val, exc_tb):
147
- global _status_nesting_level
148
- _status_nesting_level -= 1
149
- if _status_nesting_level <= 0:
150
- _status_nesting_level = 0
151
- if _statuses[self.status_type] is not None:
152
- _statuses[self.status_type].__exit__(exc_type, exc_val, exc_tb)
153
- _statuses[self.status_type] = None
154
- else:
155
- _statuses[self.status_type].update(self.previous_message)
147
+ # We use the same lock with the `safe_logger` to avoid the following 2
148
+ # race conditions. We refer to loggers in another thread as "thread
149
+ # logger" hereafter.
150
+ # 1. When a thread logger stopped the status in `safe_logger`, and
151
+ # here we exit the status and set it to None. Then the thread logger
152
+ # will raise an error when it tries to restart the status.
153
+ # 2. When a thread logger stopped the status in `safe_logger`, and
154
+ # here we exit the status and enter a new one. Then the thread
155
+ # logger will raise an error when it tries to restart the old status,
156
+ # since only one LiveStatus can be started at the same time.
157
+ # Please refer to #4995 for more information.
158
+ with _logging_lock:
159
+ global _status_nesting_level
160
+ _status_nesting_level -= 1
161
+ if _status_nesting_level <= 0:
162
+ _status_nesting_level = 0
163
+ if _statuses[self.status_type] is not None:
164
+ _statuses[self.status_type].__exit__(
165
+ exc_type, exc_val, exc_tb)
166
+ _statuses[self.status_type] = None
167
+ else:
168
+ _statuses[self.status_type].update(self.previous_message)
156
169
 
157
170
  def update(self, *args, **kwargs):
158
171
  _statuses[self.status_type].update(*args, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250324
3
+ Version: 1.0.0.dev20250326
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -193,6 +193,10 @@ Dynamic: summary
193
193
  <img alt="Downloads" src="https://img.shields.io/pypi/dm/skypilot">
194
194
  </a>
195
195
 
196
+ <a href="https://buildkite.com/skypilot-1/full-smoke-tests-run">
197
+ <img alt="Smoke Tests" src="https://badge.buildkite.com/d3aa9d2370e4a9ac4fb5e210381f955082a63a9a46673b197a.svg?theme=github&branch=master">
198
+ </a>
199
+
196
200
  </p>
197
201
 
198
202
  <h3 align="center">
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=FZLm0eOUqmRoDKokxdSzkA-KffEhUH6hS9QYWsjROyY,6428
1
+ sky/__init__.py,sha256=ZQgnwZFx1OI8QGgvePoEaTVl6x9kc5Opq9Vsu0qdHWQ,6428
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=QWybUjX-O6TXzCDSu2vtSgyeQb-OY9B6gDpT-jwYK9I,22700
4
4
  sky/check.py,sha256=iMGuM7yjUPRgDHc13Pf1_LSybBqIexq-6aXfnVcaI54,15898
@@ -34,7 +34,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
34
34
  sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
35
35
  sky/backends/backend.py,sha256=4BOqKZ-bwBTpjNnZF4JAHX2m2Iga7EmEn8Ao3tEivaM,7527
36
36
  sky/backends/backend_utils.py,sha256=ndY4IPs1F9QovyiKAnB1FNYGWm52_ylwf_K7wY50cv0,134922
37
- sky/backends/cloud_vm_ray_backend.py,sha256=zoLk0j6o84fZpGOupRv5mOpDKRf7coPrMMeHCLHdE8w,246263
37
+ sky/backends/cloud_vm_ray_backend.py,sha256=8uMkhOzJqeHDkh1212RUe8HrT5fLYTdxe-beTXgygR8,248154
38
38
  sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
39
39
  sky/backends/local_docker_backend.py,sha256=nSYCjms3HOPjPNOrcCqsUKm1WV3AAovRFjEQ7hcEXW4,17021
40
40
  sky/backends/wheel_utils.py,sha256=meypuMaygSXXjGdXfq6dhWl-OrpAybg9KVRoup4D0wU,9098
@@ -155,7 +155,7 @@ sky/provision/fluidstack/fluidstack_utils.py,sha256=NdhQcwhIPGT21g7lQR-t6j-1zTA_
155
155
  sky/provision/fluidstack/instance.py,sha256=TCGLojd5mEuEaUQ1BnmRvXMOSSBjltyf7dhPG3OLdgQ,13787
156
156
  sky/provision/gcp/__init__.py,sha256=zlgjR2JoaGD7sStGStMRu9bJ62f-8NKEIyb-bFHBlzM,528
157
157
  sky/provision/gcp/config.py,sha256=kU357o4tCTuQ2e0Gind5q_tC0kFRW2tch-NQ2DnwN9Q,33319
158
- sky/provision/gcp/constants.py,sha256=KbnAupsKCp_mNNOas62-yiCJcqYoaer7iMljFq-CPfk,7739
158
+ sky/provision/gcp/constants.py,sha256=G4Q5zUantKFEnkt9DG1-in3q_KtD7JObRFTw1kEFDWA,7567
159
159
  sky/provision/gcp/instance.py,sha256=47jDHLbIAI5M1MZIQTCiKGfwc9QzPOyjApkShqBRczE,25035
160
160
  sky/provision/gcp/instance_utils.py,sha256=T0AVT8lMn128snPp3MvqmhXOihlZSC8-c1QpgYT4_FA,71377
161
161
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
@@ -239,7 +239,7 @@ sky/server/requests/event_loop.py,sha256=OhpPbuce65bbjpGRlcJa78AVnYSm08SzFKt70yp
239
239
  sky/server/requests/executor.py,sha256=BNJqkTQ3swYeRO5YVW-dTmobL2CYnDDf_m-kY7__n40,21684
240
240
  sky/server/requests/payloads.py,sha256=6egfR6QSpd1WZP7K0joCQOrA0K6n7iTQbKL02T_RCOM,16494
241
241
  sky/server/requests/preconditions.py,sha256=ipxIb_3JXG6S3-ymcOdqQNb7VDvoPqADxu9ZK7-nQWc,7179
242
- sky/server/requests/requests.py,sha256=Sys2rg22rIXn7SrHfKzDVuTjBdRlm5oZk58u1UmS6JA,21231
242
+ sky/server/requests/requests.py,sha256=9ovdQE-zv_Mvc6IbGATHVyQlOxSKjg_OankZbgDVGeE,21338
243
243
  sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
244
244
  sky/server/requests/queues/mp_queue.py,sha256=_7AFas__0b1L8e7Bwy4lu0VYU18R85YwMlDHPhQCfh0,2998
245
245
  sky/server/requests/serializers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -326,7 +326,7 @@ sky/utils/message_utils.py,sha256=zi2Z7PEX6Xq_zvho-aEZe_J7UvpKOLdVDdGAcipRQPU,26
326
326
  sky/utils/registry.py,sha256=sH_VBupeczMHJIQMXPFv9jNMqE_ZF1ytOUcDFGdHjxA,4132
327
327
  sky/utils/resources_utils.py,sha256=URp6OS9B9nc9tIB5ibZCgGK4XSABmI4kRG0wOM6qgvs,7774
328
328
  sky/utils/rich_console_utils.py,sha256=wPvAlshaFHuMZSjiDnaK3OSBppZLBjAn-lj7AvxNBQk,553
329
- sky/utils/rich_utils.py,sha256=7QknWkr5uRUawabEYM41Lf5CBHLdPGpTAdOfmy40_S4,12126
329
+ sky/utils/rich_utils.py,sha256=PK5nVUFRMvg7ngYoIxZU6Ldcyiy3VfXriOLgNFkv53g,12980
330
330
  sky/utils/schemas.py,sha256=KJCHrn1nMZ3XqzddWuu_nFQoRQw01cZh9qh19OrRtps,30145
331
331
  sky/utils/status_lib.py,sha256=zn_MSuRYQdNKF8pnFOGQ54X_s_R7dyqWS6Q3a9zENw8,1512
332
332
  sky/utils/subprocess_utils.py,sha256=yM2WumV49gSKuZs0v6E3R8XKl5Q9b6veIzi6us5ORU8,15927
@@ -348,9 +348,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
348
348
  sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=NtfbovAECN2A_R8x4FiNPaBVPWeuwaOVuuGsnVOSk2g,10231
349
349
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
350
350
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
351
- skypilot_nightly-1.0.0.dev20250324.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
352
- skypilot_nightly-1.0.0.dev20250324.dist-info/METADATA,sha256=wJClvsEDKwrcbhu1y2kadg0B6iGbfknPR8dJAaJjKpA,18438
353
- skypilot_nightly-1.0.0.dev20250324.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
354
- skypilot_nightly-1.0.0.dev20250324.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
355
- skypilot_nightly-1.0.0.dev20250324.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
356
- skypilot_nightly-1.0.0.dev20250324.dist-info/RECORD,,
351
+ skypilot_nightly-1.0.0.dev20250326.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
352
+ skypilot_nightly-1.0.0.dev20250326.dist-info/METADATA,sha256=V5UlwHyFmeBcPPaUBEzuhCvdyoXArUCqy4zdQrckbMg,18657
353
+ skypilot_nightly-1.0.0.dev20250326.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
354
+ skypilot_nightly-1.0.0.dev20250326.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
355
+ skypilot_nightly-1.0.0.dev20250326.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
356
+ skypilot_nightly-1.0.0.dev20250326.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.3)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5