skypilot-nightly 1.0.0.dev20250117__py3-none-any.whl → 1.0.0.dev20250119__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/jobs/state.py +6 -0
- sky/provision/gcp/instance_utils.py +15 -9
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/RECORD +9 -9
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '2354b818b6faa06ac65e1eb5bfad7e2278ca43f8'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250117'
|
38
|
+
__version__ = '1.0.0.dev20250119'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/jobs/state.py
CHANGED
@@ -313,6 +313,8 @@ _SPOT_STATUS_TO_COLOR = {
|
|
313
313
|
class ManagedJobScheduleState(enum.Enum):
|
314
314
|
"""Captures the state of the job from the scheduler's perspective.
|
315
315
|
|
316
|
+
A job that predates the introduction of the scheduler will be INVALID.
|
317
|
+
|
316
318
|
A newly created job will be INACTIVE. The following transitions are valid:
|
317
319
|
- INACTIVE -> WAITING: The job is "submitted" to the scheduler, and its job
|
318
320
|
controller can be started.
|
@@ -342,6 +344,10 @@ class ManagedJobScheduleState(enum.Enum):
|
|
342
344
|
briefly observe inconsistent states, like a job that just finished but
|
343
345
|
hasn't yet transitioned to DONE.
|
344
346
|
"""
|
347
|
+
# This job may have been created before scheduler was introduced in #4458.
|
348
|
+
# This state is not used by scheduler but just for backward compatibility.
|
349
|
+
# TODO(cooperc): remove this in v0.11.0
|
350
|
+
INVALID = None
|
345
351
|
# The job should be ignored by the scheduler.
|
346
352
|
INACTIVE = 'INACTIVE'
|
347
353
|
# The job is waiting to transition to LAUNCHING for the first time. The
|
sky/provision/gcp/instance_utils.py
CHANGED
@@ -38,7 +38,7 @@ _FIREWALL_RESOURCE_NOT_FOUND_PATTERN = re.compile(
|
|
38
38
|
r'The resource \'projects/.*/global/firewalls/.*\' was not found')
|
39
39
|
|
40
40
|
|
41
|
-
def _retry_on_http_exception(
|
41
|
+
def _retry_on_gcp_http_exception(
|
42
42
|
regex: Optional[str] = None,
|
43
43
|
max_retries: int = GCP_MAX_RETRIES,
|
44
44
|
retry_interval_s: int = GCP_RETRY_INTERVAL_SECONDS,
|
@@ -49,17 +49,18 @@ def _retry_on_http_exception(
|
|
49
49
|
|
50
50
|
@functools.wraps(func)
|
51
51
|
def wrapper(*args, **kwargs):
|
52
|
-
exception_type = gcp.http_error_exception()
|
53
52
|
|
54
53
|
def try_catch_exc():
|
55
54
|
try:
|
56
55
|
value = func(*args, **kwargs)
|
57
56
|
return value
|
58
57
|
except Exception as e: # pylint: disable=broad-except
|
59
|
-
if
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
if (isinstance(e, gcp.http_error_exception()) and
|
59
|
+
(regex is None or re.search(regex, str(e)))):
|
60
|
+
logger.error(
|
61
|
+
f'Retrying for gcp.http_error_exception: {e}')
|
62
|
+
return e
|
63
|
+
raise
|
63
64
|
|
64
65
|
for _ in range(max_retries):
|
65
66
|
ret = try_catch_exc()
|
@@ -431,7 +432,7 @@ class GCPComputeInstance(GCPInstance):
|
|
431
432
|
logger.debug(
|
432
433
|
f'Waiting GCP operation {operation["name"]} to be ready ...')
|
433
434
|
|
434
|
-
@_retry_on_http_exception(
|
435
|
+
@_retry_on_gcp_http_exception(
|
435
436
|
f'Failed to wait for operation {operation["name"]}')
|
436
437
|
def call_operation(fn, timeout: int):
|
437
438
|
request = fn(
|
@@ -613,6 +614,11 @@ class GCPComputeInstance(GCPInstance):
|
|
613
614
|
return operation
|
614
615
|
|
615
616
|
@classmethod
|
617
|
+
# When there is a cloud function running in parallel to set labels for
|
618
|
+
# newly created instances, it may fail with the following error:
|
619
|
+
# "Labels fingerprint either invalid or resource labels have changed"
|
620
|
+
# We should retry until the labels are set successfully.
|
621
|
+
@_retry_on_gcp_http_exception('Labels fingerprint either invalid')
|
616
622
|
def set_labels(cls, project_id: str, availability_zone: str, node_id: str,
|
617
623
|
labels: dict) -> None:
|
618
624
|
node = cls.load_resource().instances().get(
|
@@ -1211,7 +1217,7 @@ class GCPTPUVMInstance(GCPInstance):
|
|
1211
1217
|
"""Poll for TPU operation until finished."""
|
1212
1218
|
del project_id, region, zone # unused
|
1213
1219
|
|
1214
|
-
@_retry_on_http_exception(
|
1220
|
+
@_retry_on_gcp_http_exception(
|
1215
1221
|
f'Failed to wait for operation {operation["name"]}')
|
1216
1222
|
def call_operation(fn, timeout: int):
|
1217
1223
|
request = fn(name=operation['name'])
|
@@ -1379,7 +1385,7 @@ class GCPTPUVMInstance(GCPInstance):
|
|
1379
1385
|
f'Failed to get VPC name for instance {instance}') from e
|
1380
1386
|
|
1381
1387
|
@classmethod
|
1382
|
-
@
|
1388
|
+
@_retry_on_gcp_http_exception('unable to queue the operation')
|
1383
1389
|
def set_labels(cls, project_id: str, availability_zone: str, node_id: str,
|
1384
1390
|
labels: dict) -> None:
|
1385
1391
|
while True:
|
{skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=aC5XkRJ6V-eJ02IR9TVtzIZdM6O3RIr3pg2xI_Qb3jM,5944
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=LXUDABKP1FJCS256xTTDJa40WXwHKF5x49S-4hZbD1M,21501
|
4
4
|
sky/check.py,sha256=s8deMVL-k9y8gd519K7NWZc3DqWsEySwiAr0uH3Vvcc,9459
|
@@ -102,7 +102,7 @@ sky/jobs/controller.py,sha256=Qv7vOj4OXkbnZF0F9wKrlJsYhkSNJjJ1Mgrn2FyQyaM,28250
|
|
102
102
|
sky/jobs/core.py,sha256=2_Q9thiBPnd3i2nDqyUtQY-dsGZ1kRgAdnLcXHoycYo,19938
|
103
103
|
sky/jobs/recovery_strategy.py,sha256=m-EA-MWXPFrgx2CYFPr6MmgeUoDTEBmY2xruD2PRSGY,26365
|
104
104
|
sky/jobs/scheduler.py,sha256=WAvNb8-vBk8q1zFordFdpH7gxqWDjPHDGZZay6aodOk,12028
|
105
|
-
sky/jobs/state.py,sha256=
|
105
|
+
sky/jobs/state.py,sha256=Cjv2UEKfk3j7enXaCkU9CDqsvUfYZ3FWnYEH5HMachs,38153
|
106
106
|
sky/jobs/utils.py,sha256=waKmLbUNRXeuYKBn_U7sekSFGAEgoPp9QemUULK4Y9k,49491
|
107
107
|
sky/jobs/dashboard/dashboard.py,sha256=KMSarpVcfnc-ELPFvy1M9_I1k4kSeXubTk3ibQC67Tg,3219
|
108
108
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
@@ -142,7 +142,7 @@ sky/provision/gcp/__init__.py,sha256=zlgjR2JoaGD7sStGStMRu9bJ62f-8NKEIyb-bFHBlzM
|
|
142
142
|
sky/provision/gcp/config.py,sha256=rNpnRFNZqqvEHjzjSdwMoI7Fq7RW9w_dL2vIaubj3Dc,33319
|
143
143
|
sky/provision/gcp/constants.py,sha256=9eLZatVSW2uGqxrvFGaWK2A_Ab0o_4S4GdgUuI7mfsk,7441
|
144
144
|
sky/provision/gcp/instance.py,sha256=AMnJz6xDwYqIHmDfJfENTG_ID6uhjD_2VhlWw8FJp_s,24934
|
145
|
-
sky/provision/gcp/instance_utils.py,sha256=
|
145
|
+
sky/provision/gcp/instance_utils.py,sha256=T0AVT8lMn128snPp3MvqmhXOihlZSC8-c1QpgYT4_FA,71377
|
146
146
|
sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
|
147
147
|
sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
|
148
148
|
sky/provision/kubernetes/config.py,sha256=bXwOGdSAnXCkDreew0KsSUqSv3ZrptNeevqat76LLts,29012
|
@@ -289,9 +289,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
|
|
289
289
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
290
290
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
291
291
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
292
|
-
skypilot_nightly-1.0.0.
|
293
|
-
skypilot_nightly-1.0.0.
|
294
|
-
skypilot_nightly-1.0.0.
|
295
|
-
skypilot_nightly-1.0.0.
|
296
|
-
skypilot_nightly-1.0.0.
|
297
|
-
skypilot_nightly-1.0.0.
|
292
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
293
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/METADATA,sha256=EMtxUVksl6PZt-fJt5k6PvgcWoaDL1pHzaQEc5K9iCU,20884
|
294
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
295
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
296
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
297
|
+
skypilot_nightly-1.0.0.dev20250119.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20250117.dist-info → skypilot_nightly-1.0.0.dev20250119.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|