skypilot-nightly 1.0.0.dev20250117__py3-none-any.whl → 1.0.0.dev20250119__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '9e1b4ddc5fb1cb3fd6c00c106555b919e449e2c9'
8
+ _SKYPILOT_COMMIT_SHA = '2354b818b6faa06ac65e1eb5bfad7e2278ca43f8'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250117'
38
+ __version__ = '1.0.0.dev20250119'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/jobs/state.py CHANGED
@@ -313,6 +313,8 @@ _SPOT_STATUS_TO_COLOR = {
313
313
  class ManagedJobScheduleState(enum.Enum):
314
314
  """Captures the state of the job from the scheduler's perspective.
315
315
 
316
+ A job that predates the introduction of the scheduler will be INVALID.
317
+
316
318
  A newly created job will be INACTIVE. The following transitions are valid:
317
319
  - INACTIVE -> WAITING: The job is "submitted" to the scheduler, and its job
318
320
  controller can be started.
@@ -342,6 +344,10 @@ class ManagedJobScheduleState(enum.Enum):
342
344
  briefly observe inconsistent states, like a job that just finished but
343
345
  hasn't yet transitioned to DONE.
344
346
  """
347
+ # This job may have been created before scheduler was introduced in #4458.
348
+ # This state is not used by scheduler but just for backward compatibility.
349
+ # TODO(cooperc): remove this in v0.11.0
350
+ INVALID = None
345
351
  # The job should be ignored by the scheduler.
346
352
  INACTIVE = 'INACTIVE'
347
353
  # The job is waiting to transition to LAUNCHING for the first time. The
sky/provision/gcp/instance_utils.py CHANGED
@@ -38,7 +38,7 @@ _FIREWALL_RESOURCE_NOT_FOUND_PATTERN = re.compile(
38
38
  r'The resource \'projects/.*/global/firewalls/.*\' was not found')
39
39
 
40
40
 
41
- def _retry_on_http_exception(
41
+ def _retry_on_gcp_http_exception(
42
42
  regex: Optional[str] = None,
43
43
  max_retries: int = GCP_MAX_RETRIES,
44
44
  retry_interval_s: int = GCP_RETRY_INTERVAL_SECONDS,
@@ -49,17 +49,18 @@ def _retry_on_http_exception(
49
49
 
50
50
  @functools.wraps(func)
51
51
  def wrapper(*args, **kwargs):
52
- exception_type = gcp.http_error_exception()
53
52
 
54
53
  def try_catch_exc():
55
54
  try:
56
55
  value = func(*args, **kwargs)
57
56
  return value
58
57
  except Exception as e: # pylint: disable=broad-except
59
- if not isinstance(e, exception_type) or (
60
- regex and not re.search(regex, str(e))):
61
- raise
62
- return e
58
+ if (isinstance(e, gcp.http_error_exception()) and
59
+ (regex is None or re.search(regex, str(e)))):
60
+ logger.error(
61
+ f'Retrying for gcp.http_error_exception: {e}')
62
+ return e
63
+ raise
63
64
 
64
65
  for _ in range(max_retries):
65
66
  ret = try_catch_exc()
@@ -431,7 +432,7 @@ class GCPComputeInstance(GCPInstance):
431
432
  logger.debug(
432
433
  f'Waiting GCP operation {operation["name"]} to be ready ...')
433
434
 
434
- @_retry_on_http_exception(
435
+ @_retry_on_gcp_http_exception(
435
436
  f'Failed to wait for operation {operation["name"]}')
436
437
  def call_operation(fn, timeout: int):
437
438
  request = fn(
@@ -613,6 +614,11 @@ class GCPComputeInstance(GCPInstance):
613
614
  return operation
614
615
 
615
616
  @classmethod
617
+ # When there is a cloud function running in parallel to set labels for
618
+ # newly created instances, it may fail with the following error:
619
+ # "Labels fingerprint either invalid or resource labels have changed"
620
+ # We should retry until the labels are set successfully.
621
+ @_retry_on_gcp_http_exception('Labels fingerprint either invalid')
616
622
  def set_labels(cls, project_id: str, availability_zone: str, node_id: str,
617
623
  labels: dict) -> None:
618
624
  node = cls.load_resource().instances().get(
@@ -1211,7 +1217,7 @@ class GCPTPUVMInstance(GCPInstance):
1211
1217
  """Poll for TPU operation until finished."""
1212
1218
  del project_id, region, zone # unused
1213
1219
 
1214
- @_retry_on_http_exception(
1220
+ @_retry_on_gcp_http_exception(
1215
1221
  f'Failed to wait for operation {operation["name"]}')
1216
1222
  def call_operation(fn, timeout: int):
1217
1223
  request = fn(name=operation['name'])
@@ -1379,7 +1385,7 @@ class GCPTPUVMInstance(GCPInstance):
1379
1385
  f'Failed to get VPC name for instance {instance}') from e
1380
1386
 
1381
1387
  @classmethod
1382
- @_retry_on_http_exception('unable to queue the operation')
1388
+ @_retry_on_gcp_http_exception('unable to queue the operation')
1383
1389
  def set_labels(cls, project_id: str, availability_zone: str, node_id: str,
1384
1390
  labels: dict) -> None:
1385
1391
  while True:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250117
3
+ Version: 1.0.0.dev20250119
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=kA2ieB1SawvfYteqYNXCfJepPEo1gELmFmJNfKXgdkM,5944
1
+ sky/__init__.py,sha256=aC5XkRJ6V-eJ02IR9TVtzIZdM6O3RIr3pg2xI_Qb3jM,5944
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=LXUDABKP1FJCS256xTTDJa40WXwHKF5x49S-4hZbD1M,21501
4
4
  sky/check.py,sha256=s8deMVL-k9y8gd519K7NWZc3DqWsEySwiAr0uH3Vvcc,9459
@@ -102,7 +102,7 @@ sky/jobs/controller.py,sha256=Qv7vOj4OXkbnZF0F9wKrlJsYhkSNJjJ1Mgrn2FyQyaM,28250
102
102
  sky/jobs/core.py,sha256=2_Q9thiBPnd3i2nDqyUtQY-dsGZ1kRgAdnLcXHoycYo,19938
103
103
  sky/jobs/recovery_strategy.py,sha256=m-EA-MWXPFrgx2CYFPr6MmgeUoDTEBmY2xruD2PRSGY,26365
104
104
  sky/jobs/scheduler.py,sha256=WAvNb8-vBk8q1zFordFdpH7gxqWDjPHDGZZay6aodOk,12028
105
- sky/jobs/state.py,sha256=DK-tQnN200SpCXzDllRRibfQOjDCCFo7VRGSdi-FKA4,37856
105
+ sky/jobs/state.py,sha256=Cjv2UEKfk3j7enXaCkU9CDqsvUfYZ3FWnYEH5HMachs,38153
106
106
  sky/jobs/utils.py,sha256=waKmLbUNRXeuYKBn_U7sekSFGAEgoPp9QemUULK4Y9k,49491
107
107
  sky/jobs/dashboard/dashboard.py,sha256=KMSarpVcfnc-ELPFvy1M9_I1k4kSeXubTk3ibQC67Tg,3219
108
108
  sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
@@ -142,7 +142,7 @@ sky/provision/gcp/__init__.py,sha256=zlgjR2JoaGD7sStGStMRu9bJ62f-8NKEIyb-bFHBlzM
142
142
  sky/provision/gcp/config.py,sha256=rNpnRFNZqqvEHjzjSdwMoI7Fq7RW9w_dL2vIaubj3Dc,33319
143
143
  sky/provision/gcp/constants.py,sha256=9eLZatVSW2uGqxrvFGaWK2A_Ab0o_4S4GdgUuI7mfsk,7441
144
144
  sky/provision/gcp/instance.py,sha256=AMnJz6xDwYqIHmDfJfENTG_ID6uhjD_2VhlWw8FJp_s,24934
145
- sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHaSvY7c8,70943
145
+ sky/provision/gcp/instance_utils.py,sha256=T0AVT8lMn128snPp3MvqmhXOihlZSC8-c1QpgYT4_FA,71377
146
146
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
147
147
  sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
148
148
  sky/provision/kubernetes/config.py,sha256=bXwOGdSAnXCkDreew0KsSUqSv3ZrptNeevqat76LLts,29012
@@ -289,9 +289,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
289
289
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
290
290
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
291
291
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
292
- skypilot_nightly-1.0.0.dev20250117.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
293
- skypilot_nightly-1.0.0.dev20250117.dist-info/METADATA,sha256=NeM0Gfty_C94SeoCFNc7G5N0Q2Gb4tNCxgwZv_e_jnQ,20884
294
- skypilot_nightly-1.0.0.dev20250117.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
295
- skypilot_nightly-1.0.0.dev20250117.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
296
- skypilot_nightly-1.0.0.dev20250117.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
297
- skypilot_nightly-1.0.0.dev20250117.dist-info/RECORD,,
292
+ skypilot_nightly-1.0.0.dev20250119.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
293
+ skypilot_nightly-1.0.0.dev20250119.dist-info/METADATA,sha256=EMtxUVksl6PZt-fJt5k6PvgcWoaDL1pHzaQEc5K9iCU,20884
294
+ skypilot_nightly-1.0.0.dev20250119.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
295
+ skypilot_nightly-1.0.0.dev20250119.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
296
+ skypilot_nightly-1.0.0.dev20250119.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
297
+ skypilot_nightly-1.0.0.dev20250119.dist-info/RECORD,,