skypilot-nightly 1.0.0.dev20250623__py3-none-any.whl → 1.0.0.dev20250625__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +16 -5
  3. sky/backends/__init__.py +2 -1
  4. sky/backends/backend_utils.py +38 -11
  5. sky/backends/cloud_vm_ray_backend.py +52 -18
  6. sky/client/cli/command.py +264 -25
  7. sky/client/sdk.py +119 -85
  8. sky/clouds/aws.py +10 -7
  9. sky/clouds/azure.py +10 -7
  10. sky/clouds/cloud.py +2 -0
  11. sky/clouds/cudo.py +2 -0
  12. sky/clouds/do.py +10 -7
  13. sky/clouds/fluidstack.py +2 -0
  14. sky/clouds/gcp.py +10 -7
  15. sky/clouds/hyperbolic.py +10 -7
  16. sky/clouds/ibm.py +2 -0
  17. sky/clouds/kubernetes.py +27 -9
  18. sky/clouds/lambda_cloud.py +10 -7
  19. sky/clouds/nebius.py +10 -7
  20. sky/clouds/oci.py +10 -7
  21. sky/clouds/paperspace.py +10 -7
  22. sky/clouds/runpod.py +10 -7
  23. sky/clouds/scp.py +10 -7
  24. sky/clouds/vast.py +10 -7
  25. sky/clouds/vsphere.py +2 -0
  26. sky/core.py +89 -15
  27. sky/dag.py +14 -0
  28. sky/dashboard/out/404.html +1 -1
  29. sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +16 -0
  32. sky/dashboard/out/_next/static/chunks/37-1f1e94f5a561202a.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +6 -0
  34. sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/{513.211357a2914a34b2.js → 513.309df9e18a9ff005.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +6 -0
  40. sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
  41. sky/dashboard/out/_next/static/chunks/856-cdf66268ec878d0c.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/969-d3a0b53f728d280a.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/989-db34c16ad7ea6155.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/{_app-c416e87d5c2715cf.js → _app-0ef7418d1a3822f3.js} +1 -1
  46. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
  47. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-32ce4f49f2261f55.js +6 -0
  48. sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
  53. sky/dashboard/out/_next/static/chunks/pages/jobs-26da173e20af16e4.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/users-ce29e7420385563d.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c4ff1ec05e2f3daf.js → [name]-0b4c662a25e4747a.js} +1 -1
  58. sky/dashboard/out/_next/static/chunks/pages/workspaces-862b120406461b10.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +1 -0
  60. sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +3 -0
  61. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  62. sky/dashboard/out/clusters/[cluster].html +1 -1
  63. sky/dashboard/out/clusters.html +1 -1
  64. sky/dashboard/out/config.html +1 -1
  65. sky/dashboard/out/index.html +1 -1
  66. sky/dashboard/out/infra/[context].html +1 -1
  67. sky/dashboard/out/infra.html +1 -1
  68. sky/dashboard/out/jobs/[job].html +1 -1
  69. sky/dashboard/out/jobs.html +1 -1
  70. sky/dashboard/out/users.html +1 -1
  71. sky/dashboard/out/volumes.html +1 -0
  72. sky/dashboard/out/workspace/new.html +1 -1
  73. sky/dashboard/out/workspaces/[name].html +1 -1
  74. sky/dashboard/out/workspaces.html +1 -1
  75. sky/data/storage_utils.py +2 -4
  76. sky/exceptions.py +26 -0
  77. sky/execution.py +5 -0
  78. sky/global_user_state.py +263 -20
  79. sky/jobs/client/sdk.py +13 -12
  80. sky/jobs/controller.py +5 -1
  81. sky/jobs/scheduler.py +4 -3
  82. sky/jobs/server/core.py +121 -51
  83. sky/jobs/state.py +15 -0
  84. sky/jobs/utils.py +114 -8
  85. sky/models.py +16 -0
  86. sky/provision/__init__.py +26 -0
  87. sky/provision/kubernetes/__init__.py +3 -0
  88. sky/provision/kubernetes/instance.py +38 -77
  89. sky/provision/kubernetes/utils.py +52 -2
  90. sky/provision/kubernetes/volume.py +147 -0
  91. sky/resources.py +20 -76
  92. sky/serve/client/sdk.py +13 -13
  93. sky/serve/server/core.py +5 -1
  94. sky/server/common.py +40 -5
  95. sky/server/constants.py +5 -1
  96. sky/server/metrics.py +105 -0
  97. sky/server/requests/executor.py +30 -14
  98. sky/server/requests/payloads.py +22 -3
  99. sky/server/requests/requests.py +59 -2
  100. sky/server/rest.py +152 -0
  101. sky/server/server.py +70 -19
  102. sky/server/state.py +20 -0
  103. sky/server/stream_utils.py +8 -3
  104. sky/server/uvicorn.py +153 -13
  105. sky/setup_files/dependencies.py +2 -0
  106. sky/skylet/constants.py +19 -14
  107. sky/task.py +141 -43
  108. sky/templates/jobs-controller.yaml.j2 +12 -1
  109. sky/templates/kubernetes-ray.yml.j2 +31 -2
  110. sky/users/permission.py +2 -0
  111. sky/utils/admin_policy_utils.py +5 -1
  112. sky/utils/cli_utils/status_utils.py +25 -17
  113. sky/utils/command_runner.py +118 -12
  114. sky/utils/command_runner.pyi +57 -0
  115. sky/utils/common_utils.py +9 -1
  116. sky/utils/context.py +3 -1
  117. sky/utils/controller_utils.py +1 -2
  118. sky/utils/resources_utils.py +66 -0
  119. sky/utils/rich_utils.py +6 -0
  120. sky/utils/schemas.py +180 -38
  121. sky/utils/status_lib.py +10 -0
  122. sky/utils/validator.py +11 -1
  123. sky/volumes/__init__.py +0 -0
  124. sky/volumes/client/__init__.py +0 -0
  125. sky/volumes/client/sdk.py +64 -0
  126. sky/volumes/server/__init__.py +0 -0
  127. sky/volumes/server/core.py +199 -0
  128. sky/volumes/server/server.py +85 -0
  129. sky/volumes/utils.py +158 -0
  130. sky/volumes/volume.py +198 -0
  131. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/METADATA +2 -1
  132. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/RECORD +139 -123
  133. sky/dashboard/out/_next/static/F4kiZ6Zh72jA6HzZ3ncFo/_buildManifest.js +0 -1
  134. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/37-3a4d77ad62932eaf.js +0 -6
  136. sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +0 -6
  137. sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
  139. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
  140. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
  141. sky/dashboard/out/_next/static/chunks/856-c2c39c0912285e54.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/938-1493ac755eadeb35.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +0 -1
  145. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
  146. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +0 -6
  147. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-36bc0962129f72df.js +0 -6
  148. sky/dashboard/out/_next/static/chunks/pages/clusters-82a651dbad53ec6e.js +0 -1
  149. sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +0 -1
  151. sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-cf490d1fa38f3740.js +0 -16
  153. sky/dashboard/out/_next/static/chunks/pages/jobs-336ab80e270ce2ce.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +0 -1
  156. sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +0 -1
  157. sky/dashboard/out/_next/static/chunks/webpack-0263b00d6a10e64a.js +0 -1
  158. sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +0 -3
  159. /sky/dashboard/out/_next/static/{F4kiZ6Zh72jA6HzZ3ncFo → ZWdSYkqVe3WjnFR8ocqoG}/_ssgManifest.js +0 -0
  160. /sky/dashboard/out/_next/static/chunks/{843-b3040e493f6e7947.js → 843-07d25a7e64462fd8.js} +0 -0
  161. /sky/dashboard/out/_next/static/chunks/{973-db3c97c2bfbceb65.js → 973-5b5019ba333e8d62.js} +0 -0
  162. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/WHEEL +0 -0
  163. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/entry_points.txt +0 -0
  164. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/licenses/LICENSE +0 -0
  165. {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'c8d3f5cb372dc93cdc4d95ffaa9cbd9e08402ab4'
8
+ _SKYPILOT_COMMIT_SHA = 'cc8dbb73a2d26e8c017a788b0bbfc63041c78bae'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250623'
38
+ __version__ = '1.0.0.dev20250625'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/admin_policy.py CHANGED
@@ -4,11 +4,13 @@ import dataclasses
4
4
  import typing
5
5
  from typing import Any, Dict, Optional
6
6
 
7
+ import colorama
7
8
  import pydantic
8
9
 
9
10
  import sky
10
11
  from sky import exceptions
11
12
  from sky.adaptors import common as adaptors_common
13
+ from sky.utils import common_utils
12
14
  from sky.utils import config_utils
13
15
  from sky.utils import ux_utils
14
16
 
@@ -218,18 +220,27 @@ class RestfulAdminPolicy(PolicyTemplate):
218
220
  headers={'Content-Type': 'application/json'},
219
221
  # TODO(aylei): make this configurable
220
222
  timeout=30)
223
+ if response.status_code == 400:
224
+ raise exceptions.UserRequestRejectedByPolicy(
225
+ f'{colorama.Fore.RED}User request is rejected by admin '
226
+ f'policy {self.policy_url}{colorama.Fore.RESET}: '
227
+ f'{response.text}')
221
228
  response.raise_for_status()
222
229
  except requests.exceptions.RequestException as e:
223
230
  with ux_utils.print_exception_no_traceback():
224
- raise exceptions.UserRequestRejectedByPolicy(
225
- f'Failed to validate request with admin policy URL '
226
- f'{self.policy_url}: {e}') from e
231
+ raise exceptions.RestfulPolicyError(
232
+ f'Failed to call admin policy URL '
233
+ f'{self.policy_url}: {e}') from None
227
234
 
228
235
  try:
229
236
  mutated_user_request = MutatedUserRequest.decode(response.json())
230
237
  except Exception as e: # pylint: disable=broad-except
231
238
  with ux_utils.print_exception_no_traceback():
232
- raise exceptions.UserRequestRejectedByPolicy(
239
+ raise exceptions.RestfulPolicyError(
233
240
  f'Failed to decode response from admin policy URL '
234
- f'{self.policy_url}: {e}') from e
241
+ f'{self.policy_url}: {common_utils.format_exception(e, use_bracket=True)}'
242
+ ) from None
235
243
  return mutated_user_request
244
+
245
+ def __repr__(self):
246
+ return f'RestfulAdminPolicy(policy_url={self.policy_url})'
sky/backends/__init__.py CHANGED
@@ -3,11 +3,12 @@ from sky.backends.backend import Backend
3
3
  from sky.backends.backend import ResourceHandle
4
4
  from sky.backends.cloud_vm_ray_backend import CloudVmRayBackend
5
5
  from sky.backends.cloud_vm_ray_backend import CloudVmRayResourceHandle
6
+ from sky.backends.cloud_vm_ray_backend import LocalResourcesHandle
6
7
  from sky.backends.local_docker_backend import LocalDockerBackend
7
8
  from sky.backends.local_docker_backend import LocalDockerResourceHandle
8
9
 
9
10
  __all__ = [
10
11
  'Backend', 'ResourceHandle', 'CloudVmRayBackend',
11
- 'CloudVmRayResourceHandle', 'LocalDockerBackend',
12
+ 'CloudVmRayResourceHandle', 'LocalResourcesHandle', 'LocalDockerBackend',
12
13
  'LocalDockerResourceHandle'
13
14
  ]
@@ -33,6 +33,7 @@ from sky import provision as provision_lib
33
33
  from sky import sky_logging
34
34
  from sky import skypilot_config
35
35
  from sky.adaptors import common as adaptors_common
36
+ from sky.jobs import utils as managed_job_utils
36
37
  from sky.provision import instance_setup
37
38
  from sky.provision.kubernetes import utils as kubernetes_utils
38
39
  from sky.skylet import constants
@@ -65,6 +66,7 @@ if typing.TYPE_CHECKING:
65
66
  from sky import task as task_lib
66
67
  from sky.backends import cloud_vm_ray_backend
67
68
  from sky.backends import local_docker_backend
69
+ from sky.volumes import volume as volume_lib
68
70
  else:
69
71
  yaml = adaptors_common.LazyImport('yaml')
70
72
  requests = adaptors_common.LazyImport('requests')
@@ -540,16 +542,18 @@ def get_expirable_clouds(
540
542
  # TODO: too many things happening here - leaky abstraction. Refactor.
541
543
  @timeline.event
542
544
  def write_cluster_config(
543
- to_provision: 'resources_lib.Resources',
544
- num_nodes: int,
545
- cluster_config_template: str,
546
- cluster_name: str,
547
- local_wheel_path: pathlib.Path,
548
- wheel_hash: str,
549
- region: clouds.Region,
550
- zones: Optional[List[clouds.Zone]] = None,
551
- dryrun: bool = False,
552
- keep_launch_fields_in_existing_config: bool = True) -> Dict[str, str]:
545
+ to_provision: 'resources_lib.Resources',
546
+ num_nodes: int,
547
+ cluster_config_template: str,
548
+ cluster_name: str,
549
+ local_wheel_path: pathlib.Path,
550
+ wheel_hash: str,
551
+ region: clouds.Region,
552
+ zones: Optional[List[clouds.Zone]] = None,
553
+ dryrun: bool = False,
554
+ keep_launch_fields_in_existing_config: bool = True,
555
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
556
+ ) -> Dict[str, str]:
553
557
  """Fills in cluster configuration templates and writes them out.
554
558
 
555
559
  Returns:
@@ -597,7 +601,7 @@ def write_cluster_config(
597
601
  resources_utils.ClusterName(
598
602
  cluster_name,
599
603
  cluster_name_on_cloud,
600
- ), region, zones, num_nodes, dryrun)
604
+ ), region, zones, num_nodes, dryrun, volume_mounts)
601
605
  config_dict = {}
602
606
 
603
607
  specific_reservations = set(
@@ -730,6 +734,15 @@ def write_cluster_config(
730
734
  high_availability_specified = controller_utils.high_availability_specified(
731
735
  cluster_name)
732
736
 
737
+ volume_mount_vars = []
738
+ if volume_mounts is not None:
739
+ for vol in volume_mounts:
740
+ volume_mount_vars.append({
741
+ 'name': vol.volume_name,
742
+ 'path': vol.path,
743
+ 'volume_name_on_cloud': vol.volume_config.name_on_cloud,
744
+ })
745
+
733
746
  # Use a tmp file path to avoid incomplete YAML file being re-used in the
734
747
  # future.
735
748
  tmp_yaml_path = yaml_path + '.tmp'
@@ -820,6 +833,9 @@ def write_cluster_config(
820
833
 
821
834
  # High availability
822
835
  'high_availability': high_availability_specified,
836
+
837
+ # Volume mounts
838
+ 'volume_mounts': volume_mount_vars,
823
839
  }),
824
840
  output_path=tmp_yaml_path)
825
841
  config_dict['cluster_name'] = cluster_name
@@ -2454,6 +2470,17 @@ def is_controller_accessible(
2454
2470
  exceptions.ClusterNotUpError: if the controller is not accessible, or
2455
2471
  failed to be connected.
2456
2472
  """
2473
+ if (managed_job_utils.is_consolidation_mode() and
2474
+ controller == controller_utils.Controllers.JOBS_CONTROLLER):
2475
+ cn = 'local-controller-consolidation'
2476
+ return backends.LocalResourcesHandle(
2477
+ cluster_name=cn,
2478
+ cluster_name_on_cloud=cn,
2479
+ cluster_yaml=None,
2480
+ launched_nodes=1,
2481
+ launched_resources=sky.Resources(cloud=clouds.Cloud(),
2482
+ instance_type=cn),
2483
+ )
2457
2484
  if non_existent_message is None:
2458
2485
  non_existent_message = controller.value.default_hint_if_non_existent
2459
2486
  cluster_name = controller.value.cluster_name
@@ -73,6 +73,7 @@ from sky.utils import status_lib
73
73
  from sky.utils import subprocess_utils
74
74
  from sky.utils import timeline
75
75
  from sky.utils import ux_utils
76
+ from sky.volumes import volume as volume_lib
76
77
 
77
78
  if typing.TYPE_CHECKING:
78
79
  from sky import dag
@@ -1327,6 +1328,7 @@ class RetryingVmProvisioner(object):
1327
1328
  prev_handle: Optional['CloudVmRayResourceHandle'],
1328
1329
  prev_cluster_ever_up: bool,
1329
1330
  skip_if_config_hash_matches: Optional[str],
1331
+ volume_mounts: Optional[List[volume_lib.VolumeMount]],
1330
1332
  ) -> Dict[str, Any]:
1331
1333
  """The provision retry loop.
1332
1334
 
@@ -1432,7 +1434,9 @@ class RetryingVmProvisioner(object):
1432
1434
  region=region,
1433
1435
  zones=zones,
1434
1436
  dryrun=dryrun,
1435
- keep_launch_fields_in_existing_config=cluster_exists)
1437
+ keep_launch_fields_in_existing_config=cluster_exists,
1438
+ volume_mounts=volume_mounts,
1439
+ )
1436
1440
  except exceptions.ResourcesUnavailableError as e:
1437
1441
  # Failed due to catalog issue, e.g. image not found, or
1438
1442
  # GPUs are requested in a Kubernetes cluster but the cluster
@@ -2081,7 +2085,9 @@ class RetryingVmProvisioner(object):
2081
2085
  prev_cluster_status=prev_cluster_status,
2082
2086
  prev_handle=prev_handle,
2083
2087
  prev_cluster_ever_up=prev_cluster_ever_up,
2084
- skip_if_config_hash_matches=skip_if_config_hash_matches)
2088
+ skip_if_config_hash_matches=skip_if_config_hash_matches,
2089
+ volume_mounts=task.volume_mounts,
2090
+ )
2085
2091
  if dryrun:
2086
2092
  return config_dict
2087
2093
  except (exceptions.InvalidClusterNameError,
@@ -2435,9 +2441,14 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
2435
2441
  zip(cluster_internal_ips, cluster_feasible_ips))
2436
2442
 
2437
2443
  # Ensure head node is the first element, then sort based on the
2438
- # external IPs for stableness
2439
- stable_internal_external_ips = [internal_external_ips[0]] + sorted(
2440
- internal_external_ips[1:], key=lambda x: x[1])
2444
+ # external IPs for stableness. Skip for k8s nodes since pods
2445
+ # worker ids are already mapped.
2446
+ if (cluster_info is not None and
2447
+ cluster_info.provider_name == 'kubernetes'):
2448
+ stable_internal_external_ips = internal_external_ips
2449
+ else:
2450
+ stable_internal_external_ips = [internal_external_ips[0]] + sorted(
2451
+ internal_external_ips[1:], key=lambda x: x[1])
2441
2452
  self.stable_internal_external_ips = stable_internal_external_ips
2442
2453
 
2443
2454
  @context_utils.cancellation_guard
@@ -2696,6 +2707,21 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
2696
2707
  pass
2697
2708
 
2698
2709
 
2710
+ class LocalResourcesHandle(CloudVmRayResourceHandle):
2711
+ """A handle for local resources."""
2712
+
2713
+ @context_utils.cancellation_guard
2714
+ @annotations.lru_cache(scope='global')
2715
+ @timeline.event
2716
+ def get_command_runners(self,
2717
+ force_cached: bool = False,
2718
+ avoid_ssh_control: bool = False
2719
+ ) -> List[command_runner.CommandRunner]:
2720
+ """Returns a list of local command runners."""
2721
+ del force_cached, avoid_ssh_control # Unused.
2722
+ return [command_runner.LocalProcessCommandRunner()]
2723
+
2724
+
2699
2725
  @registry.BACKEND_REGISTRY.type_register(name='cloudvmray')
2700
2726
  class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
2701
2727
  """Backend: runs on cloud virtual machines, managed by Ray.
@@ -4043,19 +4069,27 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4043
4069
  # list should aready be in descending order
4044
4070
  job_id = job_ids[0]
4045
4071
 
4046
- # get the run_timestamp
4047
- # the function takes in [job_id]
4048
- code = job_lib.JobLibCodeGen.get_log_dirs_for_jobs([str(job_id)])
4049
- returncode, run_timestamps, stderr = self.run_on_head(
4050
- handle,
4051
- code,
4052
- stream_logs=False,
4053
- require_outputs=True,
4054
- separate_stderr=True)
4055
- subprocess_utils.handle_returncode(returncode, code,
4056
- 'Failed to sync logs.', stderr)
4057
- # returns with a dict of {job_id: run_timestamp}
4058
- run_timestamps = message_utils.decode_payload(run_timestamps)
4072
+ if isinstance(handle, LocalResourcesHandle):
4073
+ # In consolidation mode, we don't submit a ray job, therefore no
4074
+ # run_timestamp is available. We use a dummy run_timestamp here.
4075
+ run_timestamps = {
4076
+ job_id: f'managed-jobs-consolidation-mode-{job_id}'
4077
+ }
4078
+ else:
4079
+ # get the run_timestamp
4080
+ # the function takes in [job_id]
4081
+ code = job_lib.JobLibCodeGen.get_log_dirs_for_jobs([str(job_id)])
4082
+ returncode, run_timestamps_payload, stderr = self.run_on_head(
4083
+ handle,
4084
+ code,
4085
+ stream_logs=False,
4086
+ require_outputs=True,
4087
+ separate_stderr=True)
4088
+ subprocess_utils.handle_returncode(returncode, code,
4089
+ 'Failed to sync logs.', stderr)
4090
+ # returns with a dict of {job_id: run_timestamp}
4091
+ run_timestamps = message_utils.decode_payload(
4092
+ run_timestamps_payload)
4059
4093
  if not run_timestamps:
4060
4094
  logger.info(f'{colorama.Fore.YELLOW}'
4061
4095
  'No matching log directories found'