skypilot-nightly 1.0.0.dev20250922__py3-none-any.whl → 1.0.0.dev20250926__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/backends/backend.py +10 -0
- sky/backends/backend_utils.py +207 -79
- sky/backends/cloud_vm_ray_backend.py +37 -13
- sky/backends/local_docker_backend.py +9 -0
- sky/client/cli/command.py +112 -53
- sky/client/common.py +4 -2
- sky/client/sdk.py +17 -7
- sky/client/sdk_async.py +4 -2
- sky/clouds/kubernetes.py +2 -1
- sky/clouds/runpod.py +20 -7
- sky/core.py +9 -54
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-d8bc3a2b9cf839a9.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-d0c00018a5ba198c.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9525660179df3605.js → [cluster]-e052384df65ef200.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-26167a9e6d91fa51.js → webpack-8e64d11e58eab5cb.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/mounting_utils.py +19 -10
- sky/execution.py +4 -2
- sky/global_user_state.py +271 -67
- sky/jobs/client/sdk.py +10 -1
- sky/jobs/constants.py +2 -0
- sky/jobs/controller.py +11 -7
- sky/jobs/server/core.py +5 -3
- sky/jobs/server/server.py +15 -11
- sky/jobs/utils.py +1 -1
- sky/logs/agent.py +30 -3
- sky/logs/aws.py +9 -19
- sky/provision/__init__.py +2 -1
- sky/provision/aws/instance.py +2 -1
- sky/provision/azure/instance.py +2 -1
- sky/provision/cudo/instance.py +2 -2
- sky/provision/do/instance.py +2 -2
- sky/provision/docker_utils.py +41 -19
- sky/provision/fluidstack/instance.py +2 -2
- sky/provision/gcp/instance.py +2 -1
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/instance_setup.py +1 -1
- sky/provision/kubernetes/instance.py +134 -8
- sky/provision/lambda_cloud/instance.py +2 -1
- sky/provision/nebius/instance.py +2 -1
- sky/provision/oci/instance.py +2 -1
- sky/provision/paperspace/instance.py +2 -2
- sky/provision/primeintellect/instance.py +2 -2
- sky/provision/provisioner.py +1 -0
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +2 -2
- sky/provision/scp/instance.py +2 -2
- sky/provision/seeweb/instance.py +2 -1
- sky/provision/vast/instance.py +2 -1
- sky/provision/vsphere/instance.py +6 -5
- sky/schemas/api/responses.py +2 -1
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +45 -19
- sky/serve/replica_managers.py +12 -5
- sky/serve/serve_utils.py +5 -7
- sky/serve/server/core.py +9 -6
- sky/serve/server/impl.py +78 -25
- sky/serve/server/server.py +4 -5
- sky/serve/service_spec.py +33 -0
- sky/server/constants.py +1 -1
- sky/server/daemons.py +2 -3
- sky/server/requests/executor.py +56 -6
- sky/server/requests/payloads.py +32 -8
- sky/server/requests/preconditions.py +2 -3
- sky/server/rest.py +2 -0
- sky/server/server.py +28 -19
- sky/server/stream_utils.py +34 -12
- sky/setup_files/dependencies.py +5 -2
- sky/setup_files/setup.py +44 -44
- sky/skylet/constants.py +4 -1
- sky/skylet/events.py +42 -0
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +24 -18
- sky/usage/usage_lib.py +3 -0
- sky/utils/cli_utils/status_utils.py +4 -5
- sky/utils/context.py +104 -29
- sky/utils/controller_utils.py +7 -6
- sky/utils/db/db_utils.py +5 -1
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/kubernetes/create_cluster.sh +13 -28
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/kubernetes_deploy_utils.py +194 -38
- sky/utils/kubernetes_enums.py +5 -0
- sky/utils/ux_utils.py +35 -1
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +44 -8
- sky/volumes/server/core.py +1 -0
- sky/volumes/server/server.py +33 -7
- sky/volumes/volume.py +35 -28
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/METADATA +38 -33
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/RECORD +118 -117
- sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-9a2538f38c004652.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +0 -16
- /sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ import urllib.request
|
|
|
7
7
|
from sky.utils import directory_utils
|
|
8
8
|
|
|
9
9
|
# Replaced with the current commit when building the wheels.
|
|
10
|
-
_SKYPILOT_COMMIT_SHA = '
|
|
10
|
+
_SKYPILOT_COMMIT_SHA = '827d534c8bbfa61b895467b9431283e923dd9841'
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def _get_git_commit():
|
|
@@ -37,7 +37,7 @@ def _get_git_commit():
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
__commit__ = _get_git_commit()
|
|
40
|
-
__version__ = '1.0.0.
|
|
40
|
+
__version__ = '1.0.0.dev20250926'
|
|
41
41
|
__root_dir__ = directory_utils.get_sky_dir()
|
|
42
42
|
|
|
43
43
|
|
sky/backends/backend.py
CHANGED
|
@@ -95,6 +95,12 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
95
95
|
envs_and_secrets: Dict[str, str]) -> None:
|
|
96
96
|
return self._sync_workdir(handle, workdir, envs_and_secrets)
|
|
97
97
|
|
|
98
|
+
@timeline.event
|
|
99
|
+
@usage_lib.messages.usage.update_runtime('download_file')
|
|
100
|
+
def download_file(self, handle: _ResourceHandleType, local_file_path: str,
|
|
101
|
+
remote_file_path: str) -> None:
|
|
102
|
+
return self._download_file(handle, local_file_path, remote_file_path)
|
|
103
|
+
|
|
98
104
|
@timeline.event
|
|
99
105
|
@usage_lib.messages.usage.update_runtime('sync_file_mounts')
|
|
100
106
|
def sync_file_mounts(
|
|
@@ -172,6 +178,10 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
172
178
|
envs_and_secrets: Dict[str, str]) -> None:
|
|
173
179
|
raise NotImplementedError
|
|
174
180
|
|
|
181
|
+
def _download_file(self, handle: _ResourceHandleType, local_file_path: str,
|
|
182
|
+
remote_file_path: str) -> None:
|
|
183
|
+
raise NotImplementedError
|
|
184
|
+
|
|
175
185
|
def _sync_file_mounts(
|
|
176
186
|
self,
|
|
177
187
|
handle: _ResourceHandleType,
|
sky/backends/backend_utils.py
CHANGED
|
@@ -52,6 +52,7 @@ from sky.utils import cluster_utils
|
|
|
52
52
|
from sky.utils import command_runner
|
|
53
53
|
from sky.utils import common
|
|
54
54
|
from sky.utils import common_utils
|
|
55
|
+
from sky.utils import context as context_lib
|
|
55
56
|
from sky.utils import context_utils
|
|
56
57
|
from sky.utils import controller_utils
|
|
57
58
|
from sky.utils import env_options
|
|
@@ -796,7 +797,7 @@ def write_cluster_config(
|
|
|
796
797
|
cloud=str(cloud).lower(),
|
|
797
798
|
region=region.name,
|
|
798
799
|
keys=('use_ssm',),
|
|
799
|
-
default_value=
|
|
800
|
+
default_value=None)
|
|
800
801
|
|
|
801
802
|
if use_ssm and ssh_proxy_command is not None:
|
|
802
803
|
raise exceptions.InvalidCloudConfigs(
|
|
@@ -804,15 +805,18 @@ def write_cluster_config(
|
|
|
804
805
|
f'is already set to {ssh_proxy_command!r}. Please remove '
|
|
805
806
|
'ssh_proxy_command or set use_ssm to false.')
|
|
806
807
|
|
|
807
|
-
if
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
808
|
+
if use_internal_ips and ssh_proxy_command is None:
|
|
809
|
+
# Only if use_ssm is explicitly not set, we default to using SSM.
|
|
810
|
+
if use_ssm is None:
|
|
811
|
+
logger.warning(
|
|
812
|
+
f'{colorama.Fore.YELLOW}'
|
|
813
|
+
'use_internal_ips is set to true, '
|
|
814
|
+
'but ssh_proxy_command is not set. Defaulting to '
|
|
815
|
+
'using SSM. Specify ssh_proxy_command to use a different '
|
|
816
|
+
'https://docs.skypilot.co/en/latest/reference/config.html#'
|
|
817
|
+
f'aws.ssh_proxy_command.{colorama.Style.RESET_ALL}')
|
|
818
|
+
use_ssm = True
|
|
819
|
+
|
|
816
820
|
if use_ssm:
|
|
817
821
|
aws_profile = os.environ.get('AWS_PROFILE', None)
|
|
818
822
|
profile_str = f'--profile {aws_profile}' if aws_profile else ''
|
|
@@ -1843,7 +1847,9 @@ def check_owner_identity(cluster_name: str) -> None:
|
|
|
1843
1847
|
"""
|
|
1844
1848
|
if env_options.Options.SKIP_CLOUD_IDENTITY_CHECK.get():
|
|
1845
1849
|
return
|
|
1846
|
-
record = global_user_state.get_cluster_from_name(cluster_name
|
|
1850
|
+
record = global_user_state.get_cluster_from_name(cluster_name,
|
|
1851
|
+
include_user_info=False,
|
|
1852
|
+
summary_response=True)
|
|
1847
1853
|
if record is None:
|
|
1848
1854
|
return
|
|
1849
1855
|
handle = record['handle']
|
|
@@ -1930,6 +1936,7 @@ def tag_filter_for_cluster(cluster_name: str) -> Dict[str, str]:
|
|
|
1930
1936
|
}
|
|
1931
1937
|
|
|
1932
1938
|
|
|
1939
|
+
@context_utils.cancellation_guard
|
|
1933
1940
|
def _query_cluster_status_via_cloud_api(
|
|
1934
1941
|
handle: 'cloud_vm_ray_backend.CloudVmRayResourceHandle'
|
|
1935
1942
|
) -> List[Tuple[status_lib.ClusterStatus, Optional[str]]]:
|
|
@@ -2137,7 +2144,10 @@ def check_can_clone_disk_and_override_task(
|
|
|
2137
2144
|
return task, handle
|
|
2138
2145
|
|
|
2139
2146
|
|
|
2140
|
-
def _update_cluster_status(
|
|
2147
|
+
def _update_cluster_status(
|
|
2148
|
+
cluster_name: str,
|
|
2149
|
+
include_user_info: bool = True,
|
|
2150
|
+
summary_response: bool = False) -> Optional[Dict[str, Any]]:
|
|
2141
2151
|
"""Update the cluster status.
|
|
2142
2152
|
|
|
2143
2153
|
The cluster status is updated by checking ray cluster and real status from
|
|
@@ -2164,7 +2174,10 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
|
2164
2174
|
fetched from the cloud provider or there are leaked nodes causing
|
|
2165
2175
|
the node number larger than expected.
|
|
2166
2176
|
"""
|
|
2167
|
-
record = global_user_state.get_cluster_from_name(
|
|
2177
|
+
record = global_user_state.get_cluster_from_name(
|
|
2178
|
+
cluster_name,
|
|
2179
|
+
include_user_info=include_user_info,
|
|
2180
|
+
summary_response=summary_response)
|
|
2168
2181
|
if record is None:
|
|
2169
2182
|
return None
|
|
2170
2183
|
handle = record['handle']
|
|
@@ -2340,7 +2353,10 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
|
2340
2353
|
ready=True,
|
|
2341
2354
|
is_launch=False,
|
|
2342
2355
|
existing_cluster_hash=record['cluster_hash'])
|
|
2343
|
-
return global_user_state.get_cluster_from_name(
|
|
2356
|
+
return global_user_state.get_cluster_from_name(
|
|
2357
|
+
cluster_name,
|
|
2358
|
+
include_user_info=include_user_info,
|
|
2359
|
+
summary_response=summary_response)
|
|
2344
2360
|
|
|
2345
2361
|
# All cases below are transitioning the cluster to non-UP states.
|
|
2346
2362
|
launched_resources = handle.launched_resources.assert_launchable()
|
|
@@ -2552,7 +2568,10 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
|
2552
2568
|
ready=False,
|
|
2553
2569
|
is_launch=False,
|
|
2554
2570
|
existing_cluster_hash=record['cluster_hash'])
|
|
2555
|
-
return global_user_state.get_cluster_from_name(
|
|
2571
|
+
return global_user_state.get_cluster_from_name(
|
|
2572
|
+
cluster_name,
|
|
2573
|
+
include_user_info=include_user_info,
|
|
2574
|
+
summary_response=summary_response)
|
|
2556
2575
|
# Now is_abnormal is False: either node_statuses is empty or all nodes are
|
|
2557
2576
|
# STOPPED.
|
|
2558
2577
|
verb = 'terminated' if to_terminate else 'stopped'
|
|
@@ -2567,7 +2586,10 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
|
2567
2586
|
nop_if_duplicate=True,
|
|
2568
2587
|
)
|
|
2569
2588
|
backend.post_teardown_cleanup(handle, terminate=to_terminate, purge=False)
|
|
2570
|
-
return global_user_state.get_cluster_from_name(
|
|
2589
|
+
return global_user_state.get_cluster_from_name(
|
|
2590
|
+
cluster_name,
|
|
2591
|
+
include_user_info=include_user_info,
|
|
2592
|
+
summary_response=summary_response)
|
|
2571
2593
|
|
|
2572
2594
|
|
|
2573
2595
|
def _must_refresh_cluster_status(
|
|
@@ -2589,12 +2611,13 @@ def _must_refresh_cluster_status(
|
|
|
2589
2611
|
|
|
2590
2612
|
|
|
2591
2613
|
def refresh_cluster_record(
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2614
|
+
cluster_name: str,
|
|
2615
|
+
*,
|
|
2616
|
+
force_refresh_statuses: Optional[Set[status_lib.ClusterStatus]] = None,
|
|
2617
|
+
acquire_per_cluster_status_lock: bool = True,
|
|
2618
|
+
cluster_status_lock_timeout: int = CLUSTER_STATUS_LOCK_TIMEOUT_SECONDS,
|
|
2619
|
+
include_user_info: bool = True,
|
|
2620
|
+
summary_response: bool = False) -> Optional[Dict[str, Any]]:
|
|
2598
2621
|
"""Refresh the cluster, and return the possibly updated record.
|
|
2599
2622
|
|
|
2600
2623
|
The function will update the cached cluster status in the global state. For
|
|
@@ -2634,7 +2657,11 @@ def refresh_cluster_record(
|
|
|
2634
2657
|
the node number larger than expected.
|
|
2635
2658
|
"""
|
|
2636
2659
|
|
|
2637
|
-
|
|
2660
|
+
ctx = context_lib.get()
|
|
2661
|
+
record = global_user_state.get_cluster_from_name(
|
|
2662
|
+
cluster_name,
|
|
2663
|
+
include_user_info=include_user_info,
|
|
2664
|
+
summary_response=summary_response)
|
|
2638
2665
|
if record is None:
|
|
2639
2666
|
return None
|
|
2640
2667
|
# TODO(zhwu, 05/20): switch to the specific workspace to make sure we are
|
|
@@ -2653,12 +2680,16 @@ def refresh_cluster_record(
|
|
|
2653
2680
|
|
|
2654
2681
|
# Loop until we have an up-to-date status or until we acquire the lock.
|
|
2655
2682
|
while True:
|
|
2683
|
+
# Check if the context is canceled.
|
|
2684
|
+
if ctx is not None and ctx.is_canceled():
|
|
2685
|
+
raise asyncio.CancelledError()
|
|
2656
2686
|
# Check to see if we can return the cached status.
|
|
2657
2687
|
if not _must_refresh_cluster_status(record, force_refresh_statuses):
|
|
2658
2688
|
return record
|
|
2659
2689
|
|
|
2660
2690
|
if not acquire_per_cluster_status_lock:
|
|
2661
|
-
return _update_cluster_status(cluster_name
|
|
2691
|
+
return _update_cluster_status(cluster_name, include_user_info,
|
|
2692
|
+
summary_response)
|
|
2662
2693
|
|
|
2663
2694
|
# Try to acquire the lock so we can fetch the status.
|
|
2664
2695
|
try:
|
|
@@ -2666,12 +2697,16 @@ def refresh_cluster_record(
|
|
|
2666
2697
|
# Check the cluster status again, since it could have been
|
|
2667
2698
|
# updated between our last check and acquiring the lock.
|
|
2668
2699
|
record = global_user_state.get_cluster_from_name(
|
|
2669
|
-
cluster_name
|
|
2700
|
+
cluster_name,
|
|
2701
|
+
include_user_info=include_user_info,
|
|
2702
|
+
summary_response=summary_response)
|
|
2670
2703
|
if record is None or not _must_refresh_cluster_status(
|
|
2671
2704
|
record, force_refresh_statuses):
|
|
2672
2705
|
return record
|
|
2673
2706
|
# Update and return the cluster status.
|
|
2674
|
-
return _update_cluster_status(cluster_name
|
|
2707
|
+
return _update_cluster_status(cluster_name,
|
|
2708
|
+
include_user_info,
|
|
2709
|
+
summary_response)
|
|
2675
2710
|
|
|
2676
2711
|
except locks.LockTimeout:
|
|
2677
2712
|
# lock.acquire() will throw a Timeout exception if the lock is not
|
|
@@ -2692,7 +2727,10 @@ def refresh_cluster_record(
|
|
|
2692
2727
|
time.sleep(lock.poll_interval)
|
|
2693
2728
|
|
|
2694
2729
|
# Refresh for next loop iteration.
|
|
2695
|
-
record = global_user_state.get_cluster_from_name(
|
|
2730
|
+
record = global_user_state.get_cluster_from_name(
|
|
2731
|
+
cluster_name,
|
|
2732
|
+
include_user_info=include_user_info,
|
|
2733
|
+
summary_response=summary_response)
|
|
2696
2734
|
if record is None:
|
|
2697
2735
|
return None
|
|
2698
2736
|
|
|
@@ -2717,7 +2755,9 @@ def refresh_cluster_status_handle(
|
|
|
2717
2755
|
cluster_name,
|
|
2718
2756
|
force_refresh_statuses=force_refresh_statuses,
|
|
2719
2757
|
acquire_per_cluster_status_lock=acquire_per_cluster_status_lock,
|
|
2720
|
-
cluster_status_lock_timeout=cluster_status_lock_timeout
|
|
2758
|
+
cluster_status_lock_timeout=cluster_status_lock_timeout,
|
|
2759
|
+
include_user_info=False,
|
|
2760
|
+
summary_response=True)
|
|
2721
2761
|
if record is None:
|
|
2722
2762
|
return None, None
|
|
2723
2763
|
return record['status'], record['handle']
|
|
@@ -2768,7 +2808,9 @@ def check_cluster_available(
|
|
|
2768
2808
|
exceptions.CloudUserIdentityError: if we fail to get the current user
|
|
2769
2809
|
identity.
|
|
2770
2810
|
"""
|
|
2771
|
-
record = global_user_state.get_cluster_from_name(cluster_name
|
|
2811
|
+
record = global_user_state.get_cluster_from_name(cluster_name,
|
|
2812
|
+
include_user_info=False,
|
|
2813
|
+
summary_response=True)
|
|
2772
2814
|
if dryrun:
|
|
2773
2815
|
assert record is not None, cluster_name
|
|
2774
2816
|
return record['handle']
|
|
@@ -2955,7 +2997,8 @@ def is_controller_accessible(
|
|
|
2955
2997
|
f'fatal, but {controller_name} commands/calls may hang or return '
|
|
2956
2998
|
'stale information, when the controller is not up.\n'
|
|
2957
2999
|
f' Details: {common_utils.format_exception(e, use_bracket=True)}')
|
|
2958
|
-
record = global_user_state.get_cluster_from_name(
|
|
3000
|
+
record = global_user_state.get_cluster_from_name(
|
|
3001
|
+
cluster_name, include_user_info=False, summary_response=True)
|
|
2959
3002
|
if record is not None:
|
|
2960
3003
|
controller_status, handle = record['status'], record['handle']
|
|
2961
3004
|
# We check the connection even if the cluster has a cached status UP
|
|
@@ -3012,22 +3055,98 @@ class CloudFilter(enum.Enum):
|
|
|
3012
3055
|
LOCAL = 'local'
|
|
3013
3056
|
|
|
3014
3057
|
|
|
3015
|
-
def _get_glob_clusters(
|
|
3058
|
+
def _get_glob_clusters(
|
|
3059
|
+
clusters: List[str],
|
|
3060
|
+
silent: bool = False,
|
|
3061
|
+
workspaces_filter: Optional[Dict[str, Any]] = None) -> List[str]:
|
|
3016
3062
|
"""Returns a list of clusters that match the glob pattern."""
|
|
3017
3063
|
glob_clusters = []
|
|
3018
3064
|
for cluster in clusters:
|
|
3019
|
-
glob_cluster = global_user_state.get_glob_cluster_names(
|
|
3065
|
+
glob_cluster = global_user_state.get_glob_cluster_names(
|
|
3066
|
+
cluster, workspaces_filter=workspaces_filter)
|
|
3020
3067
|
if len(glob_cluster) == 0 and not silent:
|
|
3021
3068
|
logger.info(f'Cluster {cluster} not found.')
|
|
3022
3069
|
glob_clusters.extend(glob_cluster)
|
|
3023
3070
|
return list(set(glob_clusters))
|
|
3024
3071
|
|
|
3025
3072
|
|
|
3073
|
+
def _refresh_cluster(
|
|
3074
|
+
cluster_name: str,
|
|
3075
|
+
force_refresh_statuses: Optional[Set[status_lib.ClusterStatus]],
|
|
3076
|
+
include_user_info: bool = True,
|
|
3077
|
+
summary_response: bool = False) -> Optional[Dict[str, Any]]:
|
|
3078
|
+
try:
|
|
3079
|
+
record = refresh_cluster_record(
|
|
3080
|
+
cluster_name,
|
|
3081
|
+
force_refresh_statuses=force_refresh_statuses,
|
|
3082
|
+
acquire_per_cluster_status_lock=True,
|
|
3083
|
+
include_user_info=include_user_info,
|
|
3084
|
+
summary_response=summary_response)
|
|
3085
|
+
except (exceptions.ClusterStatusFetchingError,
|
|
3086
|
+
exceptions.CloudUserIdentityError,
|
|
3087
|
+
exceptions.ClusterOwnerIdentityMismatchError) as e:
|
|
3088
|
+
# Do not fail the entire refresh process. The caller will
|
|
3089
|
+
# handle the 'UNKNOWN' status, and collect the errors into
|
|
3090
|
+
# a table.
|
|
3091
|
+
record = {'status': 'UNKNOWN', 'error': e}
|
|
3092
|
+
return record
|
|
3093
|
+
|
|
3094
|
+
|
|
3095
|
+
def refresh_cluster_records() -> None:
|
|
3096
|
+
"""Refreshes the status of all clusters, except managed clusters.
|
|
3097
|
+
|
|
3098
|
+
Used by the background status refresh daemon.
|
|
3099
|
+
This function is a stripped-down version of get_clusters, with only the
|
|
3100
|
+
bare bones refresh logic.
|
|
3101
|
+
|
|
3102
|
+
Returns:
|
|
3103
|
+
None
|
|
3104
|
+
|
|
3105
|
+
Raises:
|
|
3106
|
+
None
|
|
3107
|
+
"""
|
|
3108
|
+
exclude_managed_clusters = True
|
|
3109
|
+
if env_options.Options.SHOW_DEBUG_INFO.get():
|
|
3110
|
+
exclude_managed_clusters = False
|
|
3111
|
+
cluster_names = global_user_state.get_cluster_names(
|
|
3112
|
+
exclude_managed_clusters=exclude_managed_clusters,)
|
|
3113
|
+
|
|
3114
|
+
# TODO(syang): we should try not to leak
|
|
3115
|
+
# request info in backend_utils.py.
|
|
3116
|
+
# Refactor this to use some other info to
|
|
3117
|
+
# determine if a launch is in progress.
|
|
3118
|
+
request = requests_lib.get_request_tasks(
|
|
3119
|
+
req_filter=requests_lib.RequestTaskFilter(
|
|
3120
|
+
status=[requests_lib.RequestStatus.RUNNING],
|
|
3121
|
+
cluster_names=cluster_names,
|
|
3122
|
+
include_request_names=['sky.launch']))
|
|
3123
|
+
cluster_names_with_launch_request = {
|
|
3124
|
+
request.cluster_name for request in request
|
|
3125
|
+
}
|
|
3126
|
+
cluster_names_without_launch_request = [
|
|
3127
|
+
cluster_name for cluster_name in cluster_names
|
|
3128
|
+
if cluster_name not in cluster_names_with_launch_request
|
|
3129
|
+
]
|
|
3130
|
+
|
|
3131
|
+
def _refresh_cluster_record(cluster_name):
|
|
3132
|
+
return _refresh_cluster(cluster_name,
|
|
3133
|
+
force_refresh_statuses=set(
|
|
3134
|
+
status_lib.ClusterStatus),
|
|
3135
|
+
include_user_info=False,
|
|
3136
|
+
summary_response=True)
|
|
3137
|
+
|
|
3138
|
+
if len(cluster_names) > 0:
|
|
3139
|
+
# Do not refresh the clusters that have an active launch request.
|
|
3140
|
+
subprocess_utils.run_in_parallel(_refresh_cluster_record,
|
|
3141
|
+
cluster_names_without_launch_request)
|
|
3142
|
+
|
|
3143
|
+
|
|
3026
3144
|
def get_clusters(
|
|
3027
3145
|
refresh: common.StatusRefreshMode,
|
|
3028
3146
|
cluster_names: Optional[Union[str, List[str]]] = None,
|
|
3029
3147
|
all_users: bool = True,
|
|
3030
3148
|
include_credentials: bool = False,
|
|
3149
|
+
summary_response: bool = False,
|
|
3031
3150
|
# Internal only:
|
|
3032
3151
|
# pylint: disable=invalid-name
|
|
3033
3152
|
_include_is_managed: bool = False,
|
|
@@ -3055,10 +3174,23 @@ def get_clusters(
|
|
|
3055
3174
|
A list of cluster records. If the cluster does not exist or has been
|
|
3056
3175
|
terminated, the record will be omitted from the returned list.
|
|
3057
3176
|
"""
|
|
3177
|
+
accessible_workspaces = workspaces_core.get_workspaces()
|
|
3058
3178
|
if cluster_names is not None:
|
|
3059
3179
|
if isinstance(cluster_names, str):
|
|
3060
3180
|
cluster_names = [cluster_names]
|
|
3061
|
-
|
|
3181
|
+
non_glob_cluster_names = []
|
|
3182
|
+
glob_cluster_names = []
|
|
3183
|
+
for cluster_name in cluster_names:
|
|
3184
|
+
if ux_utils.is_glob_pattern(cluster_name):
|
|
3185
|
+
glob_cluster_names.append(cluster_name)
|
|
3186
|
+
else:
|
|
3187
|
+
non_glob_cluster_names.append(cluster_name)
|
|
3188
|
+
cluster_names = non_glob_cluster_names
|
|
3189
|
+
if glob_cluster_names:
|
|
3190
|
+
cluster_names += _get_glob_clusters(
|
|
3191
|
+
glob_cluster_names,
|
|
3192
|
+
silent=True,
|
|
3193
|
+
workspaces_filter=accessible_workspaces)
|
|
3062
3194
|
|
|
3063
3195
|
exclude_managed_clusters = False
|
|
3064
3196
|
if not (_include_is_managed or env_options.Options.SHOW_DEBUG_INFO.get()):
|
|
@@ -3066,13 +3198,12 @@ def get_clusters(
|
|
|
3066
3198
|
user_hashes_filter = None
|
|
3067
3199
|
if not all_users:
|
|
3068
3200
|
user_hashes_filter = {common_utils.get_current_user().id}
|
|
3069
|
-
accessible_workspaces = workspaces_core.get_workspaces()
|
|
3070
3201
|
records = global_user_state.get_clusters(
|
|
3071
3202
|
exclude_managed_clusters=exclude_managed_clusters,
|
|
3072
3203
|
user_hashes_filter=user_hashes_filter,
|
|
3073
3204
|
workspaces_filter=accessible_workspaces,
|
|
3074
3205
|
cluster_names=cluster_names,
|
|
3075
|
-
|
|
3206
|
+
summary_response=summary_response)
|
|
3076
3207
|
|
|
3077
3208
|
yellow = colorama.Fore.YELLOW
|
|
3078
3209
|
bright = colorama.Style.BRIGHT
|
|
@@ -3080,12 +3211,10 @@ def get_clusters(
|
|
|
3080
3211
|
|
|
3081
3212
|
if cluster_names is not None:
|
|
3082
3213
|
record_names = {record['name'] for record in records}
|
|
3083
|
-
|
|
3084
|
-
|
|
3085
|
-
|
|
3086
|
-
|
|
3087
|
-
if not_exist_cluster_names:
|
|
3088
|
-
clusters_str = ', '.join(not_exist_cluster_names)
|
|
3214
|
+
not_found_clusters = ux_utils.get_non_matched_query(
|
|
3215
|
+
cluster_names, record_names)
|
|
3216
|
+
if not_found_clusters:
|
|
3217
|
+
clusters_str = ', '.join(not_found_clusters)
|
|
3089
3218
|
logger.info(f'Cluster(s) not found: {bright}{clusters_str}{reset}.')
|
|
3090
3219
|
|
|
3091
3220
|
def _get_records_with_handle(
|
|
@@ -3096,7 +3225,7 @@ def get_clusters(
|
|
|
3096
3225
|
if record is not None and record['handle'] is not None
|
|
3097
3226
|
]
|
|
3098
3227
|
|
|
3099
|
-
def
|
|
3228
|
+
def _update_records_with_handle_info(
|
|
3100
3229
|
records: List[Optional[Dict[str, Any]]]) -> None:
|
|
3101
3230
|
"""Add resource str to record"""
|
|
3102
3231
|
for record in _get_records_with_handle(records):
|
|
@@ -3107,6 +3236,8 @@ def get_clusters(
|
|
|
3107
3236
|
record[
|
|
3108
3237
|
'resources_str_full'] = resources_utils.get_readable_resources_repr(
|
|
3109
3238
|
handle, simplify=False)
|
|
3239
|
+
if not summary_response:
|
|
3240
|
+
record['cluster_name_on_cloud'] = handle.cluster_name_on_cloud
|
|
3110
3241
|
|
|
3111
3242
|
def _update_records_with_credentials(
|
|
3112
3243
|
records: List[Optional[Dict[str, Any]]]) -> None:
|
|
@@ -3146,7 +3277,7 @@ def get_clusters(
|
|
|
3146
3277
|
record['credentials'] = credential
|
|
3147
3278
|
|
|
3148
3279
|
def _update_records_with_resources(
|
|
3149
|
-
|
|
3280
|
+
records: List[Optional[Dict[str, Any]]],) -> None:
|
|
3150
3281
|
"""Add the resources to the record."""
|
|
3151
3282
|
for record in _get_records_with_handle(records):
|
|
3152
3283
|
handle = record['handle']
|
|
@@ -3165,8 +3296,8 @@ def get_clusters(
|
|
|
3165
3296
|
f'{handle.launched_resources.accelerators}'
|
|
3166
3297
|
if handle.launched_resources.accelerators else None)
|
|
3167
3298
|
|
|
3168
|
-
# Add
|
|
3169
|
-
|
|
3299
|
+
# Add handle info to the records
|
|
3300
|
+
_update_records_with_handle_info(records)
|
|
3170
3301
|
if include_credentials:
|
|
3171
3302
|
_update_records_with_credentials(records)
|
|
3172
3303
|
if refresh == common.StatusRefreshMode.NONE:
|
|
@@ -3187,47 +3318,44 @@ def get_clusters(
|
|
|
3187
3318
|
else:
|
|
3188
3319
|
force_refresh_statuses = None
|
|
3189
3320
|
|
|
3190
|
-
def
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
status=[requests_lib.RequestStatus.RUNNING],
|
|
3198
|
-
cluster_names=[cluster_name],
|
|
3199
|
-
include_request_names=['sky.launch']))
|
|
3200
|
-
if len(request) > 0:
|
|
3201
|
-
# There is an active launch request on the cluster,
|
|
3202
|
-
# so we don't want to update the cluster status until
|
|
3203
|
-
# the request is completed.
|
|
3204
|
-
logger.debug(f'skipping refresh for cluster {cluster_name} '
|
|
3205
|
-
'as there is an active launch request')
|
|
3206
|
-
return global_user_state.get_cluster_from_name(cluster_name)
|
|
3207
|
-
try:
|
|
3208
|
-
record = refresh_cluster_record(
|
|
3209
|
-
cluster_name,
|
|
3210
|
-
force_refresh_statuses=force_refresh_statuses,
|
|
3211
|
-
acquire_per_cluster_status_lock=True)
|
|
3212
|
-
_update_records_with_resources_str([record])
|
|
3321
|
+
def _refresh_cluster_record(cluster_name):
|
|
3322
|
+
record = _refresh_cluster(cluster_name,
|
|
3323
|
+
force_refresh_statuses=force_refresh_statuses,
|
|
3324
|
+
include_user_info=True,
|
|
3325
|
+
summary_response=summary_response)
|
|
3326
|
+
if 'error' not in record:
|
|
3327
|
+
_update_records_with_handle_info([record])
|
|
3213
3328
|
if include_credentials:
|
|
3214
3329
|
_update_records_with_credentials([record])
|
|
3215
|
-
|
|
3216
|
-
exceptions.CloudUserIdentityError,
|
|
3217
|
-
exceptions.ClusterOwnerIdentityMismatchError) as e:
|
|
3218
|
-
# Do not fail the entire refresh process. The caller will
|
|
3219
|
-
# handle the 'UNKNOWN' status, and collect the errors into
|
|
3220
|
-
# a table.
|
|
3221
|
-
record = {'status': 'UNKNOWN', 'error': e}
|
|
3222
|
-
progress.update(task, advance=1)
|
|
3330
|
+
progress.update(task, advance=1)
|
|
3223
3331
|
return record
|
|
3224
3332
|
|
|
3225
3333
|
cluster_names = [record['name'] for record in records]
|
|
3226
|
-
|
|
3227
|
-
|
|
3334
|
+
# TODO(syang): we should try not to leak
|
|
3335
|
+
# request info in backend_utils.py.
|
|
3336
|
+
# Refactor this to use some other info to
|
|
3337
|
+
# determine if a launch is in progress.
|
|
3338
|
+
request = requests_lib.get_request_tasks(
|
|
3339
|
+
req_filter=requests_lib.RequestTaskFilter(
|
|
3340
|
+
status=[requests_lib.RequestStatus.RUNNING],
|
|
3341
|
+
cluster_names=cluster_names,
|
|
3342
|
+
include_request_names=['sky.launch']))
|
|
3343
|
+
cluster_names_with_launch_request = {
|
|
3344
|
+
request.cluster_name for request in request
|
|
3345
|
+
}
|
|
3346
|
+
cluster_names_without_launch_request = [
|
|
3347
|
+
cluster_name for cluster_name in cluster_names
|
|
3348
|
+
if cluster_name not in cluster_names_with_launch_request
|
|
3349
|
+
]
|
|
3350
|
+
# for clusters that have an active launch request, we do not refresh the status
|
|
3351
|
+
updated_records = [
|
|
3352
|
+
record for record in records
|
|
3353
|
+
if record['name'] in cluster_names_with_launch_request
|
|
3354
|
+
]
|
|
3355
|
+
if len(cluster_names_without_launch_request) > 0:
|
|
3228
3356
|
with progress:
|
|
3229
3357
|
updated_records = subprocess_utils.run_in_parallel(
|
|
3230
|
-
|
|
3358
|
+
_refresh_cluster_record, cluster_names_without_launch_request)
|
|
3231
3359
|
|
|
3232
3360
|
# Show information for removed clusters.
|
|
3233
3361
|
kept_records = []
|
|
@@ -116,6 +116,9 @@ Path = str
|
|
|
116
116
|
|
|
117
117
|
SKY_REMOTE_APP_DIR = backend_utils.SKY_REMOTE_APP_DIR
|
|
118
118
|
SKY_REMOTE_WORKDIR = constants.SKY_REMOTE_WORKDIR
|
|
119
|
+
# Unset RAY_RAYLET_PID to prevent the Ray cluster in the SkyPilot runtime
|
|
120
|
+
# from interfering with the Ray cluster in the user's task (if any).
|
|
121
|
+
UNSET_RAY_ENV_VARS = ['RAY_RAYLET_PID']
|
|
119
122
|
|
|
120
123
|
logger = sky_logging.init_logger(__name__)
|
|
121
124
|
|
|
@@ -712,6 +715,8 @@ class RayCodeGen:
|
|
|
712
715
|
done
|
|
713
716
|
echo "skypilot: cached mount uploaded complete"
|
|
714
717
|
fi""")
|
|
718
|
+
unset_ray_env_vars = ' && '.join(
|
|
719
|
+
[f'unset {var}' for var in UNSET_RAY_ENV_VARS])
|
|
715
720
|
self._code += [
|
|
716
721
|
sky_env_vars_dict_str,
|
|
717
722
|
textwrap.dedent(f"""\
|
|
@@ -721,6 +726,7 @@ class RayCodeGen:
|
|
|
721
726
|
script = run_fn({gang_scheduling_id}, gang_scheduling_id_to_ip)
|
|
722
727
|
|
|
723
728
|
if script is not None:
|
|
729
|
+
script=f'{unset_ray_env_vars}; {{script}}'
|
|
724
730
|
script += rclone_flush_script
|
|
725
731
|
sky_env_vars_dict['{constants.SKYPILOT_NUM_GPUS_PER_NODE}'] = {int(math.ceil(num_gpus))!r}
|
|
726
732
|
|
|
@@ -3261,9 +3267,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
3261
3267
|
# Usage Collection:
|
|
3262
3268
|
usage_lib.messages.usage.update_cluster_resources(
|
|
3263
3269
|
handle.launched_nodes, launched_resources)
|
|
3264
|
-
|
|
3265
|
-
if
|
|
3266
|
-
usage_lib.messages.usage.update_cluster_status(
|
|
3270
|
+
status = global_user_state.get_status_from_cluster_name(cluster_name)
|
|
3271
|
+
if status is not None:
|
|
3272
|
+
usage_lib.messages.usage.update_cluster_status(status)
|
|
3267
3273
|
|
|
3268
3274
|
assert launched_resources.region is not None, handle
|
|
3269
3275
|
|
|
@@ -3532,8 +3538,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
3532
3538
|
error_message + '\n' + str(e),
|
|
3533
3539
|
failover_history=e.failover_history) from None
|
|
3534
3540
|
if dryrun:
|
|
3535
|
-
|
|
3536
|
-
|
|
3541
|
+
handle = global_user_state.get_handle_from_cluster_name(
|
|
3542
|
+
cluster_name)
|
|
3543
|
+
return handle if handle is not None else None, False
|
|
3537
3544
|
|
|
3538
3545
|
if config_dict['provisioning_skipped']:
|
|
3539
3546
|
# Skip further provisioning.
|
|
@@ -3541,10 +3548,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
3541
3548
|
# ('handle', 'provision_record', 'resources_vars')
|
|
3542
3549
|
# We need to return the handle - but it should be the existing
|
|
3543
3550
|
# handle for the cluster.
|
|
3544
|
-
|
|
3545
|
-
|
|
3546
|
-
|
|
3547
|
-
return
|
|
3551
|
+
handle = global_user_state.get_handle_from_cluster_name(
|
|
3552
|
+
cluster_name)
|
|
3553
|
+
assert handle is not None, (cluster_name, handle)
|
|
3554
|
+
return handle, True
|
|
3548
3555
|
|
|
3549
3556
|
if 'provision_record' in config_dict:
|
|
3550
3557
|
# New provisioner is used here.
|
|
@@ -3939,6 +3946,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
3939
3946
|
remote_setup_file_name = f'/tmp/sky_setup_{self.run_timestamp}'
|
|
3940
3947
|
# Need this `-i` option to make sure `source ~/.bashrc` work
|
|
3941
3948
|
setup_cmd = f'/bin/bash -i {remote_setup_file_name} 2>&1'
|
|
3949
|
+
unset_ray_env_vars = ' && '.join(
|
|
3950
|
+
[f'unset {var}' for var in UNSET_RAY_ENV_VARS])
|
|
3951
|
+
setup_cmd = f'{unset_ray_env_vars}; {setup_cmd}'
|
|
3942
3952
|
runners = handle.get_command_runners(avoid_ssh_control=True)
|
|
3943
3953
|
|
|
3944
3954
|
def _setup_node(node_id: int) -> None:
|
|
@@ -4088,6 +4098,18 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
4088
4098
|
logger.info(
|
|
4089
4099
|
ux_utils.finishing_message('Setup completed.', setup_log_path))
|
|
4090
4100
|
|
|
4101
|
+
def _download_file(self, handle: CloudVmRayResourceHandle,
|
|
4102
|
+
local_file_path: str, remote_file_path: str) -> None:
|
|
4103
|
+
"""Syncs file from remote to local."""
|
|
4104
|
+
runners = handle.get_command_runners()
|
|
4105
|
+
head_runner = runners[0]
|
|
4106
|
+
head_runner.rsync(
|
|
4107
|
+
source=local_file_path,
|
|
4108
|
+
target=remote_file_path,
|
|
4109
|
+
up=False,
|
|
4110
|
+
stream_logs=False,
|
|
4111
|
+
)
|
|
4112
|
+
|
|
4091
4113
|
def _exec_code_on_head(
|
|
4092
4114
|
self,
|
|
4093
4115
|
handle: CloudVmRayResourceHandle,
|
|
@@ -4992,10 +5014,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
4992
5014
|
f'{handle.cluster_name!r}. Assuming the cluster is still '
|
|
4993
5015
|
'up.')
|
|
4994
5016
|
if not cluster_status_fetched:
|
|
4995
|
-
|
|
5017
|
+
status = global_user_state.get_status_from_cluster_name(
|
|
4996
5018
|
handle.cluster_name)
|
|
4997
|
-
prev_cluster_status =
|
|
4998
|
-
'status'] if record is not None else None
|
|
5019
|
+
prev_cluster_status = status if status is not None else None
|
|
4999
5020
|
if prev_cluster_status is None:
|
|
5000
5021
|
# When the cluster is not in the cluster table, we guarantee that
|
|
5001
5022
|
# all related resources / cache / config are cleaned up, i.e. it
|
|
@@ -5568,7 +5589,8 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
5568
5589
|
exceptions.InvalidClusterNameError: If the cluster name is invalid.
|
|
5569
5590
|
# TODO(zhwu): complete the list of exceptions.
|
|
5570
5591
|
"""
|
|
5571
|
-
record = global_user_state.get_cluster_from_name(
|
|
5592
|
+
record = global_user_state.get_cluster_from_name(
|
|
5593
|
+
cluster_name, include_user_info=False, summary_response=True)
|
|
5572
5594
|
if record is None:
|
|
5573
5595
|
handle_before_refresh = None
|
|
5574
5596
|
status_before_refresh = None
|
|
@@ -5589,6 +5611,8 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
5589
5611
|
cluster_name,
|
|
5590
5612
|
force_refresh_statuses={status_lib.ClusterStatus.INIT},
|
|
5591
5613
|
acquire_per_cluster_status_lock=False,
|
|
5614
|
+
include_user_info=False,
|
|
5615
|
+
summary_response=True,
|
|
5592
5616
|
)
|
|
5593
5617
|
if record is not None:
|
|
5594
5618
|
prev_cluster_status = record['status']
|