skypilot-nightly 1.0.0.dev20251016__py3-none-any.whl → 1.0.0.dev20251018__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skypilot-nightly might be problematic.
- sky/__init__.py +2 -2
- sky/authentication.py +17 -157
- sky/backends/backend_utils.py +6 -5
- sky/catalog/kubernetes_catalog.py +5 -3
- sky/client/cli/command.py +0 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/fluidstack/instance.py +2 -2
- sky/provision/seeweb/instance.py +3 -3
- sky/serve/service.py +2 -2
- sky/server/requests/preconditions.py +2 -2
- sky/server/requests/requests.py +32 -24
- sky/server/server.py +4 -5
- sky/server/stream_utils.py +10 -3
- sky/setup_files/dependencies.py +19 -8
- sky/utils/auth_utils.py +153 -0
- sky/utils/command_runner.py +3 -0
- sky/utils/locks.py +5 -2
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/METADATA +277 -48
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/RECORD +40 -39
- /sky/dashboard/out/_next/static/{pbgtEUoCUdmJyLHjgln5A → Rn37hj-nuHOYT-HwxSDXC}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{pbgtEUoCUdmJyLHjgln5A → Rn37hj-nuHOYT-HwxSDXC}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251016.dist-info → skypilot_nightly-1.0.0.dev20251018.dist-info}/top_level.txt +0 -0

sky/dashboard/out/workspaces/[name].html
CHANGED

@@ -1 +1 @@
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/Rn37hj-nuHOYT-HwxSDXC/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Rn37hj-nuHOYT-HwxSDXC/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"Rn37hj-nuHOYT-HwxSDXC","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/dashboard/out/workspaces.html
CHANGED

@@ -1 +1 @@
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/Rn37hj-nuHOYT-HwxSDXC/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Rn37hj-nuHOYT-HwxSDXC/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"Rn37hj-nuHOYT-HwxSDXC","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/provision/fluidstack/instance.py
CHANGED

@@ -3,11 +3,11 @@ import os
 import time
 from typing import Any, Dict, List, Optional, Tuple

-from sky import authentication as auth
 from sky import exceptions
 from sky import sky_logging
 from sky.provision import common
 from sky.provision.fluidstack import fluidstack_utils as utils
+from sky.utils import auth_utils
 from sky.utils import command_runner
 from sky.utils import common_utils
 from sky.utils import status_lib
@@ -27,7 +27,7 @@ logger = sky_logging.init_logger(__name__)
 def get_internal_ip(node_info: Dict[str, Any]) -> None:
     node_info['internal_ip'] = node_info['ip_address']

-    private_key_path, _ = auth.get_or_generate_keys()
+    private_key_path, _ = auth_utils.get_or_generate_keys()
     runner = command_runner.SSHCommandRunner(
         (node_info['ip_address'], 22),
         ssh_user='ubuntu',
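
For context, the pattern both provisioners now follow is: fetch the shared SkyPilot key pair via sky.utils.auth_utils and hand the private key path to an SSH runner. A minimal sketch of that call sequence, assuming a placeholder node address and user that are not taken from this diff:

from sky.utils import auth_utils
from sky.utils import command_runner

# Resolve (or lazily create) the per-user SkyPilot key pair.
private_key_path, _ = auth_utils.get_or_generate_keys()

# Open an SSH runner against a node; the host and user below are illustrative.
runner = command_runner.SSHCommandRunner(
    ('203.0.113.10', 22),
    ssh_user='ubuntu',
    ssh_private_key=private_key_path)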
sky/provision/seeweb/instance.py
CHANGED
@@ -9,7 +9,6 @@ import subprocess
 import time
 from typing import Any, Dict, List, Optional, Tuple

-from sky import authentication as auth
 from sky import sky_logging
 from sky.adaptors import seeweb as seeweb_adaptor
 from sky.provision import common

@@ -17,6 +16,7 @@ from sky.provision.common import ClusterInfo
 from sky.provision.common import InstanceInfo
 from sky.provision.common import ProvisionConfig
 from sky.provision.common import ProvisionRecord
+from sky.utils import auth_utils
 from sky.utils import command_runner  # Unified SSH helper
 from sky.utils import common_utils
 from sky.utils import status_lib
@@ -75,7 +75,7 @@ class SeewebNodeProvider:
         if self.config and self.config.authentication_config:
             key_path = self.config.authentication_config.get('ssh_private_key')
             if not key_path:
-                key_path, _ = auth.get_or_generate_keys()
+                key_path, _ = auth_utils.get_or_generate_keys()
         return os.path.expanduser(key_path)

     # ------------------------------------------------------------------ #
@@ -661,7 +661,7 @@ def _ping_server_standalone(server_ip: str) -> bool:
 def _check_ssh_ready_standalone(server_ip: str) -> bool:
     """Check that SSH is available on the server (standalone version)."""
     try:
-        private_key_path, _ = auth.get_or_generate_keys()
+        private_key_path, _ = auth_utils.get_or_generate_keys()
         private_key_path = os.path.expanduser(private_key_path)
         ssh_user = 'ecuser'
         result = subprocess.run([
sky/serve/service.py
CHANGED
@@ -13,7 +13,6 @@ from typing import Dict

 import filelock

-from sky import authentication
 from sky import exceptions
 from sky import global_user_state
 from sky import sky_logging
@@ -28,6 +27,7 @@ from sky.serve import replica_managers
 from sky.serve import serve_state
 from sky.serve import serve_utils
 from sky.skylet import constants as skylet_constants
+from sky.utils import auth_utils
 from sky.utils import common_utils
 from sky.utils import controller_utils
 from sky.utils import subprocess_utils
@@ -227,7 +227,7 @@ def _start(service_name: str, tmp_task_yaml: str, job_id: int, entrypoint: str):
     """
     # Generate ssh key pair to avoid race condition when multiple sky.launch
     # are executed at the same time.
-    authentication.get_or_generate_keys()
+    auth_utils.get_or_generate_keys()

     # Initialize database record for the service.
     task = task_lib.Task.from_yaml(tmp_task_yaml)

sky/server/requests/preconditions.py
CHANGED

@@ -162,8 +162,8 @@ class ClusterStartCompletePrecondition(Precondition):
         requests = await api_requests.get_request_tasks_async(
             req_filter=api_requests.RequestTaskFilter(
                 status=[
-                    api_requests.RequestStatus.
-                    api_requests.RequestStatus.
+                    api_requests.RequestStatus.PENDING,
+                    api_requests.RequestStatus.RUNNING
                 ],
                 include_request_names=['sky.launch', 'sky.start'],
                 cluster_names=[self.cluster_name]))
sky/server/requests/requests.py
CHANGED
@@ -398,9 +398,9 @@ def kill_cluster_requests(cluster_name: str, exclude_request_name: str):
     request_ids = [
         request_task.request_id
         for request_task in get_request_tasks(req_filter=RequestTaskFilter(
-            cluster_names=[cluster_name],
             status=[RequestStatus.PENDING, RequestStatus.RUNNING],
-            exclude_request_names=[exclude_request_name]))
+            exclude_request_names=[exclude_request_name],
+            cluster_names=[cluster_name]))
     ]
     kill_requests(request_ids)

@@ -422,10 +422,10 @@ def kill_requests(request_ids: Optional[List[str]] = None,
     request_ids = [
         request_task.request_id
         for request_task in get_request_tasks(req_filter=RequestTaskFilter(
-
-            status=[RequestStatus.RUNNING, RequestStatus.PENDING],
+            status=[RequestStatus.PENDING, RequestStatus.RUNNING],
             # Avoid cancelling the cancel request itself.
-            exclude_request_names=['sky.api_cancel']))
+            exclude_request_names=['sky.api_cancel'],
+            user_id=user_id))
     ]
     cancelled_request_ids = []
     for request_id in request_ids:
@@ -497,6 +497,21 @@ def create_table(cursor, conn):
     db_utils.add_column_to_table(cursor, conn, REQUEST_TABLE, COL_FINISHED_AT,
                                  'REAL')

+    # Add an index on (status, name) to speed up queries
+    # that filter on these columns.
+    cursor.execute(f"""\
+        CREATE INDEX IF NOT EXISTS status_name_idx ON {REQUEST_TABLE} (status, name) WHERE status IN ('PENDING', 'RUNNING');
+        """)
+    # Add an index on cluster_name to speed up queries
+    # that filter on this column.
+    cursor.execute(f"""\
+        CREATE INDEX IF NOT EXISTS cluster_name_idx ON {REQUEST_TABLE} ({COL_CLUSTER_NAME}) WHERE status IN ('PENDING', 'RUNNING');
+        """)
+    # Add an index on created_at to speed up queries that sort on this column.
+    cursor.execute(f"""\
+        CREATE INDEX IF NOT EXISTS created_at_idx ON {REQUEST_TABLE} (created_at);
+        """)
+

 _DB = None
 _init_db_lock = threading.Lock()
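
For reference, the new partial indexes can be reproduced on a plain SQLite database; the sketch below uses a toy schema with illustrative column names rather than SkyPilot's real requests table:

import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
# Toy stand-in for the requests table.
cur.execute('CREATE TABLE requests ('
            'request_id TEXT, name TEXT, status TEXT, '
            'cluster_name TEXT, created_at REAL)')
# Partial indexes only cover PENDING/RUNNING rows, which keeps them small
# while accelerating the hot-path filters used by cancel/kill queries.
cur.execute("CREATE INDEX IF NOT EXISTS status_name_idx ON requests "
            "(status, name) WHERE status IN ('PENDING', 'RUNNING')")
cur.execute("CREATE INDEX IF NOT EXISTS cluster_name_idx ON requests "
            "(cluster_name) WHERE status IN ('PENDING', 'RUNNING')")
cur.execute('CREATE INDEX IF NOT EXISTS created_at_idx ON requests (created_at)')
conn.commit()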
@@ -753,6 +768,10 @@ class RequestTaskFilter:
             status_list_str = ','.join(
                 repr(status.value) for status in self.status)
             filters.append(f'status IN ({status_list_str})')
+        if self.include_request_names is not None:
+            request_names_str = ','.join(
+                repr(name) for name in self.include_request_names)
+            filters.append(f'name IN ({request_names_str})')
         if self.exclude_request_names is not None:
             exclude_request_names_str = ','.join(
                 repr(name) for name in self.exclude_request_names)
@@ -764,10 +783,6 @@ class RequestTaskFilter:
         if self.user_id is not None:
             filters.append(f'{COL_USER_ID} = ?')
             filter_params.append(self.user_id)
-        if self.include_request_names is not None:
-            request_names_str = ','.join(
-                repr(name) for name in self.include_request_names)
-            filters.append(f'name IN ({request_names_str})')
         if self.finished_before is not None:
             filters.append('finished_at < ?')
             filter_params.append(self.finished_before)
@@ -800,6 +815,10 @@ def get_request_tasks(req_filter: RequestTaskFilter) -> List[Request]:
         rows = cursor.fetchall()
         if rows is None:
             return []
+        if req_filter.fields:
+            rows = [
+                _update_request_row_fields(row, req_filter.fields) for row in rows
+            ]
         return [Request.from_row(row) for row in rows]


@@ -812,21 +831,10 @@ async def get_request_tasks_async(
     async with _DB.execute_fetchall_async(*req_filter.build_query()) as rows:
         if not rows:
             return []
-
-
-
-
-@metrics_lib.time_me_async
-async def get_request_tasks_with_fields_async(
-        req_filter: RequestTaskFilter,
-        fields: Optional[List[str]] = None,
-) -> List[Request]:
-    """Async version of get_request_tasks."""
-    assert _DB is not None
-    async with _DB.execute_fetchall_async(*req_filter.build_query()) as rows:
-        if not rows:
-            return []
-        rows = [_update_request_row_fields(row, fields) for row in rows]
+        if req_filter.fields:
+            rows = [
+                _update_request_row_fields(row, req_filter.fields) for row in rows
+            ]
         return [Request.from_row(row) for row in rows]


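
With get_request_tasks_with_fields_async removed, the column subset now travels inside RequestTaskFilter itself. A hedged usage sketch of the new call pattern, assuming the import alias used by the server and illustrative field names:

from sky.server.requests import requests as requests_lib


async def list_active_requests():
    # The same filter object now carries the projected fields.
    return await requests_lib.get_request_tasks_async(
        req_filter=requests_lib.RequestTaskFilter(
            status=[requests_lib.RequestStatus.PENDING,
                    requests_lib.RequestStatus.RUNNING],
            fields=['request_id', 'name', 'status']))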
sky/server/server.py
CHANGED
@@ -1667,14 +1667,12 @@ async def api_status(
             requests_lib.RequestStatus.PENDING,
             requests_lib.RequestStatus.RUNNING,
         ]
-        request_tasks = await requests_lib.get_request_tasks_with_fields_async(
+        request_tasks = await requests_lib.get_request_tasks_async(
             req_filter=requests_lib.RequestTaskFilter(
                 status=statuses,
                 limit=limit,
                 fields=fields,
-            )
-            fields=fields,
-        )
+            ))
         return requests_lib.encode_requests(request_tasks)
     else:
         encoded_request_tasks = []
@@ -2058,7 +2056,8 @@ if __name__ == '__main__':
         uvicorn_config = uvicorn.Config('sky.server.server:app',
                                         host=cmd_args.host,
                                         port=cmd_args.port,
-                                        workers=num_workers)
+                                        workers=num_workers,
+                                        ws_per_message_deflate=False)
         skyuvicorn.run(uvicorn_config,
                        max_db_connections=config.num_db_connections_per_worker)
     except Exception as exc:  # pylint: disable=broad-except
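
For reference, ws_per_message_deflate=False turns off per-message compression on websocket connections. A minimal sketch of building the same uvicorn configuration standalone; the host, port, and worker count below are placeholders:

import uvicorn

config = uvicorn.Config('sky.server.server:app',
                        host='127.0.0.1',
                        port=8000,
                        workers=1,
                        ws_per_message_deflate=False)
server = uvicorn.Server(config)
# server.run() would start it; SkyPilot instead hands the config to its own
# skyuvicorn.run() wrapper, as shown in the hunk above.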
sky/server/stream_utils.py
CHANGED
@@ -215,11 +215,18 @@ async def _tail_log_file(
             # periodically to see if provisioning is done.
             if cluster_name is not None and should_check_status:
                 last_status_check_time = current_time
-
+                cluster_status = await (
                     global_user_state.get_status_from_cluster_name_async(
                         cluster_name))
-                if
-
+                if cluster_status is None:
+                    logger.debug(
+                        'Stop tailing provision logs for cluster'
+                        f' status for cluster {cluster_name} not found')
+                    break
+                if cluster_status != status_lib.ClusterStatus.INIT:
+                    logger.debug(f'Stop tailing provision logs for cluster'
+                                 f' {cluster_name} has status {cluster_status} '
+                                 '(not in INIT state)')
                     break
             if current_time - last_heartbeat_time >= _HEARTBEAT_INTERVAL:
                 # Currently just used to keep the connection busy, refer to
sky/setup_files/dependencies.py
CHANGED
@@ -86,7 +86,6 @@ install_requires = [
     'types-paramiko',
     'alembic',
     'aiohttp',
-    'aiosqlite',
     'anyio',
 ]

@@ -104,6 +103,10 @@ GRPC = 'grpcio>=1.63.0'
 PROTOBUF = 'protobuf>=5.26.1, < 7.0.0'

 server_dependencies = [
+    # TODO: Some of these dependencies are also specified in install_requires,
+    # so they are redundant here. We should figure out if they are only needed
+    # on the server (should remove from install_requires), or if they are needed
+    # on the client (should remove from here).
     'casbin',
     'sqlalchemy_adapter',
     'passlib',
@@ -148,7 +151,7 @@ aws_dependencies = [
 # a few places.
 AZURE_CLI = 'azure-cli>=2.65.0'

-
+cloud_dependencies: Dict[str, List[str]] = {
     'aws': aws_dependencies,
     # TODO(zongheng): azure-cli is huge and takes a long time to install.
     # Tracked in: https://github.com/Azure/azure-cli/issues/7387
@@ -191,7 +194,6 @@ extras_require: Dict[str, List[str]] = {
         'kubernetes>=20.0.0,!=32.0.0', 'websockets', 'python-dateutil'
     ],
     'ssh': ['kubernetes>=20.0.0,!=32.0.0', 'websockets', 'python-dateutil'],
-    'remote': remote,
     # For the container registry auth api. Reference:
     # https://github.com/runpod/runpod-python/releases/tag/1.6.1
     # RunPod needs a TOML parser to read ~/.runpod/config.toml. On Python 3.11+
@@ -221,13 +223,11 @@ extras_require: Dict[str, List[str]] = {
     ] + aws_dependencies,
     'hyperbolic': [],  # No dependencies needed for hyperbolic
     'seeweb': ['ecsapi>=0.2.0'],
-    'server': server_dependencies,
     'shadeform': [],  # No dependencies needed for shadeform
 }

 # Calculate which clouds should be included in the [all] installation.
-clouds_for_all = set(
-clouds_for_all.remove('remote')
+clouds_for_all = set(cloud_dependencies)

 if sys.version_info < (3, 10):
     # Nebius needs python3.10. If python 3.9 [all] will not install nebius
@@ -242,5 +242,16 @@ if sys.version_info >= (3, 12):
     # TODO: Remove once https://github.com/vast-ai/vast-sdk/pull/6 is released
     clouds_for_all.remove('vast')

-
-
+cloud_extras = {
+    cloud: dependencies + server_dependencies
+    for cloud, dependencies in cloud_dependencies.items()
+}
+
+extras_require: Dict[str, List[str]] = {
+    # Include server_dependencies with each cloud.
+    **cloud_extras,
+    'all': list(set().union(*[cloud_extras[cloud] for cloud in clouds_for_all])
+               ),
+    'remote': remote,
+    'server': server_dependencies,
+}
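
The net effect of this restructuring is that every per-cloud extra now also pulls in the API-server dependencies, and 'all' is the union of those per-cloud lists. A toy sketch of the same dict-comprehension pattern with made-up stand-in values:

server_dependencies = ['casbin', 'passlib']          # stand-in values
cloud_dependencies = {'aws': ['boto3'], 'seeweb': ['ecsapi>=0.2.0']}
remote = []                                          # stand-in value

cloud_extras = {
    cloud: deps + server_dependencies
    for cloud, deps in cloud_dependencies.items()
}
extras_require = {
    **cloud_extras,
    'all': sorted(set().union(*cloud_extras.values())),
    'remote': remote,
    'server': server_dependencies,
}
print(extras_require['aws'])  # ['boto3', 'casbin', 'passlib']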
sky/utils/auth_utils.py
ADDED

@@ -0,0 +1,153 @@
+"""Utils for managing SkyPilot SSH key pairs."""
+
+import functools
+import os
+from typing import Tuple
+
+import filelock
+
+from sky import global_user_state
+from sky import sky_logging
+from sky.utils import common_utils
+
+logger = sky_logging.init_logger(__name__)
+
+MAX_TRIALS = 64
+# TODO(zhwu): Support user specified key pair.
+# We intentionally not have the ssh key pair to be stored in
+# ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
+# because ssh key pair need to persist across API server restarts, while
+# the former dir is empheral.
+_SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
+
+
+def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
+    user_ssh_key_prefix = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
+
+    os.makedirs(os.path.expanduser(user_ssh_key_prefix),
+                exist_ok=True,
+                mode=0o700)
+    private_key_path = os.path.join(user_ssh_key_prefix, 'sky-key')
+    public_key_path = os.path.join(user_ssh_key_prefix, 'sky-key.pub')
+    lock_path = os.path.join(user_ssh_key_prefix, '.__internal-sky-key.lock')
+    return private_key_path, public_key_path, lock_path
+
+
+def _generate_rsa_key_pair() -> Tuple[str, str]:
+    # Keep the import of the cryptography local to avoid expensive
+    # third-party imports when not needed.
+    # pylint: disable=import-outside-toplevel
+    from cryptography.hazmat.backends import default_backend
+    from cryptography.hazmat.primitives import serialization
+    from cryptography.hazmat.primitives.asymmetric import rsa
+
+    key = rsa.generate_private_key(backend=default_backend(),
+                                   public_exponent=65537,
+                                   key_size=2048)
+
+    private_key = key.private_bytes(
+        encoding=serialization.Encoding.PEM,
+        format=serialization.PrivateFormat.TraditionalOpenSSL,
+        encryption_algorithm=serialization.NoEncryption()).decode(
+            'utf-8').strip()
+
+    public_key = key.public_key().public_bytes(
+        serialization.Encoding.OpenSSH,
+        serialization.PublicFormat.OpenSSH).decode('utf-8').strip()
+
+    return public_key, private_key
+
+
+def _save_key_pair(private_key_path: str, public_key_path: str,
+                   private_key: str, public_key: str) -> None:
+    key_dir = os.path.dirname(private_key_path)
+    os.makedirs(key_dir, exist_ok=True, mode=0o700)
+
+    with open(
+            private_key_path,
+            'w',
+            encoding='utf-8',
+            opener=functools.partial(os.open, mode=0o600),
+    ) as f:
+        f.write(private_key)
+
+    with open(public_key_path,
+              'w',
+              encoding='utf-8',
+              opener=functools.partial(os.open, mode=0o644)) as f:
+        f.write(public_key)
+
+
+def get_or_generate_keys() -> Tuple[str, str]:
+    """Returns the absolute private and public key paths."""
+    user_hash = common_utils.get_user_hash()
+    private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path(
+        user_hash)
+    private_key_path = os.path.expanduser(private_key_path)
+    public_key_path = os.path.expanduser(public_key_path)
+    lock_path = os.path.expanduser(lock_path)
+
+    lock_dir = os.path.dirname(lock_path)
+    # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
+    # as the ssh configs will be written to this folder as well in
+    # backend_utils.SSHConfigHelper
+    os.makedirs(lock_dir, exist_ok=True, mode=0o700)
+    with filelock.FileLock(lock_path, timeout=10):
+        if not os.path.exists(private_key_path):
+            ssh_public_key, ssh_private_key, exists = (
+                global_user_state.get_ssh_keys(user_hash))
+            if not exists:
+                ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
+                global_user_state.set_ssh_keys(user_hash, ssh_public_key,
+                                               ssh_private_key)
+            _save_key_pair(private_key_path, public_key_path, ssh_private_key,
+                           ssh_public_key)
+        assert os.path.exists(public_key_path), (
+            'Private key found, but associated public key '
+            f'{public_key_path} does not exist.')
+    return private_key_path, public_key_path
+
+
+def create_ssh_key_files_from_db(private_key_path: str) -> bool:
+    """Creates the ssh key files from the database.
+
+    Returns:
+        True if the ssh key files are created successfully, False otherwise.
+    """
+    # Assume private key path is in the format of
+    # ~/.sky/clients/<user_hash>/ssh/sky-key
+    separated_path = os.path.normpath(private_key_path).split(os.path.sep)
+    assert separated_path[-1] == 'sky-key'
+    assert separated_path[-2] == 'ssh'
+    user_hash = separated_path[-3]
+
+    private_key_path_generated, public_key_path, lock_path = (
+        get_ssh_key_and_lock_path(user_hash))
+    assert private_key_path == os.path.expanduser(private_key_path_generated), (
+        f'Private key path {private_key_path} does not '
+        'match the generated path '
+        f'{os.path.expanduser(private_key_path_generated)}')
+    private_key_path = os.path.expanduser(private_key_path)
+    public_key_path = os.path.expanduser(public_key_path)
+    lock_path = os.path.expanduser(lock_path)
+    lock_dir = os.path.dirname(lock_path)
+
+    if os.path.exists(private_key_path) and os.path.exists(public_key_path):
+        return True
+    # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
+    # as the ssh configs will be written to this folder as well in
+    # backend_utils.SSHConfigHelper
+    os.makedirs(lock_dir, exist_ok=True, mode=0o700)
+    with filelock.FileLock(lock_path, timeout=10):
+        if not os.path.exists(private_key_path):
+            ssh_public_key, ssh_private_key, exists = (
+                global_user_state.get_ssh_keys(user_hash))
+            if not exists:
+                logger.debug(f'SSH keys not found for user {user_hash}')
+                return False
+            _save_key_pair(private_key_path, public_key_path, ssh_private_key,
+                           ssh_public_key)
+        assert os.path.exists(public_key_path), (
+            'Private key found, but associated public key '
+            f'{public_key_path} does not exist.')
+    return True
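
A brief usage sketch of the new module, mirroring how the call sites elsewhere in this diff use it:

from sky.utils import auth_utils

# Create the per-user key pair if needed and get absolute paths to it.
private_key_path, public_key_path = auth_utils.get_or_generate_keys()

# On a process that only has the database copy of the keys (e.g. a freshly
# started API server worker), materialize the files before SSH-ing.
ok = auth_utils.create_ssh_key_files_from_db(private_key_path)
if not ok:
    print('No SSH keys stored for this user yet.')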
sky/utils/command_runner.py
CHANGED
@@ -14,6 +14,7 @@ from sky import exceptions
 from sky import sky_logging
 from sky.skylet import constants
 from sky.skylet import log_lib
+from sky.utils import auth_utils
 from sky.utils import common_utils
 from sky.utils import context_utils
 from sky.utils import control_master_utils
@@ -649,6 +650,8 @@ class SSHCommandRunner(CommandRunner):
         self.disable_control_master = (
             disable_control_master or
             control_master_utils.should_disable_control_master())
+        # ensure the ssh key files are created from the database
+        auth_utils.create_ssh_key_files_from_db(ssh_private_key)
         if docker_user is not None:
             assert port is None or port == 22, (
                 f'port must be None or 22 for docker_user, got {port}.')
sky/utils/locks.py
CHANGED
@@ -312,8 +312,11 @@ class PostgresLock(DistributedLock):
             else:
                 self._connection.close()
         except Exception as e:  # pylint: disable=broad-except
-
-
+            if invalidate:
+                logger.debug(
+                    f'Failed to invalidate postgres connection: {e}')
+            else:
+                logger.debug(f'Failed to close postgres connection: {e}')
         self._connection = None

     def is_locked(self) -> bool: