skypilot-nightly 1.0.0.dev20250915__py3-none-any.whl → 1.0.0.dev20250918__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +68 -4
- sky/authentication.py +25 -0
- sky/backends/__init__.py +3 -2
- sky/backends/backend_utils.py +16 -12
- sky/backends/cloud_vm_ray_backend.py +61 -4
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/client/sdk.py +6 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/primeintellect.py +314 -0
- sky/core.py +10 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/3015-ba5be550eb80fd8c.js +1 -0
- sky/dashboard/out/_next/static/chunks/5339.4a881570243431a5.js +51 -0
- sky/dashboard/out/_next/static/chunks/{6856-e0754534b3015377.js → 6856-9a2538f38c004652.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{6990-11c8e9b982e8ffec.js → 6990-f6818c84ed8f1c86.js} +1 -1
- sky/dashboard/out/_next/static/chunks/8969-a3e3f0683e19d340.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +6 -0
- sky/dashboard/out/_next/static/chunks/{webpack-d1e29b3aa66bf4cf.js → webpack-487697b47d8c5e50.js} +1 -1
- sky/dashboard/out/_next/static/{dG6B0i0HO4jIoKb4ZFYJ_ → k1mo5xWZrV9djgjd0moOT}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +42 -34
- sky/jobs/server/server.py +14 -1
- sky/jobs/state.py +26 -1
- sky/provision/__init__.py +1 -0
- sky/provision/docker_utils.py +50 -3
- sky/provision/instance_setup.py +15 -1
- sky/provision/lambda_cloud/instance.py +12 -11
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/resources.py +9 -1
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_utils.py +29 -12
- sky/serve/server/core.py +37 -19
- sky/serve/server/impl.py +221 -129
- sky/server/common.py +13 -0
- sky/server/constants.py +3 -0
- sky/server/requests/executor.py +23 -6
- sky/server/server.py +10 -5
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +5 -3
- sky/skylet/services.py +98 -0
- sky/skylet/skylet.py +3 -1
- sky/skypilot_config.py +10 -3
- sky/templates/kubernetes-ray.yml.j2 +22 -12
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/METADATA +39 -38
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/RECORD +74 -62
- sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +0 -51
- sky/dashboard/out/_next/static/chunks/8969-0487dfbf149d9e53.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +0 -6
- /sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-7528cc0ef8c522c5.js} +0 -0
- /sky/dashboard/out/_next/static/{dG6B0i0HO4jIoKb4ZFYJ_ → k1mo5xWZrV9djgjd0moOT}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250915.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/top_level.txt +0 -0
sky/skylet/services.py
CHANGED
|
@@ -10,7 +10,11 @@ from sky.schemas.generated import autostopv1_pb2
|
|
|
10
10
|
from sky.schemas.generated import autostopv1_pb2_grpc
|
|
11
11
|
from sky.schemas.generated import jobsv1_pb2
|
|
12
12
|
from sky.schemas.generated import jobsv1_pb2_grpc
|
|
13
|
+
from sky.schemas.generated import servev1_pb2
|
|
14
|
+
from sky.schemas.generated import servev1_pb2_grpc
|
|
15
|
+
from sky.serve import serve_rpc_utils
|
|
13
16
|
from sky.serve import serve_state
|
|
17
|
+
from sky.serve import serve_utils
|
|
14
18
|
from sky.skylet import autostop_lib
|
|
15
19
|
from sky.skylet import constants
|
|
16
20
|
from sky.skylet import job_lib
|
|
@@ -52,6 +56,100 @@ class AutostopServiceImpl(autostopv1_pb2_grpc.AutostopServiceServicer):
|
|
|
52
56
|
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
53
57
|
|
|
54
58
|
|
|
59
|
+
class ServeServiceImpl(servev1_pb2_grpc.ServeServiceServicer):
|
|
60
|
+
"""Implementation of the ServeService gRPC service."""
|
|
61
|
+
|
|
62
|
+
# NOTE (kyuds): this grpc service will run cluster-side,
|
|
63
|
+
# thus guaranteeing that SERVE_VERSION is above 5.
|
|
64
|
+
# Therefore, we removed some SERVE_VERSION checks
|
|
65
|
+
# present in the original codegen.
|
|
66
|
+
|
|
67
|
+
def GetServiceStatus( # type: ignore[return]
|
|
68
|
+
self, request: servev1_pb2.GetServiceStatusRequest,
|
|
69
|
+
context: grpc.ServicerContext
|
|
70
|
+
) -> servev1_pb2.GetServiceStatusResponse:
|
|
71
|
+
"""Gets serve status."""
|
|
72
|
+
try:
|
|
73
|
+
service_names, pool = (
|
|
74
|
+
serve_rpc_utils.GetServiceStatusRequestConverter.from_proto(request)) # pylint: disable=line-too-long
|
|
75
|
+
statuses = serve_utils.get_service_status_pickled(
|
|
76
|
+
service_names, pool)
|
|
77
|
+
return serve_rpc_utils.GetServiceStatusResponseConverter.to_proto(
|
|
78
|
+
statuses)
|
|
79
|
+
except Exception as e: # pylint: disable=broad-except
|
|
80
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
81
|
+
|
|
82
|
+
def AddVersion( # type: ignore[return]
|
|
83
|
+
self, request: servev1_pb2.AddVersionRequest,
|
|
84
|
+
context: grpc.ServicerContext) -> servev1_pb2.AddVersionResponse:
|
|
85
|
+
"""Adds serve version"""
|
|
86
|
+
try:
|
|
87
|
+
service_name = request.service_name
|
|
88
|
+
version = serve_state.add_version(service_name)
|
|
89
|
+
return servev1_pb2.AddVersionResponse(version=version)
|
|
90
|
+
except Exception as e: # pylint: disable=broad-except
|
|
91
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
92
|
+
|
|
93
|
+
def TerminateServices( # type: ignore[return]
|
|
94
|
+
self, request: servev1_pb2.TerminateServicesRequest,
|
|
95
|
+
context: grpc.ServicerContext
|
|
96
|
+
) -> servev1_pb2.TerminateServicesResponse:
|
|
97
|
+
"""Terminates serve"""
|
|
98
|
+
try:
|
|
99
|
+
service_names, purge, pool = (
|
|
100
|
+
serve_rpc_utils.TerminateServicesRequestConverter.from_proto(request)) # pylint: disable=line-too-long
|
|
101
|
+
message = serve_utils.terminate_services(service_names, purge, pool)
|
|
102
|
+
return servev1_pb2.TerminateServicesResponse(message=message)
|
|
103
|
+
except Exception as e: # pylint: disable=broad-except
|
|
104
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
105
|
+
|
|
106
|
+
def TerminateReplica( # type: ignore[return]
|
|
107
|
+
self, request: servev1_pb2.TerminateReplicaRequest,
|
|
108
|
+
context: grpc.ServicerContext
|
|
109
|
+
) -> servev1_pb2.TerminateReplicaResponse:
|
|
110
|
+
"""Terminate replica"""
|
|
111
|
+
try:
|
|
112
|
+
service_name = request.service_name
|
|
113
|
+
replica_id = request.replica_id
|
|
114
|
+
purge = request.purge
|
|
115
|
+
message = serve_utils.terminate_replica(service_name, replica_id,
|
|
116
|
+
purge)
|
|
117
|
+
return servev1_pb2.TerminateReplicaResponse(message=message)
|
|
118
|
+
except Exception as e: # pylint: disable=broad-except
|
|
119
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
120
|
+
|
|
121
|
+
def WaitServiceRegistration( # type: ignore[return]
|
|
122
|
+
self, request: servev1_pb2.WaitServiceRegistrationRequest,
|
|
123
|
+
context: grpc.ServicerContext
|
|
124
|
+
) -> servev1_pb2.WaitServiceRegistrationResponse:
|
|
125
|
+
"""Wait for service to be registered"""
|
|
126
|
+
try:
|
|
127
|
+
service_name = request.service_name
|
|
128
|
+
job_id = request.job_id
|
|
129
|
+
pool = request.pool
|
|
130
|
+
encoded = serve_utils.wait_service_registration(
|
|
131
|
+
service_name, job_id, pool)
|
|
132
|
+
lb_port = serve_utils.load_service_initialization_result(encoded)
|
|
133
|
+
return servev1_pb2.WaitServiceRegistrationResponse(lb_port=lb_port)
|
|
134
|
+
except Exception as e: # pylint: disable=broad-except
|
|
135
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
136
|
+
|
|
137
|
+
def UpdateService( # type: ignore[return]
|
|
138
|
+
self, request: servev1_pb2.UpdateServiceRequest,
|
|
139
|
+
context: grpc.ServicerContext) -> servev1_pb2.UpdateServiceResponse:
|
|
140
|
+
"""Update service"""
|
|
141
|
+
try:
|
|
142
|
+
service_name = request.service_name
|
|
143
|
+
version = request.version
|
|
144
|
+
mode = request.mode
|
|
145
|
+
pool = request.pool
|
|
146
|
+
serve_utils.update_service_encoded(service_name, version, mode,
|
|
147
|
+
pool)
|
|
148
|
+
return servev1_pb2.UpdateServiceResponse()
|
|
149
|
+
except Exception as e: # pylint: disable=broad-except
|
|
150
|
+
context.abort(grpc.StatusCode.INTERNAL, str(e))
|
|
151
|
+
|
|
152
|
+
|
|
55
153
|
class JobsServiceImpl(jobsv1_pb2_grpc.JobsServiceServicer):
|
|
56
154
|
"""Implementation of the JobsService gRPC service."""
|
|
57
155
|
|
sky/skylet/skylet.py
CHANGED
|
@@ -10,6 +10,7 @@ import sky
|
|
|
10
10
|
from sky import sky_logging
|
|
11
11
|
from sky.schemas.generated import autostopv1_pb2_grpc
|
|
12
12
|
from sky.schemas.generated import jobsv1_pb2_grpc
|
|
13
|
+
from sky.schemas.generated import servev1_pb2_grpc
|
|
13
14
|
from sky.skylet import constants
|
|
14
15
|
from sky.skylet import events
|
|
15
16
|
from sky.skylet import services
|
|
@@ -50,9 +51,10 @@ def start_grpc_server(port: int = constants.SKYLET_GRPC_PORT) -> grpc.Server:
|
|
|
50
51
|
|
|
51
52
|
autostopv1_pb2_grpc.add_AutostopServiceServicer_to_server(
|
|
52
53
|
services.AutostopServiceImpl(), server)
|
|
53
|
-
|
|
54
54
|
jobsv1_pb2_grpc.add_JobsServiceServicer_to_server(
|
|
55
55
|
services.JobsServiceImpl(), server)
|
|
56
|
+
servev1_pb2_grpc.add_ServeServiceServicer_to_server(
|
|
57
|
+
services.ServeServiceImpl(), server)
|
|
56
58
|
|
|
57
59
|
listen_addr = f'127.0.0.1:{port}'
|
|
58
60
|
server.add_insecure_port(listen_addr)
|
sky/skypilot_config.py
CHANGED
|
@@ -415,10 +415,17 @@ def local_active_workspace_ctx(workspace: str) -> Iterator[None]:
|
|
|
415
415
|
def get_active_workspace(force_user_workspace: bool = False) -> str:
|
|
416
416
|
context_workspace = getattr(_active_workspace_context, 'workspace', None)
|
|
417
417
|
if not force_user_workspace and context_workspace is not None:
|
|
418
|
-
logger.debug(f'
|
|
418
|
+
logger.debug(f'Got context workspace: {context_workspace}')
|
|
419
419
|
return context_workspace
|
|
420
|
-
|
|
421
|
-
|
|
420
|
+
active_workspace = get_nested(keys=('active_workspace',),
|
|
421
|
+
default_value=None)
|
|
422
|
+
if active_workspace is None:
|
|
423
|
+
logger.debug(f'No active workspace found, using default workspace: '
|
|
424
|
+
f'{constants.SKYPILOT_DEFAULT_WORKSPACE}')
|
|
425
|
+
active_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
|
|
426
|
+
else:
|
|
427
|
+
logger.debug(f'Got active workspace: {active_workspace}')
|
|
428
|
+
return active_workspace
|
|
422
429
|
|
|
423
430
|
|
|
424
431
|
def set_nested(keys: Tuple[str, ...], value: Any) -> Dict[str, Any]:
|
|
@@ -823,7 +823,8 @@ available_node_types:
|
|
|
823
823
|
fi
|
|
824
824
|
$(prefix_cmd) cp -p "$FUSERMOUNT_PATH" "${FUSERMOUNT_PATH}-original"
|
|
825
825
|
$(prefix_cmd) ln -sf {{k8s_fusermount_shared_dir}}/fusermount-shim "$FUSERMOUNT_PATH"
|
|
826
|
-
|
|
826
|
+
# "|| true" because fusermount3 is not always available
|
|
827
|
+
FUSERMOUNT3_PATH=$(which fusermount3) || true
|
|
827
828
|
if [ -z "$FUSERMOUNT3_PATH" ]; then
|
|
828
829
|
FUSERMOUNT3_PATH="${FUSERMOUNT_PATH}3"
|
|
829
830
|
fi
|
|
@@ -872,11 +873,14 @@ available_node_types:
|
|
|
872
873
|
|
|
873
874
|
touch /tmp/apt_ssh_setup_complete
|
|
874
875
|
echo "=== SSH setup completed ==="
|
|
875
|
-
) > /tmp/${STEPS[0]}.log 2>&1
|
|
876
|
-
|
|
876
|
+
) > /tmp/${STEPS[0]}.log 2>&1
|
|
877
|
+
if [ "$?" -ne "0" ]; then
|
|
878
|
+
{
|
|
879
|
+
echo "Error: ${STEPS[0]} failed. Continuing anyway..." > /tmp/${STEPS[0]}.failed 2>&1
|
|
877
880
|
cat /tmp/${STEPS[0]}.log
|
|
878
881
|
exit 1
|
|
879
|
-
|
|
882
|
+
}
|
|
883
|
+
fi
|
|
880
884
|
) &
|
|
881
885
|
|
|
882
886
|
# STEP 2: Install conda, ray and skypilot (for dependencies); start
|
|
@@ -931,11 +935,14 @@ available_node_types:
|
|
|
931
935
|
set +e
|
|
932
936
|
{{ ray_worker_start_command }}
|
|
933
937
|
fi
|
|
934
|
-
) > /tmp/${STEPS[1]}.log 2>&1
|
|
935
|
-
|
|
938
|
+
) > /tmp/${STEPS[1]}.log 2>&1
|
|
939
|
+
if [ "$?" -ne "0" ]; then
|
|
940
|
+
{
|
|
941
|
+
echo "Error: ${STEPS[1]} failed. Continuing anyway..." > /tmp/${STEPS[1]}.failed 2>&1
|
|
936
942
|
cat /tmp/${STEPS[1]}.log
|
|
937
943
|
exit 1
|
|
938
|
-
|
|
944
|
+
}
|
|
945
|
+
fi
|
|
939
946
|
) &
|
|
940
947
|
|
|
941
948
|
|
|
@@ -953,11 +960,14 @@ available_node_types:
|
|
|
953
960
|
fi;
|
|
954
961
|
fi;
|
|
955
962
|
export -p > ~/container_env_var.sh && $(prefix_cmd) mv ~/container_env_var.sh /etc/profile.d/container_env_var.sh
|
|
956
|
-
) > /tmp/${STEPS[2]}.log 2>&1
|
|
957
|
-
|
|
963
|
+
) > /tmp/${STEPS[2]}.log 2>&1
|
|
964
|
+
if [ "$?" -ne "0" ]; then
|
|
965
|
+
{
|
|
966
|
+
echo "Error: ${STEPS[2]} failed. Continuing anyway..." > /tmp/${STEPS[2]}.failed 2>&1
|
|
958
967
|
cat /tmp/${STEPS[2]}.log
|
|
959
968
|
exit 1
|
|
960
|
-
|
|
969
|
+
}
|
|
970
|
+
fi
|
|
961
971
|
) &
|
|
962
972
|
|
|
963
973
|
function mylsof { p=$(for pid in /proc/{0..9}*; do i=$(basename "$pid"); for file in "$pid"/fd/*; do link=$(readlink -e "$file"); if [ "$link" = "$1" ]; then echo "$i"; fi; done; done); echo "$p"; };
|
|
@@ -1324,7 +1334,7 @@ setup_commands:
|
|
|
1324
1334
|
if [ -f /tmp/apt_ssh_setup_started ]; then
|
|
1325
1335
|
echo "=== Logs for asynchronous SSH setup ===";
|
|
1326
1336
|
[ -f /tmp/apt_ssh_setup_complete ] && cat /tmp/${STEPS[0]}.log ||
|
|
1327
|
-
{ tail -f -n +1 /tmp/${STEPS[0]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/apt_ssh_setup_complete ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
|
1337
|
+
{ tail -f -n +1 /tmp/${STEPS[0]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/apt_ssh_setup_complete ] || [ -f /tmp/${STEPS[0]}.failed ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
|
1328
1338
|
[ -f /tmp/${STEPS[0]}.failed ] && { echo "Error: ${STEPS[0]} failed. Exiting."; exit 1; } || true;
|
|
1329
1339
|
fi
|
|
1330
1340
|
|
|
@@ -1332,7 +1342,7 @@ setup_commands:
|
|
|
1332
1342
|
if [ -f /tmp/skypilot_is_nimbus ]; then
|
|
1333
1343
|
echo "=== Logs for asynchronous ray and skypilot installation ===";
|
|
1334
1344
|
[ -f /tmp/ray_skypilot_installation_complete ] && cat /tmp/${STEPS[1]}.log ||
|
|
1335
|
-
{ tail -f -n +1 /tmp/${STEPS[1]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/ray_skypilot_installation_complete ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
|
1345
|
+
{ tail -f -n +1 /tmp/${STEPS[1]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/ray_skypilot_installation_complete ] || [ -f /tmp/${STEPS[1]}.failed ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
|
1336
1346
|
[ -f /tmp/${STEPS[1]}.failed ] && { echo "Error: ${STEPS[1]} failed. Exiting."; exit 1; } || true;
|
|
1337
1347
|
fi
|
|
1338
1348
|
end_epoch=$(date +%s);
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
cluster_name: {{cluster_name_on_cloud}}
|
|
2
|
+
|
|
3
|
+
# The maximum number of workers nodes to launch in addition to the head node.
|
|
4
|
+
max_workers: {{num_nodes - 1}}
|
|
5
|
+
upscaling_speed: {{num_nodes - 1}}
|
|
6
|
+
idle_timeout_minutes: 60
|
|
7
|
+
|
|
8
|
+
provider:
|
|
9
|
+
type: external
|
|
10
|
+
module: sky.provision.primeintellect
|
|
11
|
+
region: "{{region}}"
|
|
12
|
+
zones: "{{zones}}"
|
|
13
|
+
|
|
14
|
+
auth:
|
|
15
|
+
ssh_user: skypilot:ssh_user
|
|
16
|
+
ssh_private_key: {{ssh_private_key}}
|
|
17
|
+
|
|
18
|
+
available_node_types:
|
|
19
|
+
ray_head_default:
|
|
20
|
+
resources: {}
|
|
21
|
+
node_config:
|
|
22
|
+
InstanceType: {{instance_type}}
|
|
23
|
+
DiskSize: {{disk_size}}
|
|
24
|
+
ImageId: {{image_id}}
|
|
25
|
+
PublicKey: |-
|
|
26
|
+
skypilot:ssh_public_key_content
|
|
27
|
+
|
|
28
|
+
head_node_type: ray_head_default
|
|
29
|
+
|
|
30
|
+
# Format: `REMOTE_PATH : LOCAL_PATH`
|
|
31
|
+
file_mounts: {
|
|
32
|
+
"{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
|
|
33
|
+
"{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
|
|
34
|
+
{%- for remote_path, local_path in credentials.items() %}
|
|
35
|
+
"{{remote_path}}": "{{local_path}}",
|
|
36
|
+
"~/.ssh/sky-cluster-key": "{{ssh_private_key}}",
|
|
37
|
+
{%- endfor %}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
rsync_exclude: []
|
|
41
|
+
|
|
42
|
+
initialization_commands: []
|
|
43
|
+
|
|
44
|
+
# List of shell commands to run to set up nodes.
|
|
45
|
+
# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
|
|
46
|
+
# connection, which is expensive. Try your best to co-locate commands into fewer
|
|
47
|
+
# items!
|
|
48
|
+
#
|
|
49
|
+
# Increment the following for catching performance bugs easier:
|
|
50
|
+
# current num items (num SSH connections): 1
|
|
51
|
+
setup_commands:
|
|
52
|
+
# Disable unattended-upgrades and handle apt-get locks
|
|
53
|
+
# Install patch utility for Ray
|
|
54
|
+
# Install conda and Ray
|
|
55
|
+
# Set system limits for Ray performance (nofile and TasksMax)
|
|
56
|
+
- {%- for initial_setup_command in initial_setup_commands %}
|
|
57
|
+
{{ initial_setup_command }}
|
|
58
|
+
{%- endfor %}
|
|
59
|
+
sudo systemctl stop unattended-upgrades || true;
|
|
60
|
+
sudo systemctl disable unattended-upgrades || true;
|
|
61
|
+
sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
|
|
62
|
+
sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true;
|
|
63
|
+
sudo pkill -9 apt-get;
|
|
64
|
+
sudo pkill -9 dpkg;
|
|
65
|
+
sudo dpkg --configure -a;
|
|
66
|
+
which patch > /dev/null || sudo apt install -y patch;
|
|
67
|
+
{{ conda_installation_commands }}
|
|
68
|
+
{{ ray_skypilot_installation_commands }}
|
|
69
|
+
sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
|
|
70
|
+
sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
|
|
71
|
+
{{ ssh_max_sessions_config }}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20250915
|
|
3
|
+
Version: 1.0.0.dev20250918
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -121,6 +121,7 @@ Provides-Extra: fluidstack
|
|
|
121
121
|
Provides-Extra: cudo
|
|
122
122
|
Requires-Dist: cudo-compute>=0.1.10; extra == "cudo"
|
|
123
123
|
Provides-Extra: paperspace
|
|
124
|
+
Provides-Extra: primeintellect
|
|
124
125
|
Provides-Extra: do
|
|
125
126
|
Requires-Dist: pydo>=0.3.0; extra == "do"
|
|
126
127
|
Requires-Dist: azure-core>=1.24.0; extra == "do"
|
|
@@ -151,49 +152,49 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
|
|
|
151
152
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
|
|
152
153
|
Requires-Dist: aiosqlite; extra == "server"
|
|
153
154
|
Provides-Extra: all
|
|
154
|
-
Requires-Dist:
|
|
155
|
-
Requires-Dist:
|
|
156
|
-
Requires-Dist:
|
|
157
|
-
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
158
|
-
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
159
|
-
Requires-Dist: anyio; extra == "all"
|
|
160
|
-
Requires-Dist: msgraph-sdk; extra == "all"
|
|
161
|
-
Requires-Dist: aiohttp; extra == "all"
|
|
162
|
-
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
163
|
-
Requires-Dist: pyjwt; extra == "all"
|
|
164
|
-
Requires-Dist: passlib; extra == "all"
|
|
155
|
+
Requires-Dist: casbin; extra == "all"
|
|
156
|
+
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
157
|
+
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
165
158
|
Requires-Dist: ecsapi>=0.2.0; extra == "all"
|
|
159
|
+
Requires-Dist: aiosqlite; extra == "all"
|
|
160
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
161
|
+
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
162
|
+
Requires-Dist: docker; extra == "all"
|
|
163
|
+
Requires-Dist: google-cloud-storage; extra == "all"
|
|
166
164
|
Requires-Dist: azure-common; extra == "all"
|
|
167
|
-
Requires-Dist:
|
|
168
|
-
Requires-Dist:
|
|
169
|
-
Requires-Dist:
|
|
165
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
166
|
+
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
167
|
+
Requires-Dist: python-dateutil; extra == "all"
|
|
170
168
|
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
171
|
-
Requires-Dist:
|
|
172
|
-
Requires-Dist: azure-
|
|
173
|
-
Requires-Dist: oci; extra == "all"
|
|
174
|
-
Requires-Dist: docker; extra == "all"
|
|
169
|
+
Requires-Dist: anyio; extra == "all"
|
|
170
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
|
175
171
|
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
176
|
-
Requires-Dist:
|
|
177
|
-
Requires-Dist:
|
|
178
|
-
Requires-Dist:
|
|
179
|
-
Requires-Dist: casbin; extra == "all"
|
|
172
|
+
Requires-Dist: boto3>=1.26.1; extra == "all"
|
|
173
|
+
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
174
|
+
Requires-Dist: aiohttp; extra == "all"
|
|
180
175
|
Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
|
|
181
|
-
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
182
|
-
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
183
|
-
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
184
|
-
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
185
|
-
Requires-Dist: ibm-vpc; extra == "all"
|
|
186
176
|
Requires-Dist: colorama<0.4.5; extra == "all"
|
|
187
|
-
Requires-Dist:
|
|
177
|
+
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
188
178
|
Requires-Dist: msrestazure; extra == "all"
|
|
189
|
-
Requires-Dist:
|
|
179
|
+
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
180
|
+
Requires-Dist: oci; extra == "all"
|
|
181
|
+
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
182
|
+
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
183
|
+
Requires-Dist: ibm-vpc; extra == "all"
|
|
184
|
+
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
185
|
+
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
186
|
+
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
187
|
+
Requires-Dist: msgraph-sdk; extra == "all"
|
|
188
|
+
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
189
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
190
|
+
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
191
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
192
|
+
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
193
|
+
Requires-Dist: pyjwt; extra == "all"
|
|
194
|
+
Requires-Dist: websockets; extra == "all"
|
|
195
|
+
Requires-Dist: passlib; extra == "all"
|
|
190
196
|
Requires-Dist: runpod>=1.6.1; extra == "all"
|
|
191
197
|
Requires-Dist: azure-cli>=2.65.0; extra == "all"
|
|
192
|
-
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
193
|
-
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
194
|
-
Requires-Dist: websockets; extra == "all"
|
|
195
|
-
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
196
|
-
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
197
198
|
Dynamic: author
|
|
198
199
|
Dynamic: classifier
|
|
199
200
|
Dynamic: description
|
|
@@ -291,12 +292,12 @@ SkyPilot supports your existing GPU, TPU, and CPU workloads, with no code change
|
|
|
291
292
|
Install with pip:
|
|
292
293
|
```bash
|
|
293
294
|
# Choose your clouds:
|
|
294
|
-
pip install -U "skypilot[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp]"
|
|
295
|
+
pip install -U "skypilot[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp,seeweb]"
|
|
295
296
|
```
|
|
296
297
|
To get the latest features and fixes, use the nightly build or [install from source](https://docs.skypilot.co/en/latest/getting-started/installation.html):
|
|
297
298
|
```bash
|
|
298
299
|
# Choose your clouds:
|
|
299
|
-
pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp]"
|
|
300
|
+
pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp,seeweb]"
|
|
300
301
|
```
|
|
301
302
|
|
|
302
303
|
<p align="center">
|
|
@@ -305,7 +306,7 @@ pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,
|
|
|
305
306
|
|
|
306
307
|
Current supported infra: Kubernetes, AWS, GCP, Azure, OCI, Nebius, Lambda Cloud, RunPod, Fluidstack,
|
|
307
308
|
Cudo, Digital Ocean, Paperspace, Cloudflare, Samsung, IBM, Vast.ai,
|
|
308
|
-
VMware vSphere.
|
|
309
|
+
VMware vSphere, Seeweb.
|
|
309
310
|
<p align="center">
|
|
310
311
|
<img alt="SkyPilot" src="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/cloud-logos-light.png" width=85%>
|
|
311
312
|
</p>
|