skypilot-nightly 1.0.0.dev20250808__py3-none-any.whl → 1.0.0.dev20250814__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/kubernetes.py +5 -2
- sky/backends/backend_utils.py +102 -8
- sky/backends/cloud_vm_ray_backend.py +197 -31
- sky/catalog/cudo_catalog.py +1 -1
- sky/catalog/data_fetchers/fetch_cudo.py +1 -1
- sky/catalog/data_fetchers/fetch_nebius.py +6 -3
- sky/client/cli/command.py +60 -77
- sky/client/common.py +1 -1
- sky/client/sdk.py +19 -19
- sky/client/sdk_async.py +5 -4
- sky/clouds/aws.py +52 -1
- sky/clouds/kubernetes.py +14 -0
- sky/core.py +5 -0
- sky/dag.py +1 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{-DXZksWqf2waNHeU9YTQe → Y0eNlwi85qGRecLTin11y}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/{6989-6129c1cfbcf51063.js → 6989-37611fe6b86d274d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-491a4d699d95e808.js → _app-c2ea34fda4f1f8c8.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-f5ccf5d39d87aebe.js → [pool]-664c36eda967b1ba.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-339efec49c0cc7d0.js → webpack-00c0a51d21157453.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +11 -1
- sky/exceptions.py +5 -0
- sky/execution.py +15 -0
- sky/global_user_state.py +160 -2
- sky/jobs/constants.py +1 -1
- sky/jobs/controller.py +0 -1
- sky/jobs/recovery_strategy.py +6 -3
- sky/jobs/scheduler.py +23 -68
- sky/jobs/server/core.py +22 -12
- sky/jobs/state.py +6 -2
- sky/jobs/utils.py +17 -2
- sky/provision/__init__.py +4 -2
- sky/provision/aws/config.py +9 -0
- sky/provision/aws/instance.py +41 -17
- sky/provision/azure/instance.py +7 -4
- sky/provision/cudo/cudo_wrapper.py +1 -1
- sky/provision/cudo/instance.py +7 -4
- sky/provision/do/instance.py +7 -4
- sky/provision/fluidstack/instance.py +7 -4
- sky/provision/gcp/instance.py +7 -4
- sky/provision/hyperbolic/instance.py +7 -5
- sky/provision/kubernetes/instance.py +169 -6
- sky/provision/lambda_cloud/instance.py +7 -4
- sky/provision/nebius/instance.py +7 -4
- sky/provision/oci/instance.py +7 -4
- sky/provision/paperspace/instance.py +7 -5
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/provisioner.py +6 -0
- sky/provision/runpod/instance.py +7 -4
- sky/provision/runpod/utils.py +1 -1
- sky/provision/scp/instance.py +7 -5
- sky/provision/vast/instance.py +7 -5
- sky/provision/vsphere/instance.py +7 -4
- sky/resources.py +1 -2
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +70 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +1 -1
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/serve_state/001_initial_schema.py +1 -1
- sky/schemas/db/spot_jobs/001_initial_schema.py +1 -1
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/serve/constants.py +3 -7
- sky/serve/replica_managers.py +15 -16
- sky/serve/serve_state.py +10 -0
- sky/serve/serve_utils.py +58 -23
- sky/serve/server/impl.py +15 -19
- sky/serve/service.py +31 -16
- sky/server/server.py +20 -14
- sky/setup_files/dependencies.py +11 -10
- sky/skylet/autostop_lib.py +38 -5
- sky/skylet/constants.py +3 -1
- sky/skylet/services.py +44 -0
- sky/skylet/skylet.py +49 -4
- sky/skypilot_config.py +4 -4
- sky/task.py +19 -16
- sky/templates/aws-ray.yml.j2 +2 -2
- sky/templates/jobs-controller.yaml.j2 +6 -0
- sky/users/permission.py +1 -1
- sky/utils/cli_utils/status_utils.py +9 -0
- sky/utils/command_runner.py +1 -1
- sky/utils/config_utils.py +29 -5
- sky/utils/controller_utils.py +73 -0
- sky/utils/db/db_utils.py +39 -1
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/schemas.py +3 -0
- sky/volumes/server/core.py +2 -2
- sky/volumes/server/server.py +2 -2
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/METADATA +5 -7
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/RECORD +117 -108
- sky/dashboard/out/_next/static/chunks/8056-34d27f51e6d1c631.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ae17cec0fc6483d9.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +0 -1
- /sky/dashboard/out/_next/static/{-DXZksWqf2waNHeU9YTQe → Y0eNlwi85qGRecLTin11y}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/top_level.txt +0 -0
|
@@ -15,6 +15,12 @@ file_mounts:
|
|
|
15
15
|
{{controller_file_mount_path}}: {{local_file_mount_path}}
|
|
16
16
|
{%- endfor %}
|
|
17
17
|
|
|
18
|
+
# NOTE(dev): This needs to be a subset of sky/templates/sky-serve-controller.yaml.j2.
|
|
19
|
+
# It is because we use the --fast flag to submit jobs and no --fast flag to launch pools.
|
|
20
|
+
# So when we launch a new pool, it will install the required dependencies.
|
|
21
|
+
# TODO(tian): Add --fast to launch pools as well, and figure out the dependency installation.
|
|
22
|
+
# Maybe in the --fast implementation, we can store the hash of setup commands that used to be
|
|
23
|
+
# run and don't skip setup phase if the hash is different.
|
|
18
24
|
setup: |
|
|
19
25
|
{{ sky_activate_python_env }}
|
|
20
26
|
# Disable the pip version check to avoid the warning message, which makes the
|
sky/users/permission.py
CHANGED
|
@@ -44,7 +44,7 @@ class PermissionService:
|
|
|
44
44
|
if _enforcer_instance is None:
|
|
45
45
|
_enforcer_instance = self
|
|
46
46
|
engine = global_user_state.initialize_and_get_db()
|
|
47
|
-
db_utils.add_tables_to_db_sqlalchemy(
|
|
47
|
+
db_utils.add_all_tables_to_db_sqlalchemy(
|
|
48
48
|
sqlalchemy_adapter.Base.metadata, engine)
|
|
49
49
|
adapter = sqlalchemy_adapter.Adapter(engine)
|
|
50
50
|
model_path = os.path.join(os.path.dirname(__file__),
|
|
@@ -81,6 +81,7 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
|
81
81
|
_get_command,
|
|
82
82
|
truncate=not show_all,
|
|
83
83
|
show_by_default=False),
|
|
84
|
+
StatusColumn('LAST_EVENT', _get_last_event, show_by_default=False),
|
|
84
85
|
]
|
|
85
86
|
|
|
86
87
|
columns = []
|
|
@@ -314,6 +315,14 @@ def _get_head_ip(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
|
|
|
314
315
|
return handle.head_ip
|
|
315
316
|
|
|
316
317
|
|
|
318
|
+
def _get_last_event(cluster_record: _ClusterRecord,
|
|
319
|
+
truncate: bool = True) -> str:
|
|
320
|
+
del truncate
|
|
321
|
+
if cluster_record.get('last_event', None) is None:
|
|
322
|
+
return 'No recorded events.'
|
|
323
|
+
return cluster_record['last_event']
|
|
324
|
+
|
|
325
|
+
|
|
317
326
|
def _is_pending_autostop(cluster_record: _ClusterRecord) -> bool:
|
|
318
327
|
# autostop < 0 means nothing scheduled.
|
|
319
328
|
return cluster_record['autostop'] >= 0 and _get_status(
|
sky/utils/command_runner.py
CHANGED
|
@@ -674,7 +674,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
674
674
|
ssh += ['-tt']
|
|
675
675
|
if port_forward is not None:
|
|
676
676
|
for local, remote in port_forward:
|
|
677
|
-
logger.
|
|
677
|
+
logger.debug(
|
|
678
678
|
f'Forwarding local port {local} to remote port {remote}.')
|
|
679
679
|
ssh += ['-NL', f'{local}:localhost:{remote}']
|
|
680
680
|
if self._docker_ssh_proxy_command is not None:
|
sky/utils/config_utils.py
CHANGED
|
@@ -8,6 +8,26 @@ logger = sky_logging.init_logger(__name__)
|
|
|
8
8
|
|
|
9
9
|
_REGION_CONFIG_CLOUDS = ['nebius', 'oci']
|
|
10
10
|
|
|
11
|
+
# Kubernetes API use list to represent dictionary fields with patch strategy
|
|
12
|
+
# merge and each item is indexed by the patch merge key. The following map
|
|
13
|
+
# maps the field name to the patch merge key.
|
|
14
|
+
# pylint: disable=line-too-long
|
|
15
|
+
# Ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podspec-v1-core
|
|
16
|
+
# NOTE: field containers and imagePullSecrets are not included deliberately for
|
|
17
|
+
# backward compatibility (we only support one container per pod now).
|
|
18
|
+
_PATCH_MERGE_KEYS = {
|
|
19
|
+
'initContainers': 'name',
|
|
20
|
+
'ephemeralContainers': 'name',
|
|
21
|
+
'volumes': 'name',
|
|
22
|
+
'volumeMounts': 'name',
|
|
23
|
+
'resourceClaims': 'name',
|
|
24
|
+
'env': 'name',
|
|
25
|
+
'hostAliases': 'ip',
|
|
26
|
+
'topologySpreadConstraints': 'topologyKey',
|
|
27
|
+
'ports': 'containerPort',
|
|
28
|
+
'volumeDevices': 'devicePath',
|
|
29
|
+
}
|
|
30
|
+
|
|
11
31
|
|
|
12
32
|
class Config(Dict[str, Any]):
|
|
13
33
|
"""SkyPilot config that supports setting/getting values with nested keys."""
|
|
@@ -211,19 +231,23 @@ def merge_k8s_configs(
|
|
|
211
231
|
merge_k8s_configs(base_config[key][0], value[0],
|
|
212
232
|
next_allowed_override_keys,
|
|
213
233
|
next_disallowed_override_keys)
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
234
|
+
# For list fields with patch strategy "merge", we merge the list
|
|
235
|
+
# by the patch merge key.
|
|
236
|
+
elif key in _PATCH_MERGE_KEYS:
|
|
237
|
+
patch_merge_key = _PATCH_MERGE_KEYS[key]
|
|
217
238
|
for override_item in value:
|
|
218
|
-
override_item_name = override_item.get(
|
|
239
|
+
override_item_name = override_item.get(patch_merge_key)
|
|
219
240
|
if override_item_name is not None:
|
|
220
241
|
existing_base_item = next(
|
|
221
242
|
(v for v in base_config[key]
|
|
222
|
-
if v.get(
|
|
243
|
+
if v.get(patch_merge_key) == override_item_name),
|
|
244
|
+
None)
|
|
223
245
|
if existing_base_item is not None:
|
|
224
246
|
merge_k8s_configs(existing_base_item, override_item)
|
|
225
247
|
else:
|
|
226
248
|
base_config[key].append(override_item)
|
|
249
|
+
else:
|
|
250
|
+
base_config[key].append(override_item)
|
|
227
251
|
else:
|
|
228
252
|
base_config[key].extend(value)
|
|
229
253
|
else:
|
sky/utils/controller_utils.py
CHANGED
|
@@ -23,11 +23,14 @@ from sky.clouds import gcp
|
|
|
23
23
|
from sky.data import data_utils
|
|
24
24
|
from sky.data import storage as storage_lib
|
|
25
25
|
from sky.jobs import constants as managed_job_constants
|
|
26
|
+
from sky.jobs import state as managed_job_state
|
|
26
27
|
from sky.provision.kubernetes import constants as kubernetes_constants
|
|
27
28
|
from sky.serve import constants as serve_constants
|
|
29
|
+
from sky.serve import serve_state
|
|
28
30
|
from sky.setup_files import dependencies
|
|
29
31
|
from sky.skylet import constants
|
|
30
32
|
from sky.skylet import log_lib
|
|
33
|
+
from sky.utils import annotations
|
|
31
34
|
from sky.utils import common
|
|
32
35
|
from sky.utils import common_utils
|
|
33
36
|
from sky.utils import config_utils
|
|
@@ -37,8 +40,13 @@ from sky.utils import rich_utils
|
|
|
37
40
|
from sky.utils import ux_utils
|
|
38
41
|
|
|
39
42
|
if typing.TYPE_CHECKING:
|
|
43
|
+
import psutil
|
|
44
|
+
|
|
40
45
|
from sky import task as task_lib
|
|
41
46
|
from sky.backends import cloud_vm_ray_backend
|
|
47
|
+
else:
|
|
48
|
+
from sky.adaptors import common as adaptors_common
|
|
49
|
+
psutil = adaptors_common.LazyImport('psutil')
|
|
42
50
|
|
|
43
51
|
logger = sky_logging.init_logger(__name__)
|
|
44
52
|
|
|
@@ -1161,3 +1169,68 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
|
1161
1169
|
task.update_storage_mounts(updated_mount_storages)
|
|
1162
1170
|
if msg:
|
|
1163
1171
|
logger.info(ux_utils.finishing_message('Uploaded local files/folders.'))
|
|
1172
|
+
|
|
1173
|
+
|
|
1174
|
+
# ======================= Resources Management Functions =======================
|
|
1175
|
+
|
|
1176
|
+
# Based on testing, assume a running job process uses 350MB memory. We use the
|
|
1177
|
+
# same estimation for service controller process.
|
|
1178
|
+
JOB_MEMORY_MB = 350
|
|
1179
|
+
# Monitoring process for service is 1GB. This is based on an old estimation but
|
|
1180
|
+
# we keep it here for now.
|
|
1181
|
+
# TODO(tian): Remeasure this.
|
|
1182
|
+
SERVE_MONITORING_MEMORY_MB = 1024
|
|
1183
|
+
# The ratio of service controller process to job process. We will treat each
|
|
1184
|
+
# service as SERVE_PROC_RATIO job processes.
|
|
1185
|
+
SERVE_PROC_RATIO = SERVE_MONITORING_MEMORY_MB / JOB_MEMORY_MB
|
|
1186
|
+
# Past 2000 simultaneous jobs, we become unstable.
|
|
1187
|
+
# See https://github.com/skypilot-org/skypilot/issues/4649.
|
|
1188
|
+
MAX_JOB_LIMIT = 2000
|
|
1189
|
+
# Number of ongoing launches allowed per CPU, for managed jobs.
|
|
1190
|
+
JOB_LAUNCHES_PER_CPU = 4
|
|
1191
|
+
# Number of ongoing launches allowed per CPU, for services. This is
|
|
1192
|
+
# also based on an old estimation, but SkyServe indeed spawns a new process
|
|
1193
|
+
# for each launch operation, so it should be slightly more resources demanding
|
|
1194
|
+
# than managed jobs.
|
|
1195
|
+
SERVE_LAUNCHES_PER_CPU = 2
|
|
1196
|
+
# The ratio of service launch to job launch. This is inverted as the parallelism
|
|
1197
|
+
# is determined by 1 / LAUNCHES_PER_CPU.
|
|
1198
|
+
SERVE_LAUNCH_RATIO = JOB_LAUNCHES_PER_CPU / SERVE_LAUNCHES_PER_CPU
|
|
1199
|
+
|
|
1200
|
+
# The _RESOURCES_LOCK should be held whenever we are checking the parallelism
|
|
1201
|
+
# control or updating the schedule_state of any job or service. Any code that
|
|
1202
|
+
# takes this lock must conclude by calling maybe_schedule_next_jobs.
|
|
1203
|
+
_RESOURCES_LOCK = '~/.sky/locks/controller_resources.lock'
|
|
1204
|
+
|
|
1205
|
+
|
|
1206
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
|
1207
|
+
def get_resources_lock_path() -> str:
|
|
1208
|
+
path = os.path.expanduser(_RESOURCES_LOCK)
|
|
1209
|
+
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
1210
|
+
return path
|
|
1211
|
+
|
|
1212
|
+
|
|
1213
|
+
@annotations.lru_cache(scope='request')
|
|
1214
|
+
def _get_job_parallelism() -> int:
|
|
1215
|
+
job_memory = JOB_MEMORY_MB * 1024 * 1024
|
|
1216
|
+
job_limit = min(psutil.virtual_memory().total // job_memory, MAX_JOB_LIMIT)
|
|
1217
|
+
return max(job_limit, 1)
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
@annotations.lru_cache(scope='request')
|
|
1221
|
+
def _get_launch_parallelism() -> int:
|
|
1222
|
+
cpus = os.cpu_count()
|
|
1223
|
+
return cpus * JOB_LAUNCHES_PER_CPU if cpus is not None else 1
|
|
1224
|
+
|
|
1225
|
+
|
|
1226
|
+
def can_provision() -> bool:
|
|
1227
|
+
num_provision = (
|
|
1228
|
+
serve_state.total_number_provisioning_replicas() * SERVE_LAUNCH_RATIO +
|
|
1229
|
+
managed_job_state.get_num_launching_jobs())
|
|
1230
|
+
return num_provision < _get_launch_parallelism()
|
|
1231
|
+
|
|
1232
|
+
|
|
1233
|
+
def can_start_new_process() -> bool:
|
|
1234
|
+
num_procs = (serve_state.get_num_services() * SERVE_PROC_RATIO +
|
|
1235
|
+
managed_job_state.get_num_alive_jobs())
|
|
1236
|
+
return num_procs < _get_job_parallelism()
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -32,6 +32,23 @@ if typing.TYPE_CHECKING:
|
|
|
32
32
|
_DB_TIMEOUT_S = 60
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
class UniqueConstraintViolationError(Exception):
|
|
36
|
+
"""Exception raised for unique constraint violation.
|
|
37
|
+
Attributes:
|
|
38
|
+
value -- the input value that caused the error
|
|
39
|
+
message -- explanation of the error
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(self, value, message='Unique constraint violation'):
|
|
43
|
+
self.value = value
|
|
44
|
+
self.message = message
|
|
45
|
+
super().__init__(self.message)
|
|
46
|
+
|
|
47
|
+
def __str__(self):
|
|
48
|
+
return (f'UniqueConstraintViolationError: {self.message} '
|
|
49
|
+
f'(Value: {self.value})')
|
|
50
|
+
|
|
51
|
+
|
|
35
52
|
class SQLAlchemyDialect(enum.Enum):
|
|
36
53
|
SQLITE = 'sqlite'
|
|
37
54
|
POSTGRESQL = 'postgresql'
|
|
@@ -87,7 +104,7 @@ def add_column_to_table(
|
|
|
87
104
|
conn.commit()
|
|
88
105
|
|
|
89
106
|
|
|
90
|
-
def add_tables_to_db_sqlalchemy(
|
|
107
|
+
def add_all_tables_to_db_sqlalchemy(
|
|
91
108
|
metadata: sqlalchemy.MetaData,
|
|
92
109
|
engine: sqlalchemy.Engine,
|
|
93
110
|
):
|
|
@@ -103,6 +120,27 @@ def add_tables_to_db_sqlalchemy(
|
|
|
103
120
|
raise
|
|
104
121
|
|
|
105
122
|
|
|
123
|
+
def add_table_to_db_sqlalchemy(
|
|
124
|
+
metadata: sqlalchemy.MetaData,
|
|
125
|
+
engine: sqlalchemy.Engine,
|
|
126
|
+
table_name: str,
|
|
127
|
+
):
|
|
128
|
+
"""Add a specific table to the database."""
|
|
129
|
+
try:
|
|
130
|
+
table = metadata.tables[table_name]
|
|
131
|
+
except KeyError as e:
|
|
132
|
+
raise e
|
|
133
|
+
|
|
134
|
+
try:
|
|
135
|
+
table.create(bind=engine, checkfirst=True)
|
|
136
|
+
except (sqlalchemy_exc.OperationalError,
|
|
137
|
+
sqlalchemy_exc.ProgrammingError) as e:
|
|
138
|
+
if 'already exists' in str(e):
|
|
139
|
+
pass
|
|
140
|
+
else:
|
|
141
|
+
raise
|
|
142
|
+
|
|
143
|
+
|
|
106
144
|
def add_column_to_table_sqlalchemy(
|
|
107
145
|
session: 'Session',
|
|
108
146
|
table_name: str,
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__)
|
|
|
19
19
|
DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
20
20
|
|
|
21
21
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
22
|
-
GLOBAL_USER_STATE_VERSION = '
|
|
22
|
+
GLOBAL_USER_STATE_VERSION = '005'
|
|
23
23
|
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
|
|
24
24
|
|
|
25
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
sky/utils/schemas.py
CHANGED
sky/volumes/server/core.py
CHANGED
|
@@ -7,12 +7,12 @@ import uuid
|
|
|
7
7
|
|
|
8
8
|
import filelock
|
|
9
9
|
|
|
10
|
-
import sky
|
|
11
10
|
from sky import global_user_state
|
|
12
11
|
from sky import models
|
|
13
12
|
from sky import provision
|
|
14
13
|
from sky import sky_logging
|
|
15
14
|
from sky.utils import common_utils
|
|
15
|
+
from sky.utils import registry
|
|
16
16
|
from sky.utils import rich_utils
|
|
17
17
|
from sky.utils import status_lib
|
|
18
18
|
from sky.utils import ux_utils
|
|
@@ -180,7 +180,7 @@ def volume_apply(name: str, volume_type: str, cloud: str, region: Optional[str],
|
|
|
180
180
|
with rich_utils.safe_status(ux_utils.spinner_message('Creating volume')):
|
|
181
181
|
# Reuse the method for cluster name on cloud to
|
|
182
182
|
# generate the storage name on cloud.
|
|
183
|
-
cloud_obj =
|
|
183
|
+
cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud)
|
|
184
184
|
assert cloud_obj is not None
|
|
185
185
|
name_uuid = str(uuid.uuid4())[:6]
|
|
186
186
|
name_on_cloud = common_utils.make_cluster_name_on_cloud(
|
sky/volumes/server/server.py
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
import fastapi
|
|
4
4
|
|
|
5
|
-
import sky
|
|
6
5
|
from sky import clouds
|
|
7
6
|
from sky import sky_logging
|
|
8
7
|
from sky.server.requests import executor
|
|
9
8
|
from sky.server.requests import payloads
|
|
10
9
|
from sky.server.requests import requests as requests_lib
|
|
10
|
+
from sky.utils import registry
|
|
11
11
|
from sky.utils import volume
|
|
12
12
|
from sky.volumes.server import core
|
|
13
13
|
|
|
@@ -55,7 +55,7 @@ async def volume_apply(request: fastapi.Request,
|
|
|
55
55
|
if volume_type not in supported_volume_types:
|
|
56
56
|
raise fastapi.HTTPException(
|
|
57
57
|
status_code=400, detail=f'Invalid volume type: {volume_type}')
|
|
58
|
-
cloud =
|
|
58
|
+
cloud = registry.CLOUD_REGISTRY.from_str(volume_cloud)
|
|
59
59
|
if cloud is None:
|
|
60
60
|
raise fastapi.HTTPException(status_code=400,
|
|
61
61
|
detail=f'Invalid cloud: {volume_cloud}')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20250808
|
|
3
|
+
Version: 1.0.0.dev20250814
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -104,9 +104,8 @@ Provides-Extra: ssh
|
|
|
104
104
|
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "ssh"
|
|
105
105
|
Requires-Dist: websockets; extra == "ssh"
|
|
106
106
|
Provides-Extra: remote
|
|
107
|
-
Requires-Dist: grpcio
|
|
108
|
-
Requires-Dist:
|
|
109
|
-
Requires-Dist: protobuf!=3.19.5,>=3.15.3; extra == "remote"
|
|
107
|
+
Requires-Dist: grpcio>=1.63.0; extra == "remote"
|
|
108
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "remote"
|
|
110
109
|
Provides-Extra: runpod
|
|
111
110
|
Requires-Dist: runpod>=1.6.1; extra == "runpod"
|
|
112
111
|
Provides-Extra: fluidstack
|
|
@@ -169,9 +168,8 @@ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
|
169
168
|
Requires-Dist: websockets; extra == "all"
|
|
170
169
|
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
171
170
|
Requires-Dist: websockets; extra == "all"
|
|
172
|
-
Requires-Dist: grpcio
|
|
173
|
-
Requires-Dist:
|
|
174
|
-
Requires-Dist: protobuf!=3.19.5,>=3.15.3; extra == "all"
|
|
171
|
+
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
172
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
175
173
|
Requires-Dist: runpod>=1.6.1; extra == "all"
|
|
176
174
|
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
177
175
|
Requires-Dist: pydo>=0.3.0; extra == "all"
|