skypilot-nightly 1.0.0.dev20251009__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +6 -2
- sky/adaptors/aws.py +25 -7
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +59 -149
- sky/backends/backend_utils.py +104 -63
- sky/backends/cloud_vm_ray_backend.py +84 -39
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +25 -13
- sky/client/cli/command.py +335 -86
- sky/client/cli/flags.py +4 -2
- sky/client/cli/table_utils.py +17 -9
- sky/client/sdk.py +59 -12
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +71 -16
- sky/clouds/azure.py +12 -5
- sky/clouds/cloud.py +19 -9
- sky/clouds/cudo.py +12 -5
- sky/clouds/do.py +4 -1
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +12 -5
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +62 -25
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +12 -5
- sky/clouds/oci.py +12 -5
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +4 -1
- sky/clouds/runpod.py +12 -5
- sky/clouds/scp.py +12 -5
- sky/clouds/seeweb.py +4 -1
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/vast.py +12 -5
- sky/clouds/vsphere.py +4 -1
- sky/core.py +12 -11
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-74503c8e80fd253b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.ad6adaa2a0fa9768.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{4725.10f7a9a5d3ea8208.js → 4725.a830b5c9e7867c92.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-a37d2063af475a1c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-d44859594e6f8064.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-6edeb7d06032adfc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-479dde13399cf270.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-5ab3b907622cf0fe.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-b84b948ff357c43e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-c5a3eeee1c218af1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-22b23febb3e89ce1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +143 -19
- sky/data/storage.py +168 -11
- sky/exceptions.py +13 -1
- sky/execution.py +13 -0
- sky/global_user_state.py +189 -113
- sky/jobs/client/sdk.py +32 -10
- sky/jobs/client/sdk_async.py +9 -3
- sky/jobs/constants.py +3 -1
- sky/jobs/controller.py +164 -192
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +59 -82
- sky/jobs/scheduler.py +20 -9
- sky/jobs/server/core.py +105 -23
- sky/jobs/server/server.py +40 -28
- sky/jobs/server/utils.py +32 -11
- sky/jobs/state.py +588 -110
- sky/jobs/utils.py +442 -209
- sky/logs/agent.py +1 -1
- sky/metrics/utils.py +45 -6
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +7 -0
- sky/provision/aws/instance.py +2 -1
- sky/provision/azure/instance.py +2 -1
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +2 -1
- sky/provision/do/instance.py +2 -1
- sky/provision/fluidstack/instance.py +4 -3
- sky/provision/gcp/instance.py +2 -1
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/instance_setup.py +10 -2
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +222 -89
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/utils.py +114 -53
- sky/provision/kubernetes/volume.py +5 -4
- sky/provision/lambda_cloud/instance.py +2 -1
- sky/provision/nebius/instance.py +2 -1
- sky/provision/oci/instance.py +2 -1
- sky/provision/paperspace/instance.py +2 -1
- sky/provision/provisioner.py +11 -2
- sky/provision/runpod/instance.py +2 -1
- sky/provision/scp/instance.py +2 -1
- sky/provision/seeweb/instance.py +3 -3
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +2 -1
- sky/provision/vsphere/instance.py +2 -1
- sky/resources.py +1 -1
- sky/schemas/api/responses.py +9 -5
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/jobsv1_pb2.py +52 -52
- sky/schemas/generated/jobsv1_pb2.pyi +4 -2
- sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
- sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
- sky/serve/client/impl.py +11 -3
- sky/serve/replica_managers.py +5 -2
- sky/serve/serve_utils.py +9 -2
- sky/serve/server/impl.py +7 -2
- sky/serve/server/server.py +18 -15
- sky/serve/service.py +2 -2
- sky/server/auth/oauth2_proxy.py +2 -5
- sky/server/common.py +31 -28
- sky/server/constants.py +5 -1
- sky/server/daemons.py +27 -19
- sky/server/requests/executor.py +138 -74
- sky/server/requests/payloads.py +9 -1
- sky/server/requests/preconditions.py +13 -10
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +485 -153
- sky/server/requests/serializers/decoders.py +26 -13
- sky/server/requests/serializers/encoders.py +56 -11
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +70 -18
- sky/server/server.py +283 -104
- sky/server/stream_utils.py +233 -59
- sky/server/uvicorn.py +18 -17
- sky/setup_files/alembic.ini +4 -0
- sky/setup_files/dependencies.py +32 -13
- sky/sky_logging.py +0 -2
- sky/skylet/constants.py +30 -7
- sky/skylet/events.py +7 -0
- sky/skylet/log_lib.py +8 -2
- sky/skylet/log_lib.pyi +1 -1
- sky/skylet/services.py +26 -13
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +87 -75
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +67 -54
- sky/templates/kubernetes-ray.yml.j2 +8 -1
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/websocket_proxy.py +142 -12
- sky/users/permission.py +8 -1
- sky/utils/admin_policy_utils.py +16 -3
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +8 -2
- sky/utils/command_runner.py +11 -0
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +7 -4
- sky/utils/context.py +57 -51
- sky/utils/context_utils.py +30 -12
- sky/utils/controller_utils.py +35 -8
- sky/utils/db/db_utils.py +37 -10
- sky/utils/db/migration_utils.py +8 -4
- sky/utils/locks.py +24 -6
- sky/utils/resource_checker.py +4 -1
- sky/utils/resources_utils.py +53 -29
- sky/utils/schemas.py +23 -4
- sky/utils/subprocess_utils.py +17 -4
- sky/volumes/server/server.py +7 -6
- sky/workspaces/server.py +13 -12
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/METADATA +306 -55
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/RECORD +215 -195
- sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
- sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-8d748834fcc60b46.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +0 -1
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-f6818c84ed8f1c86.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f058b0346db2aff.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4f7079dcab6ed653.js +0 -16
- sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/{hIViZcQBkn0HE8SpaSsUU → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/controller_utils.py
CHANGED
|
@@ -72,7 +72,8 @@ class _ControllerSpec:
|
|
|
72
72
|
"""Spec for skypilot controllers."""
|
|
73
73
|
controller_type: str
|
|
74
74
|
name: str
|
|
75
|
-
|
|
75
|
+
_cluster_name_func: Callable[[], str]
|
|
76
|
+
_cluster_name_from_server: Optional[str] # For client-side only
|
|
76
77
|
in_progress_hint: Callable[[bool], str]
|
|
77
78
|
decline_cancel_hint: str
|
|
78
79
|
_decline_down_when_failed_to_fetch_status_hint: str
|
|
@@ -93,6 +94,24 @@ class _ControllerSpec:
|
|
|
93
94
|
return self._check_cluster_name_hint.format(
|
|
94
95
|
cluster_name=self.cluster_name)
|
|
95
96
|
|
|
97
|
+
@property
|
|
98
|
+
def cluster_name(self) -> str:
|
|
99
|
+
"""The cluster name of the controller.
|
|
100
|
+
|
|
101
|
+
On the server-side, the cluster name is the actual cluster name,
|
|
102
|
+
which is read from common.(JOB|SKY_SERVE)_CONTROLLER_NAME.
|
|
103
|
+
|
|
104
|
+
On the client-side, the cluster name may not be accurate,
|
|
105
|
+
as we may not know the exact name, because we are missing
|
|
106
|
+
the server-side common.SERVER_ID. We have to wait until
|
|
107
|
+
we get the actual cluster name from the server.
|
|
108
|
+
"""
|
|
109
|
+
return (self._cluster_name_from_server if self._cluster_name_from_server
|
|
110
|
+
is not None else self._cluster_name_func())
|
|
111
|
+
|
|
112
|
+
def set_cluster_name_from_server(self, cluster_name: str) -> None:
|
|
113
|
+
self._cluster_name_from_server = cluster_name
|
|
114
|
+
|
|
96
115
|
|
|
97
116
|
# TODO: refactor controller class to not be an enum.
|
|
98
117
|
class Controllers(enum.Enum):
|
|
@@ -102,7 +121,8 @@ class Controllers(enum.Enum):
|
|
|
102
121
|
JOBS_CONTROLLER = _ControllerSpec(
|
|
103
122
|
controller_type='jobs',
|
|
104
123
|
name='managed jobs controller',
|
|
105
|
-
|
|
124
|
+
_cluster_name_func=lambda: common.JOB_CONTROLLER_NAME,
|
|
125
|
+
_cluster_name_from_server=None,
|
|
106
126
|
in_progress_hint=lambda _:
|
|
107
127
|
('* {job_info}To see all managed jobs: '
|
|
108
128
|
f'{colorama.Style.BRIGHT}sky jobs queue{colorama.Style.RESET_ALL}'),
|
|
@@ -133,7 +153,8 @@ class Controllers(enum.Enum):
|
|
|
133
153
|
SKY_SERVE_CONTROLLER = _ControllerSpec(
|
|
134
154
|
controller_type='serve',
|
|
135
155
|
name='serve controller',
|
|
136
|
-
|
|
156
|
+
_cluster_name_func=lambda: common.SKY_SERVE_CONTROLLER_NAME,
|
|
157
|
+
_cluster_name_from_server=None,
|
|
137
158
|
in_progress_hint=(
|
|
138
159
|
lambda pool:
|
|
139
160
|
(f'* To see detailed pool status: {colorama.Style.BRIGHT}'
|
|
@@ -166,7 +187,9 @@ class Controllers(enum.Enum):
|
|
|
166
187
|
default_autostop_config=serve_constants.CONTROLLER_AUTOSTOP)
|
|
167
188
|
|
|
168
189
|
@classmethod
|
|
169
|
-
def from_name(cls,
|
|
190
|
+
def from_name(cls,
|
|
191
|
+
name: Optional[str],
|
|
192
|
+
expect_exact_match: bool = True) -> Optional['Controllers']:
|
|
170
193
|
"""Check if the cluster name is a controller name.
|
|
171
194
|
|
|
172
195
|
Returns:
|
|
@@ -187,7 +210,11 @@ class Controllers(enum.Enum):
|
|
|
187
210
|
elif name.startswith(common.JOB_CONTROLLER_PREFIX):
|
|
188
211
|
controller = cls.JOBS_CONTROLLER
|
|
189
212
|
prefix = common.JOB_CONTROLLER_PREFIX
|
|
190
|
-
|
|
213
|
+
|
|
214
|
+
if controller is not None and expect_exact_match:
|
|
215
|
+
assert name == controller.value.cluster_name, (
|
|
216
|
+
name, controller.value.cluster_name)
|
|
217
|
+
elif controller is not None and name != controller.value.cluster_name:
|
|
191
218
|
# The client-side cluster_name is not accurate. Assume that `name`
|
|
192
219
|
# is the actual cluster name, so need to set the controller's
|
|
193
220
|
# cluster name to the input name.
|
|
@@ -201,7 +228,7 @@ class Controllers(enum.Enum):
|
|
|
201
228
|
prefix)
|
|
202
229
|
|
|
203
230
|
# Update the cluster name.
|
|
204
|
-
controller.value.
|
|
231
|
+
controller.value.set_cluster_name_from_server(name)
|
|
205
232
|
return controller
|
|
206
233
|
|
|
207
234
|
@classmethod
|
|
@@ -228,7 +255,7 @@ def get_controller_for_pool(pool: bool) -> Controllers:
|
|
|
228
255
|
def high_availability_specified(cluster_name: Optional[str]) -> bool:
|
|
229
256
|
"""Check if the controller high availability is specified in user config.
|
|
230
257
|
"""
|
|
231
|
-
controller = Controllers.from_name(cluster_name)
|
|
258
|
+
controller = Controllers.from_name(cluster_name, expect_exact_match=False)
|
|
232
259
|
if controller is None:
|
|
233
260
|
return False
|
|
234
261
|
|
|
@@ -411,7 +438,7 @@ def check_cluster_name_not_controller(
|
|
|
411
438
|
Returns:
|
|
412
439
|
None, if the cluster name is not a controller name.
|
|
413
440
|
"""
|
|
414
|
-
controller = Controllers.from_name(cluster_name)
|
|
441
|
+
controller = Controllers.from_name(cluster_name, expect_exact_match=False)
|
|
415
442
|
if controller is not None:
|
|
416
443
|
msg = controller.value.check_cluster_name_hint
|
|
417
444
|
if operation_str is not None:
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -185,7 +185,7 @@ def add_column_to_table_sqlalchemy(
|
|
|
185
185
|
pass
|
|
186
186
|
else:
|
|
187
187
|
raise
|
|
188
|
-
#
|
|
188
|
+
#postgresql
|
|
189
189
|
except sqlalchemy_exc.ProgrammingError as e:
|
|
190
190
|
if 'already exists' in str(e):
|
|
191
191
|
pass
|
|
@@ -358,6 +358,27 @@ class SQLiteConn(threading.local):
|
|
|
358
358
|
conn = await self._get_async_conn()
|
|
359
359
|
return await conn.execute_fetchall(sql, parameters)
|
|
360
360
|
|
|
361
|
+
async def execute_get_returning_value_async(
|
|
362
|
+
self,
|
|
363
|
+
sql: str,
|
|
364
|
+
parameters: Optional[Iterable[Any]] = None
|
|
365
|
+
) -> Optional[sqlite3.Row]:
|
|
366
|
+
conn = await self._get_async_conn()
|
|
367
|
+
|
|
368
|
+
if parameters is None:
|
|
369
|
+
parameters = []
|
|
370
|
+
|
|
371
|
+
def exec_and_get_returning_value(sql: str,
|
|
372
|
+
parameters: Optional[Iterable[Any]]):
|
|
373
|
+
# pylint: disable=protected-access
|
|
374
|
+
row = conn._conn.execute(sql, parameters).fetchone()
|
|
375
|
+
conn._conn.commit()
|
|
376
|
+
return row
|
|
377
|
+
|
|
378
|
+
# pylint: disable=protected-access
|
|
379
|
+
return await conn._execute(exec_and_get_returning_value, sql,
|
|
380
|
+
parameters)
|
|
381
|
+
|
|
361
382
|
async def close(self):
|
|
362
383
|
if self._async_conn is not None:
|
|
363
384
|
await self._async_conn.close()
|
|
@@ -382,21 +403,28 @@ def get_max_connections():
|
|
|
382
403
|
|
|
383
404
|
@typing.overload
|
|
384
405
|
def get_engine(
|
|
385
|
-
db_name: str,
|
|
406
|
+
db_name: Optional[str],
|
|
386
407
|
async_engine: Literal[False] = False) -> sqlalchemy.engine.Engine:
|
|
387
408
|
...
|
|
388
409
|
|
|
389
410
|
|
|
390
411
|
@typing.overload
|
|
391
|
-
def get_engine(db_name: str,
|
|
412
|
+
def get_engine(db_name: Optional[str],
|
|
392
413
|
async_engine: Literal[True]) -> sqlalchemy_async.AsyncEngine:
|
|
393
414
|
...
|
|
394
415
|
|
|
395
416
|
|
|
396
417
|
def get_engine(
|
|
397
|
-
db_name: str,
|
|
418
|
+
db_name: Optional[str],
|
|
398
419
|
async_engine: bool = False
|
|
399
420
|
) -> Union[sqlalchemy.engine.Engine, sqlalchemy_async.AsyncEngine]:
|
|
421
|
+
"""Get the engine for the given database name.
|
|
422
|
+
|
|
423
|
+
Args:
|
|
424
|
+
db_name: The name of the database. ONLY used for SQLite. On Postgres,
|
|
425
|
+
we use a single database, which we get from the connection string.
|
|
426
|
+
async_engine: Whether to return an async engine.
|
|
427
|
+
"""
|
|
400
428
|
conn_string = None
|
|
401
429
|
if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
|
|
402
430
|
conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
|
|
@@ -416,19 +444,18 @@ def get_engine(
|
|
|
416
444
|
_postgres_engine_cache[conn_string] = (
|
|
417
445
|
sqlalchemy.create_engine(
|
|
418
446
|
conn_string, poolclass=sqlalchemy.pool.NullPool))
|
|
419
|
-
elif _max_connections == 1:
|
|
420
|
-
_postgres_engine_cache[conn_string] = (
|
|
421
|
-
sqlalchemy.create_engine(
|
|
422
|
-
conn_string, poolclass=sqlalchemy.pool.StaticPool))
|
|
423
447
|
else:
|
|
424
448
|
_postgres_engine_cache[conn_string] = (
|
|
425
449
|
sqlalchemy.create_engine(
|
|
426
450
|
conn_string,
|
|
427
451
|
poolclass=sqlalchemy.pool.QueuePool,
|
|
428
|
-
|
|
429
|
-
max_overflow=0)
|
|
452
|
+
pool_size=_max_connections,
|
|
453
|
+
max_overflow=max(0, 5 - _max_connections),
|
|
454
|
+
pool_pre_ping=True,
|
|
455
|
+
pool_recycle=1800))
|
|
430
456
|
engine = _postgres_engine_cache[conn_string]
|
|
431
457
|
else:
|
|
458
|
+
assert db_name is not None, 'db_name must be provided for SQLite'
|
|
432
459
|
db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
|
|
433
460
|
pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
|
|
434
461
|
if async_engine:
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -19,15 +19,19 @@ DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
|
19
19
|
|
|
20
20
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
21
21
|
GLOBAL_USER_STATE_VERSION = '010'
|
|
22
|
-
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.
|
|
22
|
+
GLOBAL_USER_STATE_LOCK_PATH = f'~/.sky/locks/.{GLOBAL_USER_STATE_DB_NAME}.lock'
|
|
23
23
|
|
|
24
24
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
|
25
|
-
SPOT_JOBS_VERSION = '
|
|
26
|
-
SPOT_JOBS_LOCK_PATH = '~/.sky/locks/.
|
|
25
|
+
SPOT_JOBS_VERSION = '005'
|
|
26
|
+
SPOT_JOBS_LOCK_PATH = f'~/.sky/locks/.{SPOT_JOBS_DB_NAME}.lock'
|
|
27
27
|
|
|
28
28
|
SERVE_DB_NAME = 'serve_db'
|
|
29
29
|
SERVE_VERSION = '001'
|
|
30
|
-
SERVE_LOCK_PATH = '~/.sky/locks/.
|
|
30
|
+
SERVE_LOCK_PATH = f'~/.sky/locks/.{SERVE_DB_NAME}.lock'
|
|
31
|
+
|
|
32
|
+
SKYPILOT_CONFIG_DB_NAME = 'sky_config_db'
|
|
33
|
+
SKYPILOT_CONFIG_VERSION = '001'
|
|
34
|
+
SKYPILOT_CONFIG_LOCK_PATH = f'~/.sky/locks/.{SKYPILOT_CONFIG_DB_NAME}.lock'
|
|
31
35
|
|
|
32
36
|
|
|
33
37
|
@contextlib.contextmanager
|
sky/utils/locks.py
CHANGED
|
@@ -243,6 +243,7 @@ class PostgresLock(DistributedLock):
|
|
|
243
243
|
if not self._acquired or not self._connection:
|
|
244
244
|
return
|
|
245
245
|
|
|
246
|
+
connection_lost = False
|
|
246
247
|
try:
|
|
247
248
|
cursor = self._connection.cursor()
|
|
248
249
|
cursor.execute('SELECT pg_advisory_unlock(%s)', (self._lock_key,))
|
|
@@ -252,8 +253,11 @@ class PostgresLock(DistributedLock):
|
|
|
252
253
|
# Lost connection to the database, likely the lock is force unlocked
|
|
253
254
|
# by other routines.
|
|
254
255
|
logger.debug(f'Failed to release postgres lock {self.lock_id}: {e}')
|
|
256
|
+
connection_lost = True
|
|
255
257
|
finally:
|
|
256
|
-
|
|
258
|
+
# Invalidate if connection was lost to prevent SQLAlchemy from
|
|
259
|
+
# trying to reset a dead connection
|
|
260
|
+
self._close_connection(invalidate=connection_lost)
|
|
257
261
|
|
|
258
262
|
def force_unlock(self) -> None:
|
|
259
263
|
"""Force unlock the postgres advisory lock."""
|
|
@@ -270,7 +274,7 @@ class PostgresLock(DistributedLock):
|
|
|
270
274
|
cursor.execute('SELECT pg_advisory_unlock(%s)', (self._lock_key,))
|
|
271
275
|
result = cursor.fetchone()[0]
|
|
272
276
|
if result:
|
|
273
|
-
# The lock is held by current routine and unlock
|
|
277
|
+
# The lock is held by current routine and unlock succeeded
|
|
274
278
|
self._connection.commit()
|
|
275
279
|
self._acquired = False
|
|
276
280
|
return
|
|
@@ -292,13 +296,27 @@ class PostgresLock(DistributedLock):
|
|
|
292
296
|
finally:
|
|
293
297
|
self._close_connection()
|
|
294
298
|
|
|
295
|
-
def _close_connection(self) -> None:
|
|
296
|
-
"""Close the postgres connection.
|
|
299
|
+
def _close_connection(self, invalidate: bool = False) -> None:
|
|
300
|
+
"""Close the postgres connection.
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
invalidate: If True, invalidate connection instead of closing it.
|
|
304
|
+
Use this when the connection might be broken (e.g., after
|
|
305
|
+
pg_terminate_backend) to prevent SQLAlchemy from trying to
|
|
306
|
+
reset it (which would result in an error being logged).
|
|
307
|
+
"""
|
|
297
308
|
if self._connection:
|
|
298
309
|
try:
|
|
299
|
-
|
|
310
|
+
if invalidate:
|
|
311
|
+
self._connection.invalidate()
|
|
312
|
+
else:
|
|
313
|
+
self._connection.close()
|
|
300
314
|
except Exception as e: # pylint: disable=broad-except
|
|
301
|
-
|
|
315
|
+
if invalidate:
|
|
316
|
+
logger.debug(
|
|
317
|
+
f'Failed to invalidate postgres connection: {e}')
|
|
318
|
+
else:
|
|
319
|
+
logger.debug(f'Failed to close postgres connection: {e}')
|
|
302
320
|
self._connection = None
|
|
303
321
|
|
|
304
322
|
def is_locked(self) -> bool:
|
sky/utils/resource_checker.py
CHANGED
|
@@ -278,7 +278,10 @@ def _get_active_resources(
|
|
|
278
278
|
from sky.jobs.server import core as managed_jobs_core
|
|
279
279
|
try:
|
|
280
280
|
filtered_jobs, _, _, _ = managed_jobs_core.queue_v2(
|
|
281
|
-
refresh=False,
|
|
281
|
+
refresh=False,
|
|
282
|
+
skip_finished=True,
|
|
283
|
+
all_users=True,
|
|
284
|
+
fields=['job_id', 'user_hash', 'workspace'])
|
|
282
285
|
return filtered_jobs
|
|
283
286
|
except exceptions.ClusterNotUpError:
|
|
284
287
|
logger.warning('All jobs should be finished.')
|
sky/utils/resources_utils.py
CHANGED
|
@@ -181,57 +181,81 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
|
181
181
|
|
|
182
182
|
|
|
183
183
|
def format_resource(resource: 'resources_lib.Resources',
|
|
184
|
-
|
|
184
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
185
185
|
resource = resource.assert_launchable()
|
|
186
|
-
|
|
187
|
-
|
|
186
|
+
is_k8s = str(resource.cloud).lower() == 'kubernetes'
|
|
187
|
+
if resource.accelerators is None or is_k8s or not simplified_only:
|
|
188
|
+
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
|
189
|
+
resource.instance_type)
|
|
188
190
|
|
|
189
|
-
|
|
191
|
+
elements_simple = []
|
|
192
|
+
elements_full = []
|
|
190
193
|
|
|
191
194
|
if resource.accelerators is not None:
|
|
192
195
|
acc, count = list(resource.accelerators.items())[0]
|
|
193
|
-
|
|
196
|
+
elements_simple.append(f'gpus={acc}:{count}')
|
|
197
|
+
elements_full.append(f'gpus={acc}:{count}')
|
|
194
198
|
|
|
195
|
-
|
|
196
|
-
|
|
199
|
+
if (resource.accelerators is None or is_k8s):
|
|
200
|
+
if vcpu is not None:
|
|
201
|
+
elements_simple.append(f'cpus={int(vcpu)}')
|
|
202
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
203
|
+
if mem is not None:
|
|
204
|
+
elements_simple.append(f'mem={int(mem)}')
|
|
205
|
+
elements_full.append(f'mem={int(mem)}')
|
|
206
|
+
elif not simplified_only:
|
|
197
207
|
if vcpu is not None:
|
|
198
|
-
|
|
208
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
199
209
|
if mem is not None:
|
|
200
|
-
|
|
210
|
+
elements_full.append(f'mem={int(mem)}')
|
|
201
211
|
|
|
202
|
-
instance_type = resource.instance_type
|
|
203
|
-
if simplify:
|
|
204
|
-
instance_type = common_utils.truncate_long_string(instance_type, 15)
|
|
205
212
|
if not is_k8s:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
213
|
+
instance_type_full = resource.instance_type
|
|
214
|
+
instance_type_simple = common_utils.truncate_long_string(
|
|
215
|
+
instance_type_full, 15)
|
|
216
|
+
elements_simple.append(instance_type_simple)
|
|
217
|
+
elements_full.append(instance_type_full)
|
|
218
|
+
elements_simple.append('...')
|
|
219
|
+
if not simplified_only:
|
|
210
220
|
image_id = resource.image_id
|
|
211
221
|
if image_id is not None:
|
|
212
222
|
if None in image_id:
|
|
213
|
-
|
|
223
|
+
elements_full.append(f'image_id={image_id[None]}')
|
|
214
224
|
else:
|
|
215
|
-
|
|
216
|
-
|
|
225
|
+
elements_full.append(f'image_id={image_id}')
|
|
226
|
+
elements_full.append(f'disk={resource.disk_size}')
|
|
217
227
|
disk_tier = resource.disk_tier
|
|
218
228
|
if disk_tier is not None:
|
|
219
|
-
|
|
229
|
+
elements_full.append(f'disk_tier={disk_tier.value}')
|
|
220
230
|
ports = resource.ports
|
|
221
231
|
if ports is not None:
|
|
222
|
-
|
|
232
|
+
elements_full.append(f'ports={ports}')
|
|
223
233
|
|
|
224
234
|
spot = '[spot]' if resource.use_spot else ''
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
235
|
+
resources_str_simple = (
|
|
236
|
+
f'{spot}({"" if not elements_simple else ", ".join(elements_simple)})')
|
|
237
|
+
if simplified_only:
|
|
238
|
+
return resources_str_simple, None
|
|
239
|
+
else:
|
|
240
|
+
resources_str_full = (
|
|
241
|
+
f'{spot}({"" if not elements_full else ", ".join(elements_full)})')
|
|
242
|
+
return resources_str_simple, resources_str_full
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_readable_resources_repr(
|
|
246
|
+
handle: 'backends.CloudVmRayResourceHandle',
|
|
247
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
248
|
+
resource_str_simple, resource_str_full = format_resource(
|
|
249
|
+
handle.launched_resources, simplified_only)
|
|
250
|
+
if not simplified_only:
|
|
251
|
+
assert resource_str_full is not None
|
|
230
252
|
if (handle.launched_nodes is not None and
|
|
231
253
|
handle.launched_resources is not None):
|
|
232
|
-
return (f'{handle.launched_nodes}x'
|
|
233
|
-
|
|
234
|
-
|
|
254
|
+
return (f'{handle.launched_nodes}x{resource_str_simple}',
|
|
255
|
+
None if simplified_only else
|
|
256
|
+
f'{handle.launched_nodes}x{resource_str_full}')
|
|
257
|
+
return (_DEFAULT_MESSAGE_HANDLE_INITIALIZING,
|
|
258
|
+
_DEFAULT_MESSAGE_HANDLE_INITIALIZING)
|
|
235
259
|
|
|
236
260
|
|
|
237
261
|
def make_ray_custom_resources_str(
|
sky/utils/schemas.py
CHANGED
|
@@ -1190,7 +1190,13 @@ def get_config_schema():
|
|
|
1190
1190
|
'consolidation_mode': {
|
|
1191
1191
|
'type': 'boolean',
|
|
1192
1192
|
'default': False,
|
|
1193
|
-
}
|
|
1193
|
+
},
|
|
1194
|
+
'controller_logs_gc_retention_hours': {
|
|
1195
|
+
'type': 'integer',
|
|
1196
|
+
},
|
|
1197
|
+
'task_logs_gc_retention_hours': {
|
|
1198
|
+
'type': 'integer',
|
|
1199
|
+
},
|
|
1194
1200
|
},
|
|
1195
1201
|
},
|
|
1196
1202
|
'bucket': {
|
|
@@ -1592,10 +1598,10 @@ def get_config_schema():
|
|
|
1592
1598
|
|
|
1593
1599
|
allowed_workspace_cloud_names = list(constants.ALL_CLOUDS) + ['cloudflare']
|
|
1594
1600
|
# Create pattern for not supported clouds, i.e.
|
|
1595
|
-
# all clouds except gcp, kubernetes, ssh
|
|
1601
|
+
# all clouds except aws, gcp, kubernetes, ssh, nebius
|
|
1596
1602
|
not_supported_clouds = [
|
|
1597
1603
|
cloud for cloud in allowed_workspace_cloud_names
|
|
1598
|
-
if cloud.lower() not in ['gcp', 'kubernetes', 'ssh', 'nebius']
|
|
1604
|
+
if cloud.lower() not in ['aws', 'gcp', 'kubernetes', 'ssh', 'nebius']
|
|
1599
1605
|
]
|
|
1600
1606
|
not_supported_cloud_regex = '|'.join(not_supported_clouds)
|
|
1601
1607
|
workspaces_schema = {
|
|
@@ -1606,7 +1612,8 @@ def get_config_schema():
|
|
|
1606
1612
|
'type': 'object',
|
|
1607
1613
|
'additionalProperties': False,
|
|
1608
1614
|
'patternProperties': {
|
|
1609
|
-
# Pattern for
|
|
1615
|
+
# Pattern for clouds with no workspace-specific config -
|
|
1616
|
+
# only allow 'disabled' property.
|
|
1610
1617
|
f'^({not_supported_cloud_regex})$': {
|
|
1611
1618
|
'type': 'object',
|
|
1612
1619
|
'additionalProperties': False,
|
|
@@ -1641,6 +1648,18 @@ def get_config_schema():
|
|
|
1641
1648
|
},
|
|
1642
1649
|
'additionalProperties': False,
|
|
1643
1650
|
},
|
|
1651
|
+
'aws': {
|
|
1652
|
+
'type': 'object',
|
|
1653
|
+
'properties': {
|
|
1654
|
+
'profile': {
|
|
1655
|
+
'type': 'string'
|
|
1656
|
+
},
|
|
1657
|
+
'disabled': {
|
|
1658
|
+
'type': 'boolean'
|
|
1659
|
+
},
|
|
1660
|
+
},
|
|
1661
|
+
'additionalProperties': False,
|
|
1662
|
+
},
|
|
1644
1663
|
'ssh': {
|
|
1645
1664
|
'type': 'object',
|
|
1646
1665
|
'required': [],
|
sky/utils/subprocess_utils.py
CHANGED
|
@@ -10,7 +10,8 @@ import sys
|
|
|
10
10
|
import threading
|
|
11
11
|
import time
|
|
12
12
|
import typing
|
|
13
|
-
from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple,
|
|
13
|
+
from typing import (Any, Callable, Dict, List, Optional, Protocol, Set, Tuple,
|
|
14
|
+
Union)
|
|
14
15
|
|
|
15
16
|
import colorama
|
|
16
17
|
|
|
@@ -18,6 +19,7 @@ from sky import exceptions
|
|
|
18
19
|
from sky import sky_logging
|
|
19
20
|
from sky.adaptors import common as adaptors_common
|
|
20
21
|
from sky.skylet import log_lib
|
|
22
|
+
from sky.skylet import subprocess_daemon
|
|
21
23
|
from sky.utils import common_utils
|
|
22
24
|
from sky.utils import timeline
|
|
23
25
|
from sky.utils import ux_utils
|
|
@@ -107,7 +109,7 @@ def get_parallel_threads(cloud_str: Optional[str] = None) -> int:
|
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
def run_in_parallel(func: Callable,
|
|
110
|
-
args: List[Any],
|
|
112
|
+
args: Union[List[Any], Set[Any]],
|
|
111
113
|
num_threads: Optional[int] = None) -> List[Any]:
|
|
112
114
|
"""Run a function in parallel on a list or set of arguments.
|
|
113
115
|
|
|
@@ -128,7 +130,7 @@ def run_in_parallel(func: Callable,
|
|
|
128
130
|
if len(args) == 0:
|
|
129
131
|
return []
|
|
130
132
|
if len(args) == 1:
|
|
131
|
-
return [func(args[0])]
|
|
133
|
+
return [func(list(args)[0])]
|
|
132
134
|
|
|
133
135
|
processes = (num_threads
|
|
134
136
|
if num_threads is not None else get_parallel_threads())
|
|
@@ -305,11 +307,17 @@ def run_with_retries(
|
|
|
305
307
|
return returncode, stdout, stderr
|
|
306
308
|
|
|
307
309
|
|
|
308
|
-
def kill_process_daemon(process_pid: int) -> None:
|
|
310
|
+
def kill_process_daemon(process_pid: int, use_kill_pg: bool = False) -> None:
|
|
309
311
|
"""Start a daemon as a safety net to kill the process.
|
|
310
312
|
|
|
311
313
|
Args:
|
|
312
314
|
process_pid: The PID of the process to kill.
|
|
315
|
+
use_kill_pg: Whether to use kill process group to kill the process. If
|
|
316
|
+
True, the daemon will use os.killpg() to kill the target process
|
|
317
|
+
group on UNIX systems, which is more efficient than using the daemon
|
|
318
|
+
to refresh the process tree. Note that both
|
|
319
|
+
implementations have corner cases where subprocesses might not be
|
|
320
|
+
killed. Refer to subprocess_daemon.py for more details.
|
|
313
321
|
"""
|
|
314
322
|
# Get initial children list
|
|
315
323
|
try:
|
|
@@ -336,6 +344,10 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
336
344
|
','.join(map(str, initial_children)),
|
|
337
345
|
]
|
|
338
346
|
|
|
347
|
+
env = os.environ.copy()
|
|
348
|
+
if use_kill_pg:
|
|
349
|
+
env[subprocess_daemon.USE_KILL_PG_ENV_VAR] = '1'
|
|
350
|
+
|
|
339
351
|
# We do not need to set `start_new_session=True` here, as the
|
|
340
352
|
# daemon script will detach itself from the parent process with
|
|
341
353
|
# fork to avoid being killed by parent process. See the reason we
|
|
@@ -347,6 +359,7 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
347
359
|
stderr=subprocess.DEVNULL,
|
|
348
360
|
# Disable input
|
|
349
361
|
stdin=subprocess.DEVNULL,
|
|
362
|
+
env=env,
|
|
350
363
|
)
|
|
351
364
|
|
|
352
365
|
|
sky/volumes/server/server.py
CHANGED
|
@@ -7,6 +7,7 @@ from sky import exceptions
|
|
|
7
7
|
from sky import sky_logging
|
|
8
8
|
from sky.server.requests import executor
|
|
9
9
|
from sky.server.requests import payloads
|
|
10
|
+
from sky.server.requests import request_names
|
|
10
11
|
from sky.server.requests import requests as requests_lib
|
|
11
12
|
from sky.utils import registry
|
|
12
13
|
from sky.utils import volume as volume_utils
|
|
@@ -25,9 +26,9 @@ async def volume_list(request: fastapi.Request) -> None:
|
|
|
25
26
|
'env_vars': auth_user.to_env_vars()
|
|
26
27
|
} if auth_user else {}
|
|
27
28
|
request_body = payloads.RequestBody(**auth_user_env_vars_kwargs)
|
|
28
|
-
executor.
|
|
29
|
+
await executor.schedule_request_async(
|
|
29
30
|
request_id=request.state.request_id,
|
|
30
|
-
request_name=
|
|
31
|
+
request_name=request_names.RequestName.VOLUME_LIST,
|
|
31
32
|
request_body=request_body,
|
|
32
33
|
func=core.volume_list,
|
|
33
34
|
schedule_type=requests_lib.ScheduleType.SHORT,
|
|
@@ -38,9 +39,9 @@ async def volume_list(request: fastapi.Request) -> None:
|
|
|
38
39
|
async def volume_delete(request: fastapi.Request,
|
|
39
40
|
volume_delete_body: payloads.VolumeDeleteBody) -> None:
|
|
40
41
|
"""Deletes a volume."""
|
|
41
|
-
executor.
|
|
42
|
+
await executor.schedule_request_async(
|
|
42
43
|
request_id=request.state.request_id,
|
|
43
|
-
request_name=
|
|
44
|
+
request_name=request_names.RequestName.VOLUME_DELETE,
|
|
44
45
|
request_body=volume_delete_body,
|
|
45
46
|
func=core.volume_delete,
|
|
46
47
|
schedule_type=requests_lib.ScheduleType.LONG,
|
|
@@ -112,9 +113,9 @@ async def volume_apply(request: fastapi.Request,
|
|
|
112
113
|
raise fastapi.HTTPException(
|
|
113
114
|
status_code=400,
|
|
114
115
|
detail='Runpod network volume is only supported on Runpod')
|
|
115
|
-
executor.
|
|
116
|
+
await executor.schedule_request_async(
|
|
116
117
|
request_id=request.state.request_id,
|
|
117
|
-
request_name=
|
|
118
|
+
request_name=request_names.RequestName.VOLUME_APPLY,
|
|
118
119
|
request_body=volume_apply_body,
|
|
119
120
|
func=core.volume_apply,
|
|
120
121
|
schedule_type=requests_lib.ScheduleType.LONG,
|
sky/workspaces/server.py
CHANGED
|
@@ -4,6 +4,7 @@ import fastapi
|
|
|
4
4
|
|
|
5
5
|
from sky.server.requests import executor
|
|
6
6
|
from sky.server.requests import payloads
|
|
7
|
+
from sky.server.requests import request_names
|
|
7
8
|
from sky.server.requests import requests as api_requests
|
|
8
9
|
from sky.workspaces import core
|
|
9
10
|
|
|
@@ -22,9 +23,9 @@ async def get(request: fastapi.Request) -> None:
|
|
|
22
23
|
} if auth_user else {}
|
|
23
24
|
request_body = payloads.RequestBody(**auth_user_env_vars_kwargs)
|
|
24
25
|
|
|
25
|
-
executor.
|
|
26
|
+
await executor.schedule_request_async(
|
|
26
27
|
request_id=request.state.request_id,
|
|
27
|
-
request_name=
|
|
28
|
+
request_name=request_names.RequestName.WORKSPACES_GET,
|
|
28
29
|
request_body=request_body,
|
|
29
30
|
func=core.get_workspaces,
|
|
30
31
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
@@ -35,9 +36,9 @@ async def get(request: fastapi.Request) -> None:
|
|
|
35
36
|
async def update(request: fastapi.Request,
|
|
36
37
|
update_workspace_body: payloads.UpdateWorkspaceBody) -> None:
|
|
37
38
|
"""Updates a specific workspace configuration."""
|
|
38
|
-
executor.
|
|
39
|
+
await executor.schedule_request_async(
|
|
39
40
|
request_id=request.state.request_id,
|
|
40
|
-
request_name=
|
|
41
|
+
request_name=request_names.RequestName.WORKSPACES_UPDATE,
|
|
41
42
|
request_body=update_workspace_body,
|
|
42
43
|
func=core.update_workspace,
|
|
43
44
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
@@ -48,9 +49,9 @@ async def update(request: fastapi.Request,
|
|
|
48
49
|
async def create(request: fastapi.Request,
|
|
49
50
|
create_workspace_body: payloads.CreateWorkspaceBody) -> None:
|
|
50
51
|
"""Creates a new workspace configuration."""
|
|
51
|
-
executor.
|
|
52
|
+
await executor.schedule_request_async(
|
|
52
53
|
request_id=request.state.request_id,
|
|
53
|
-
request_name=
|
|
54
|
+
request_name=request_names.RequestName.WORKSPACES_CREATE,
|
|
54
55
|
request_body=create_workspace_body,
|
|
55
56
|
func=core.create_workspace,
|
|
56
57
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
@@ -61,9 +62,9 @@ async def create(request: fastapi.Request,
|
|
|
61
62
|
async def delete(request: fastapi.Request,
|
|
62
63
|
delete_workspace_body: payloads.DeleteWorkspaceBody) -> None:
|
|
63
64
|
"""Deletes a workspace configuration."""
|
|
64
|
-
executor.
|
|
65
|
+
await executor.schedule_request_async(
|
|
65
66
|
request_id=request.state.request_id,
|
|
66
|
-
request_name=
|
|
67
|
+
request_name=request_names.RequestName.WORKSPACES_DELETE,
|
|
67
68
|
request_body=delete_workspace_body,
|
|
68
69
|
func=core.delete_workspace,
|
|
69
70
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
@@ -78,9 +79,9 @@ async def get_config(request: fastapi.Request) -> None:
|
|
|
78
79
|
'env_vars': auth_user.to_env_vars()
|
|
79
80
|
} if auth_user else {}
|
|
80
81
|
get_config_body = payloads.GetConfigBody(**auth_user_env_vars_kwargs)
|
|
81
|
-
executor.
|
|
82
|
+
await executor.schedule_request_async(
|
|
82
83
|
request_id=request.state.request_id,
|
|
83
|
-
request_name=
|
|
84
|
+
request_name=request_names.RequestName.WORKSPACES_GET_CONFIG,
|
|
84
85
|
request_body=get_config_body,
|
|
85
86
|
func=core.get_config,
|
|
86
87
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
@@ -91,9 +92,9 @@ async def get_config(request: fastapi.Request) -> None:
|
|
|
91
92
|
async def update_config(request: fastapi.Request,
|
|
92
93
|
update_config_body: payloads.UpdateConfigBody) -> None:
|
|
93
94
|
"""Updates the entire SkyPilot configuration."""
|
|
94
|
-
executor.
|
|
95
|
+
await executor.schedule_request_async(
|
|
95
96
|
request_id=request.state.request_id,
|
|
96
|
-
request_name=
|
|
97
|
+
request_name=request_names.RequestName.WORKSPACES_UPDATE_CONFIG,
|
|
97
98
|
request_body=update_config_body,
|
|
98
99
|
func=core.update_config,
|
|
99
100
|
schedule_type=api_requests.ScheduleType.SHORT,
|