skypilot-nightly 1.0.0.dev20250524__py3-none-any.whl → 1.0.0.dev20250527__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/check.py +32 -6
- sky/cli.py +17 -24
- sky/client/cli.py +17 -24
- sky/client/sdk.py +5 -2
- sky/clouds/cloud.py +2 -2
- sky/clouds/kubernetes.py +10 -5
- sky/clouds/service_catalog/kubernetes_catalog.py +4 -0
- sky/clouds/ssh.py +24 -8
- sky/core.py +20 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/D5bjIfl4Ob3SV3LJz3CO0/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-e220ba0c35bf089e.js +6 -0
- sky/dashboard/out/_next/static/chunks/{498-d7722313e5e5b4e6.js → 320-afea3ddcc5bd1c6c.js} +1 -16
- sky/dashboard/out/_next/static/chunks/470-1d784f5c8750744a.js +1 -0
- sky/dashboard/out/_next/static/chunks/578-24f35aa98d38d638.js +6 -0
- sky/dashboard/out/_next/static/chunks/627-31b701e69f52db0c.js +1 -0
- sky/dashboard/out/_next/static/chunks/843-e35d71cf1c7f706e.js +11 -0
- sky/dashboard/out/_next/static/chunks/990-f85643b521f7ca65.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-3985f074c163a856.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-339b59921ccfe266.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e23fcddf60578a0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e6d1ec6e1ac5b29.js → clusters-8afda8efa5b74997.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-72b8c6c2edfd0e39.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-1521baab6992916b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4d913940b4fa6f5a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ff7e8e377d02b651.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-9900af52acf8648d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-63763ffa3edb4508.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-3ede7a13caf23375.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-72330c4d0fc9a4a2.js +1 -0
- sky/dashboard/out/_next/static/css/6a1c0d711a4bdaf1.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +592 -552
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +33 -3
- sky/server/requests/serializers/decoders.py +0 -11
- sky/server/server.py +23 -22
- sky/setup_files/dependencies.py +1 -0
- sky/skypilot_config.py +35 -9
- sky/utils/db_utils.py +53 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +20 -4
- sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -0
- sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
- sky/utils/kubernetes/ssh-tunnel.sh +20 -28
- sky/utils/schemas.py +57 -5
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +431 -0
- sky/workspaces/server.py +87 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/RECORD +69 -57
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +0 -6
- sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +0 -1
- sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +0 -3
- /sky/dashboard/out/_next/static/{aHej19bZyl4hoHgrzPCn7 → D5bjIfl4Ob3SV3LJz3CO0}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{573-f17bd89d9f9118b3.js → 573-82bd40a37af834f1.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/top_level.txt +0 -0
sky/global_user_state.py
CHANGED
In the reconstructed hunks below, a removed line shown as `…` stands for one or more lines of the old sqlite3-based code whose content was not captured in this view.

```diff
@@ -10,12 +10,17 @@ import json
 import os
 import pathlib
 import pickle
-import sqlite3
 import time
 import typing
 from typing import Any, Dict, List, Optional, Set, Tuple
 import uuid
 
+import sqlalchemy
+from sqlalchemy import exc as sqlalchemy_exc
+from sqlalchemy import orm
+from sqlalchemy.dialects import sqlite
+from sqlalchemy.ext import declarative
+
 from sky import models
 from sky import sky_logging
 from sky.skylet import constants
```
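Before the table definitions that follow, it may help to see the bare pattern these imports enable, detached from SkyPilot: one module-level engine, `Table` objects registered on shared metadata, and an idempotent `create_all()`. This is a minimal illustrative sketch; the engine URL and table name are not SkyPilot's.

```python
# Minimal sketch of the SQLAlchemy Core setup pattern adopted below.
# In-memory SQLite keeps the sketch side-effect free; the real module
# points its engine at ~/.sky/state.db instead.
import sqlalchemy
from sqlalchemy.ext import declarative

engine = sqlalchemy.create_engine('sqlite://')
Base = declarative.declarative_base()

demo_table = sqlalchemy.Table(
    'demo',
    Base.metadata,
    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('value', sqlalchemy.Text),
)

# Emits CREATE TABLE IF NOT EXISTS for every table registered on
# Base.metadata, so repeated calls are safe.
Base.metadata.create_all(bind=engine)
```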
```diff
@@ -38,168 +43,215 @@ _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
 _DB_PATH = os.path.expanduser('~/.sky/state.db')
 pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
 
-…
+_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(f'sqlite:///{_DB_PATH}')
+
+Base = declarative.declarative_base()
+
+config_table = sqlalchemy.Table(
+    'config',
+    Base.metadata,
+    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('value', sqlalchemy.Text),
+)
+
+user_table = sqlalchemy.Table(
+    'users',
+    Base.metadata,
+    sqlalchemy.Column('id', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+)
+
+cluster_table = sqlalchemy.Table(
+    'clusters',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+    sqlalchemy.Column('autostop', sqlalchemy.Integer, server_default='-1'),
+    sqlalchemy.Column('to_down', sqlalchemy.Integer, server_default='0'),
+    sqlalchemy.Column('metadata', sqlalchemy.Text, server_default='{}'),
+    sqlalchemy.Column('owner', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('storage_mounts_metadata',
+                      sqlalchemy.LargeBinary,
+                      server_default=None),
+    sqlalchemy.Column('cluster_ever_up', sqlalchemy.Integer,
+                      server_default='0'),
+    sqlalchemy.Column('status_updated_at',
+                      sqlalchemy.Integer,
+                      server_default=None),
+    sqlalchemy.Column('config_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('workspace',
+                      sqlalchemy.Text,
+                      server_default=constants.SKYPILOT_DEFAULT_WORKSPACE),
+)
+
+storage_table = sqlalchemy.Table(
+    'storage',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+)
+
+# Table for Cluster History
+# usage_intervals: List[Tuple[int, int]]
+# Specifies start and end timestamps of cluster.
+# When the last end time is None, the cluster is still UP.
+# Example: [(start1, end1), (start2, end2), (start3, None)]
+
+# requested_resources: Set[resource_lib.Resource]
+# Requested resources fetched from task that user specifies.
+
+# launched_resources: Optional[resources_lib.Resources]
+# Actual launched resources fetched from handle for cluster.
+
+# num_nodes: Optional[int] number of nodes launched.
+cluster_history_table = sqlalchemy.Table(
+    'cluster_history',
+    Base.metadata,
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+    sqlalchemy.Column('num_nodes', sqlalchemy.Integer),
+    sqlalchemy.Column('requested_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('launched_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('usage_intervals', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text),
+)
+
+
 def create_table():
     # Enable WAL mode to avoid locking issues.
     # See: issue #1441 and PR #1509
     # https://github.com/microsoft/WSL/issues/2395
     # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
     # This may cause the database locked problem from WSL issue #1441.
-    if …
+    if (_SQLALCHEMY_ENGINE.dialect.name
+            == db_utils.SQLAlchemyDialect.SQLITE.value and
+            not common_utils.is_wsl()):
         try:
-            …
+            with orm.Session(_SQLALCHEMY_ENGINE) as session:
+                session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
+                session.commit()
+        except sqlalchemy_exc.OperationalError as e:
             if 'database is locked' not in str(e):
                 raise
             # If the database is locked, it is OK to continue, as the WAL mode
             # is not critical and is likely to be enabled by other processes.
 
-    # Table for Clusters
-    …
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT,
-        autostop INTEGER DEFAULT -1,
-        metadata TEXT DEFAULT '{}',
-        to_down INTEGER DEFAULT 0,
-        owner TEXT DEFAULT null,
-        cluster_hash TEXT DEFAULT null,
-        storage_mounts_metadata BLOB DEFAULT null,
-        cluster_ever_up INTEGER DEFAULT 0,
-        status_updated_at INTEGER DEFAULT null,
-        config_hash TEXT DEFAULT null,
-        user_hash TEXT DEFAULT null,
-        workspace TEXT DEFAULT 'default')""")
-
-    # Table for Cluster History
-    # usage_intervals: List[Tuple[int, int]]
-    # Specifies start and end timestamps of cluster.
-    # When the last end time is None, the cluster is still UP.
-    # Example: [(start1, end1), (start2, end2), (start3, None)]
-
-    # requested_resources: Set[resource_lib.Resource]
-    # Requested resources fetched from task that user specifies.
-
-    # launched_resources: Optional[resources_lib.Resources]
-    # Actual launched resources fetched from handle for cluster.
-
-    # num_nodes: Optional[int] number of nodes launched.
-
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS cluster_history (
-        cluster_hash TEXT PRIMARY KEY,
-        name TEXT,
-        num_nodes int,
-        requested_resources BLOB,
-        launched_resources BLOB,
-        usage_intervals BLOB,
-        user_hash TEXT)""")
-    # Table for configs (e.g. enabled clouds)
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS config (
-        key TEXT PRIMARY KEY, value TEXT)""")
-    # Table for Storage
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS storage (
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT)""")
-    # Table for User
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS users (
-        id TEXT PRIMARY KEY,
-        name TEXT)""")
+    # Create tables if they don't exist
+    Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
+
     # For backward compatibility.
     # TODO(zhwu): Remove this function after all users have migrated to
     # the latest version of SkyPilot.
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # Add autostop column to clusters table
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'autostop',
+                                                'INTEGER DEFAULT -1')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'metadata',
+                                                'TEXT DEFAULT \'{}\'')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'to_down',
+                                                'INTEGER DEFAULT 0')
+
+        # The cloud identity that created the cluster.
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'owner',
+                                                'TEXT')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'cluster_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'storage_mounts_metadata',
+                                                'BLOB DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'cluster_ever_up',
+            'INTEGER DEFAULT 0',
+            # Set the value to 1 so that all the existing clusters before #2977
+            # are considered as ever up, i.e:
+            #   existing cluster's default (null) -> 1;
+            #   new cluster's default -> 0;
+            # This is conservative for the existing clusters: even if some INIT
+            # clusters were never really UP, setting it to 1 means they won't be
+            # auto-deleted during any failover.
+            value_to_replace_existing_entries=1)
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'status_updated_at',
+                                                'INTEGER DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'user_hash',
+            'TEXT DEFAULT null',
+            value_to_replace_existing_entries=common_utils.get_user_hash())
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'config_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'cluster_history',
+                                                'user_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'workspace',
+            'TEXT DEFAULT \'default\'',
+            value_to_replace_existing_entries=constants.
+            SKYPILOT_DEFAULT_WORKSPACE)
+        session.commit()
+
+
+create_table()
 
 
 def add_or_update_user(user: models.User):
     """Store the mapping from user hash to user name for display purposes."""
     if user.name is None:
         return
-    …
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(user_table).values(id=user.id,
+                                                            name=user.name)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[user_table.c.id],
+                set_={user_table.c.name: user.name})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def get_user(user_id: str) -> models.User:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(user_table).filter_by(id=user_id).first()
     if row is None:
         return models.User(id=user_id)
-    return models.User(id=row…
+    return models.User(id=row.id, name=row.name)
 
 
 def get_all_users() -> List[models.User]:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(user_table).all()
+    return [models.User(id=row.id, name=row.name) for row in rows]
 
 
 def add_or_update_cluster(cluster_name: str,
```
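`add_or_update_user` above is the first of several functions that replace the old `INSERT OR REPLACE` statements with SQLAlchemy's SQLite upsert construct. A standalone sketch of that pattern, with illustrative table and column names:

```python
# Sketch of the sqlite.insert(...).on_conflict_do_update(...) upsert used
# by add_or_update_user, add_or_update_cluster, set_enabled_clouds, etc.
import sqlalchemy
from sqlalchemy import orm
from sqlalchemy.dialects import sqlite

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
kv = sqlalchemy.Table(
    'kv', metadata,
    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('value', sqlalchemy.Text))
metadata.create_all(engine)


def upsert(key: str, value: str) -> None:
    insert_stmnt = sqlite.insert(kv).values(key=key, value=value)
    # Compiles to: INSERT INTO kv ... ON CONFLICT (key) DO UPDATE SET ...
    do_update_stmt = insert_stmnt.on_conflict_do_update(
        index_elements=[kv.c.key], set_={kv.c.value: value})
    with orm.Session(engine) as session:
        session.execute(do_update_stmt)
        session.commit()


upsert('alpha', '1')
upsert('alpha', '2')  # updates the existing row instead of raising
```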
```diff
@@ -257,145 +309,116 @@ def add_or_update_cluster(cluster_name: str,
     user_hash = common_utils.get_user_hash()
     active_workspace = skypilot_config.get_active_workspace()
 
-    …
-        # requested resources
-        '?, '
-        # launched resources
-        '?, '
-        # number of nodes
-        '?, '
-        # usage intervals
-        '?, '
-        # user_hash
-        '?'
-        ')',
-        (
-            # hash
-            cluster_hash,
-            # name
-            cluster_name,
-            # number of nodes
-            launched_nodes,
-            # requested resources
-            pickle.dumps(requested_resources),
-            # launched resources
-            pickle.dumps(launched_resources),
-            # usage intervals
-            pickle.dumps(usage_intervals),
-            # user_hash
-            user_hash,
-        ))
-
-    _DB.conn.commit()
+    conditional_values = {}
+    if is_launch:
+        conditional_values.update({
+            'launched_at': cluster_launched_at,
+            'last_use': last_use
+        })
+
+    if int(ready) == 1:
+        conditional_values.update({
+            'cluster_ever_up': 1,
+        })
+
+    if config_hash is not None:
+        conditional_values.update({
+            'config_hash': config_hash,
+        })
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # with_for_update() locks the row until commit() or rollback()
+        # is called, or until the code escapes the with block.
+        cluster_row = session.query(cluster_table).filter_by(
+            name=cluster_name).with_for_update().first()
+        if (not cluster_row or
+                cluster_row.status == status_lib.ClusterStatus.STOPPED.value):
+            conditional_values.update({
+                'autostop': -1,
+                'to_down': 0,
+            })
+        if not cluster_row or not cluster_row.user_hash:
+            conditional_values.update({
+                'user_hash': user_hash,
+            })
+        if not cluster_row or not cluster_row.workspace:
+            conditional_values.update({
+                'workspace': active_workspace,
+            })
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_table).values(
+                name=cluster_name,
+                **conditional_values,
+                handle=handle,
+                status=status.value,
+                # set metadata to server default ('{}')
+                # set owner to server default (null)
+                cluster_hash=cluster_hash,
+                # set storage_mounts_metadata to server default (null)
+                status_updated_at=status_updated_at,
+            )
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_table.c.name],
+                set_={
+                    **conditional_values,
+                    cluster_table.c.handle: handle,
+                    cluster_table.c.status: status.value,
+                    # do not update metadata value
+                    # do not update owner value
+                    cluster_table.c.cluster_hash: cluster_hash,
+                    # do not update storage_mounts_metadata
+                    cluster_table.c.status_updated_at: status_updated_at,
+                    # do not update user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+
+        # Modify cluster history table
+        launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
+        launched_resources = getattr(cluster_handle, 'launched_resources', None)
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_history_table).values(
+                cluster_hash=cluster_hash,
+                name=cluster_name,
+                num_nodes=launched_nodes,
+                requested_resources=pickle.dumps(requested_resources),
+                launched_resources=pickle.dumps(launched_resources),
+                usage_intervals=pickle.dumps(usage_intervals),
+                user_hash=user_hash)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_history_table.c.cluster_hash],
+                set_={
+                    cluster_history_table.c.name: cluster_name,
+                    cluster_history_table.c.num_nodes: launched_nodes,
+                    cluster_history_table.c.requested_resources:
+                        pickle.dumps(requested_resources),
+                    cluster_history_table.c.launched_resources:
+                        pickle.dumps(launched_resources),
+                    cluster_history_table.c.usage_intervals:
+                        pickle.dumps(usage_intervals),
+                    cluster_history_table.c.user_hash: user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
```
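Note the `with_for_update()` call in `add_or_update_cluster` above: the existing row is read under a row lock before the conditional fields are decided, so the read-modify-write cannot interleave with a concurrent writer. A self-contained sketch of that shape (names illustrative; SQLite itself has no `SELECT ... FOR UPDATE`, so the lock only takes effect on backends such as Postgres, which the TODOs in this diff point toward):

```python
# Read-modify-write guarded by with_for_update(), as in
# add_or_update_cluster. On SQLite the FOR UPDATE clause is not emitted;
# on Postgres it would hold a row lock until commit()/rollback().
import sqlalchemy
from sqlalchemy import orm

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
clusters = sqlalchemy.Table(
    'clusters', metadata,
    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('status', sqlalchemy.Text))
metadata.create_all(engine)


def mark_stopped(name: str) -> bool:
    with orm.Session(engine) as session:
        # Read the current row; the lock (where supported) prevents a
        # concurrent writer from changing it before our update commits.
        row = session.query(clusters).filter_by(
            name=name).with_for_update().first()
        if row is None:
            return False
        session.query(clusters).filter_by(name=name).update(
            {clusters.c.status: 'STOPPED'})
        session.commit()
        return True


with orm.Session(engine) as session:
    session.execute(sqlalchemy.insert(clusters),
                    [{'name': 'demo', 'status': 'UP'}])
    session.commit()

assert mark_stopped('demo') and not mark_stopped('missing')
```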
```diff
@@ -413,16 +436,18 @@ def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
 def update_cluster_handle(cluster_name: str,
                           cluster_handle: 'backends.ResourceHandle'):
     handle = pickle.dumps(cluster_handle)
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.handle: handle})
+        session.commit()
 
 
 def update_last_use(cluster_name: str):
     """Updates the last used command for the cluster."""
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.last_use: common_utils.get_current_command()})
+        session.commit()
 
 
 def remove_cluster(cluster_name: str, terminate: bool) -> None:
@@ -430,63 +455,73 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
     cluster_hash = _get_hash_for_existing_cluster(cluster_name)
     usage_intervals = _get_cluster_usage_intervals(cluster_hash)
 
-    …
-    ))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # usage_intervals is not None and not empty
+        if usage_intervals:
+            assert cluster_hash is not None, cluster_name
+            start_time = usage_intervals.pop()[0]
+            end_time = int(time.time())
+            usage_intervals.append((start_time, end_time))
+            _set_cluster_usage_intervals(cluster_hash, usage_intervals)
+
+        if terminate:
+            session.query(cluster_table).filter_by(name=cluster_name).delete()
+        else:
+            handle = get_handle_from_cluster_name(cluster_name)
+            if handle is None:
+                return
+            # Must invalidate IP list to avoid directly trying to ssh into a
+            # stopped VM, which leads to timeout.
+            if hasattr(handle, 'stable_internal_external_ips'):
+                handle = typing.cast('backends.CloudVmRayResourceHandle',
+                                     handle)
+                handle.stable_internal_external_ips = None
+            current_time = int(time.time())
+            session.query(cluster_table).filter_by(name=cluster_name).update({
+                cluster_table.c.handle: pickle.dumps(handle),
+                cluster_table.c.status: status_lib.ClusterStatus.STOPPED.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
 
 
 def get_handle_from_cluster_name(
         cluster_name: str) -> Optional['backends.ResourceHandle']:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    …
-        return …
-    return …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None:
+        return None
+    return pickle.loads(row.handle)
 
 
 def get_glob_cluster_names(cluster_name: str) -> List[str]:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(cluster_table).filter(
+                cluster_table.c.name.op('GLOB')(cluster_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+    return [row.name for row in rows]
 
 
 def set_cluster_status(cluster_name: str,
                        status: status_lib.ClusterStatus) -> None:
     current_time = int(time.time())
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.status: status.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
```
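`get_glob_cluster_names` above reaches SQLite's `GLOB` operator through SQLAlchemy's generic `.op()` escape hatch, which is why the Postgres branch must raise: `GLOB` is SQLite-specific, shell-style (`*`, `?`), and case-sensitive, while the `LIKE` prefix match used elsewhere (e.g. `get_cluster_names_start_with`) is portable. A small sketch contrasting the two, with illustrative names:

```python
import sqlalchemy
from sqlalchemy import orm

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
clusters = sqlalchemy.Table(
    'clusters', metadata,
    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True))
metadata.create_all(engine)

with orm.Session(engine) as session:
    session.execute(sqlalchemy.insert(clusters),
                    [{'name': 'dev-1'}, {'name': 'dev-2'}, {'name': 'prod'}])
    session.commit()

with orm.Session(engine) as session:
    # SQLite-only: shell-style wildcard via the generic .op() escape hatch.
    globbed = session.query(clusters).filter(
        clusters.c.name.op('GLOB')('dev-*')).order_by(clusters.c.name).all()
    # Portable: LIKE prefix match, as in get_cluster_names_start_with().
    liked = session.query(clusters).filter(
        clusters.c.name.like('dev-%')).order_by(clusters.c.name).all()
    assert [r.name for r in globbed] == [r.name for r in liked]
```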
```diff
@@ -494,46 +529,40 @@ def set_cluster_status(cluster_name: str,
 
 def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
                                to_down: bool) -> None:
-    …
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.autostop: idle_minutes,
+                cluster_table.c.to_down: int(to_down)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
-    …
-        return int(launch_time)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None or row.launched_at is None:
+        return None
+    return int(row.launched_at)
 
 
 def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
-    …
-        return json.loads(metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None or row.metadata is None:
+        return None
+    return json.loads(row.metadata)
 
 
 def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
-    …
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.metadata: json.dumps(metadata)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -541,25 +570,22 @@ def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
 
 def get_cluster_storage_mounts_metadata(
         cluster_name: str) -> Optional[Dict[str, Any]]:
-    …
-            return None
-        return pickle.loads(storage_mounts_metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None or row.storage_mounts_metadata is None:
+        return None
+    return pickle.loads(row.storage_mounts_metadata)
 
 
 def set_cluster_storage_mounts_metadata(
         cluster_name: str, storage_mounts_metadata: Dict[str, Any]) -> None:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.storage_mounts_metadata:
+                    pickle.dumps(storage_mounts_metadata)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -570,14 +596,12 @@ def _get_cluster_usage_intervals(
 ) -> Optional[List[Tuple[int, Optional[int]]]]:
     if cluster_hash is None:
         return None
-    …
-        return pickle.loads(usage_intervals)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+    if row is None or row.usage_intervals is None:
+        return None
+    return pickle.loads(row.usage_intervals)
 
 
 def _get_cluster_launch_time(cluster_hash: str) -> Optional[int]:
@@ -609,15 +633,13 @@ def _get_cluster_duration(cluster_hash: str) -> int:
 def _set_cluster_usage_intervals(
     cluster_hash: str, usage_intervals: List[Tuple[int,
                                                    Optional[int]]]) -> None:
-    …
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).update({
+                cluster_history_table.c.usage_intervals:
+                    pickle.dumps(usage_intervals)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster hash {cluster_hash} not found.')
```
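The setters above all share one shape: `Query.update()` returns the number of rows matched, which replaces the old `_DB.cursor.rowcount` bookkeeping for detecting a missing record. A minimal sketch with illustrative names:

```python
import sqlalchemy
from sqlalchemy import orm

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
storage = sqlalchemy.Table(
    'storage', metadata,
    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('status', sqlalchemy.Text))
metadata.create_all(engine)


def set_status(name: str, status: str) -> None:
    with orm.Session(engine) as session:
        # update() returns the number of matched rows: 0 means the record
        # does not exist, 1 means it was updated (name is a primary key).
        count = session.query(storage).filter_by(name=name).update(
            {storage.c.status: status})
        session.commit()
    if count == 0:
        raise ValueError(f'Storage {name} not found.')


with orm.Session(engine) as session:
    session.execute(sqlalchemy.insert(storage),
                    [{'name': 'bucket', 'status': 'INIT'}])
    session.commit()

set_status('bucket', 'READY')  # count == 1; a missing name would raise
```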
```diff
@@ -628,38 +650,38 @@ def set_owner_identity_for_cluster(cluster_name: str,
     if owner_identity is None:
         return
     owner_identity_str = json.dumps(owner_identity)
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.owner: owner_identity_str})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
-    …
-        return cluster_hash
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None or row.cluster_hash is None:
+        return None
+    return row.cluster_hash
 
 
 def get_launched_resources_from_cluster_hash(
         cluster_hash: str) -> Optional[Tuple[int, Any]]:
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+    if row is None:
+        return None
+    num_nodes = row.num_nodes
+    launched_resources = row.launched_resources
 
-    …
-        if num_nodes is None or launched_resources is None:
-            return None
-        launched_resources = pickle.loads(launched_resources)
-        return num_nodes, launched_resources
-    return None
+    if num_nodes is None or launched_resources is None:
+        return None
+    launched_resources = pickle.loads(launched_resources)
+    return num_nodes, launched_resources
@@ -693,76 +715,62 @@ def _load_storage_mounts_metadata(
 @context_utils.cancellation_guard
 def get_cluster_from_name(
         cluster_name: Optional[str]) -> Optional[Dict[str, Any]]:
-    …
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
-            'user_hash': user_hash,
-            'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
-            'workspace': workspace,
-        }
-        return record
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+    if row is None:
+        return None
+    user_hash = _get_user_hash_or_current_user(row.user_hash)
+    # TODO: use namedtuple instead of dict
+    record = {
+        'name': row.name,
+        'launched_at': row.launched_at,
+        'handle': pickle.loads(row.handle),
+        'last_use': row.last_use,
+        'status': status_lib.ClusterStatus[row.status],
+        'autostop': row.autostop,
+        'to_down': bool(row.to_down),
+        'owner': _load_owner(row.owner),
+        'metadata': json.loads(row.metadata),
+        'cluster_hash': row.cluster_hash,
+        'storage_mounts_metadata': _load_storage_mounts_metadata(
+            row.storage_mounts_metadata),
+        'cluster_ever_up': bool(row.cluster_ever_up),
+        'status_updated_at': row.status_updated_at,
+        'user_hash': user_hash,
+        'user_name': get_user(user_hash).name,
+        'config_hash': row.config_hash,
+        'workspace': row.workspace,
+    }
+    return record
 
 
 def get_clusters() -> List[Dict[str, Any]]:
-    …
-        'cluster_ever_up, status_updated_at, config_hash, user_hash, workspace '
-        'from clusters order by launched_at desc').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).order_by(
+            sqlalchemy.desc(cluster_table.c.launched_at)).all()
     records = []
     for row in rows:
-        (…
-         to_down, owner, cluster_hash, storage_mounts_metadata, cluster_ever_up,
-         status_updated_at, config_hash, user_hash, workspace) = row
-        user_hash = _get_user_hash_or_current_user(user_hash)
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
         # TODO: use namedtuple instead of dict
         record = {
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.ClusterStatus[status],
-            'autostop': autostop,
-            'to_down': bool(to_down),
-            'owner': _load_owner(owner),
-            'metadata': json.loads(metadata),
-            'cluster_hash': cluster_hash,
-            'storage_mounts_metadata': …
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.ClusterStatus[row.status],
+            'autostop': row.autostop,
+            'to_down': bool(row.to_down),
+            'owner': _load_owner(row.owner),
+            'metadata': json.loads(row.metadata),
+            'cluster_hash': row.cluster_hash,
+            'storage_mounts_metadata': _load_storage_mounts_metadata(
+                row.storage_mounts_metadata),
+            'cluster_ever_up': bool(row.cluster_ever_up),
+            'status_updated_at': row.status_updated_at,
             'user_hash': user_hash,
             'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
-            'workspace': workspace,
+            'config_hash': row.config_hash,
+            'workspace': row.workspace,
         }
 
         records.append(record)
@@ -770,43 +778,30 @@ def get_clusters() -> List[Dict[str, Any]]:
 
 
 def get_clusters_from_history() -> List[Dict[str, Any]]:
-    …
-        'ON ch.cluster_hash=clusters.cluster_hash ').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(
+            cluster_history_table.join(cluster_table,
+                                       cluster_history_table.c.cluster_hash ==
+                                       cluster_table.c.cluster_hash,
+                                       isouter=True)).all()
 
     # '(cluster_hash, name, num_nodes, requested_resources, '
     # 'launched_resources, usage_intervals) '
     records = []
-
     for row in rows:
         # TODO: use namedtuple instead of dict
-        (
-            cluster_hash,
-            name,
-            num_nodes,
-            launched_resources,
-            usage_intervals,
-            status,
-            user_hash,
-        ) = row[:7]
-        user_hash = _get_user_hash_or_current_user(user_hash)
-
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
+        status = row.status
         if status is not None:
             status = status_lib.ClusterStatus[status]
-
         record = {
-            'name': name,
-            'launched_at': _get_cluster_launch_time(cluster_hash),
-            'duration': _get_cluster_duration(cluster_hash),
-            'num_nodes': num_nodes,
-            'resources': pickle.loads(launched_resources),
-            'cluster_hash': cluster_hash,
-            'usage_intervals': pickle.loads(usage_intervals),
+            'name': row.name,
+            'launched_at': _get_cluster_launch_time(row.cluster_hash),
+            'duration': _get_cluster_duration(row.cluster_hash),
+            'num_nodes': row.num_nodes,
+            'resources': pickle.loads(row.launched_resources),
+            'cluster_hash': row.cluster_hash,
+            'usage_intervals': pickle.loads(row.usage_intervals),
             'status': status,
             'user_hash': user_hash,
         }
@@ -819,30 +814,29 @@ def get_clusters_from_history() -> List[Dict[str, Any]]:
 
 
 def get_cluster_names_start_with(starts_with: str) -> List[str]:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).filter(
+            cluster_table.c.name.like(f'{starts_with}%')).all()
+    return [row.name for row in rows]
 
 
 def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
                               workspace: str) -> List['clouds.Cloud']:
-    …
-        (_get_enabled_clouds_key(cloud_capability, workspace),))
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(config_table).filter_by(
+            key=_get_enabled_clouds_key(cloud_capability, workspace)).first()
     ret = []
-    …
-        ret = json.loads(value)
-        break
+    if row:
+        ret = json.loads(row.value)
     enabled_clouds: List['clouds.Cloud'] = []
     for c in ret:
         try:
             cloud = registry.CLOUD_REGISTRY.from_str(c)
         except ValueError:
-            …
+            # Handle the case for the clouds whose support has been
+            # removed from SkyPilot, e.g., 'local' was a cloud in the past
+            # and may be stored in the database for users before #3037.
+            # We should ignore removed clouds and continue.
             continue
         if cloud is not None:
             enabled_clouds.append(cloud)
@@ -852,10 +846,25 @@ def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
 def set_enabled_clouds(enabled_clouds: List[str],
                        cloud_capability: 'cloud.CloudCapability',
                        workspace: str) -> None:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(config_table).values(
+                key=_get_enabled_clouds_key(cloud_capability, workspace),
+                value=json.dumps(enabled_clouds))
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[config_table.c.key],
+                set_={config_table.c.value: json.dumps(enabled_clouds)})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def _get_enabled_clouds_key(cloud_capability: 'cloud.CloudCapability',
@@ -876,26 +885,48 @@ def add_or_update_storage(storage_name: str,
     if not status_check(storage_status):
         raise ValueError(f'Error in updating global state. Storage Status '
                          f'{storage_status} is passed in incorrectly')
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(storage_table).values(
+                name=storage_name,
+                handle=handle,
+                last_use=last_use,
+                launched_at=storage_launched_at,
+                status=storage_status.value)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[storage_table.c.name],
+                set_={
+                    storage_table.c.handle: handle,
+                    storage_table.c.last_use: last_use,
+                    storage_table.c.launched_at: storage_launched_at,
+                    storage_table.c.status: storage_status.value
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def remove_storage(storage_name: str):
     """Removes Storage from Database"""
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(storage_table).filter_by(name=storage_name).delete()
+        session.commit()
 
 
 def set_storage_status(storage_name: str,
                        status: status_lib.StorageStatus) -> None:
-    …
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update({storage_table.c.status: status.value})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage {storage_name} not found.')
@@ -903,21 +934,20 @@ def set_storage_status(storage_name: str,
 
 def get_storage_status(storage_name: str) -> Optional[status_lib.StorageStatus]:
     assert storage_name is not None, 'storage_name cannot be None'
-    …
-        return status_lib.StorageStatus[status]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+    if row:
+        return status_lib.StorageStatus[row.status]
     return None
 
 
 def set_storage_handle(storage_name: str,
                        handle: 'Storage.StorageMetadata') -> None:
-    …
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update(
+                {storage_table.c.handle: pickle.dumps(handle)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage{storage_name} not found.')
@@ -927,38 +957,48 @@ def get_handle_from_storage_name(
     storage_name: Optional[str]) -> Optional['Storage.StorageMetadata']:
     if storage_name is None:
         return None
-    …
-        return None
-    return pickle.loads(handle)
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+    if row:
+        return pickle.loads(row.handle)
     return None
 
 
 def get_glob_storage_name(storage_name: str) -> List[str]:
     assert storage_name is not None, 'storage_name cannot be None'
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(storage_table).filter(
+                storage_table.c.name.op('GLOB')(storage_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+    return [row.name for row in rows]
 
 
 def get_storage_names_start_with(starts_with: str) -> List[str]:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).filter(
+            storage_table.c.name.like(f'{starts_with}%')).all()
+    return [row.name for row in rows]
 
 
 def get_storage() -> List[Dict[str, Any]]:
-    …
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).all()
     records = []
-    for …
+    for row in rows:
         # TODO: use namedtuple instead of dict
         records.append({
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.StorageStatus[status],
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.StorageStatus[row.status],
         })
     return records
```