skypilot-nightly 1.0.0.dev20250524__py3-none-any.whl → 1.0.0.dev20250527__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. sky/__init__.py +2 -2
  2. sky/check.py +32 -6
  3. sky/cli.py +17 -24
  4. sky/client/cli.py +17 -24
  5. sky/client/sdk.py +5 -2
  6. sky/clouds/cloud.py +2 -2
  7. sky/clouds/kubernetes.py +10 -5
  8. sky/clouds/service_catalog/kubernetes_catalog.py +4 -0
  9. sky/clouds/ssh.py +24 -8
  10. sky/core.py +20 -2
  11. sky/dashboard/out/404.html +1 -1
  12. sky/dashboard/out/_next/static/D5bjIfl4Ob3SV3LJz3CO0/_buildManifest.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/236-e220ba0c35bf089e.js +6 -0
  14. sky/dashboard/out/_next/static/chunks/{498-d7722313e5e5b4e6.js → 320-afea3ddcc5bd1c6c.js} +1 -16
  15. sky/dashboard/out/_next/static/chunks/470-1d784f5c8750744a.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/578-24f35aa98d38d638.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/627-31b701e69f52db0c.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/843-e35d71cf1c7f706e.js +11 -0
  19. sky/dashboard/out/_next/static/chunks/990-f85643b521f7ca65.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/pages/_app-3985f074c163a856.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-339b59921ccfe266.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e23fcddf60578a0d.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/{clusters-9e6d1ec6e1ac5b29.js → clusters-8afda8efa5b74997.js} +1 -1
  24. sky/dashboard/out/_next/static/chunks/pages/config-72b8c6c2edfd0e39.js +6 -0
  25. sky/dashboard/out/_next/static/chunks/pages/infra-1521baab6992916b.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4d913940b4fa6f5a.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs-ff7e8e377d02b651.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/users-9900af52acf8648d.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/workspace/new-63763ffa3edb4508.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-3ede7a13caf23375.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces-72330c4d0fc9a4a2.js +1 -0
  32. sky/dashboard/out/_next/static/css/6a1c0d711a4bdaf1.css +3 -0
  33. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  34. sky/dashboard/out/clusters/[cluster].html +1 -1
  35. sky/dashboard/out/clusters.html +1 -1
  36. sky/dashboard/out/config.html +1 -0
  37. sky/dashboard/out/index.html +1 -1
  38. sky/dashboard/out/infra.html +1 -1
  39. sky/dashboard/out/jobs/[job].html +1 -1
  40. sky/dashboard/out/jobs.html +1 -1
  41. sky/dashboard/out/users.html +1 -1
  42. sky/dashboard/out/workspace/new.html +1 -0
  43. sky/dashboard/out/workspaces/[name].html +1 -0
  44. sky/dashboard/out/workspaces.html +1 -1
  45. sky/global_user_state.py +592 -552
  46. sky/server/constants.py +1 -1
  47. sky/server/requests/payloads.py +33 -3
  48. sky/server/requests/serializers/decoders.py +0 -11
  49. sky/server/server.py +23 -22
  50. sky/setup_files/dependencies.py +1 -0
  51. sky/skypilot_config.py +35 -9
  52. sky/utils/db_utils.py +53 -0
  53. sky/utils/kubernetes/config_map_utils.py +133 -0
  54. sky/utils/kubernetes/deploy_remote_cluster.py +20 -4
  55. sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -0
  56. sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
  57. sky/utils/kubernetes/ssh-tunnel.sh +20 -28
  58. sky/utils/schemas.py +57 -5
  59. sky/workspaces/__init__.py +0 -0
  60. sky/workspaces/core.py +431 -0
  61. sky/workspaces/server.py +87 -0
  62. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/METADATA +2 -1
  63. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/RECORD +69 -57
  64. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/WHEEL +1 -1
  65. sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +0 -1
  66. sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +0 -1
  67. sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +0 -6
  68. sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +0 -1
  69. sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +0 -1
  70. sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +0 -1
  71. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +0 -1
  72. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +0 -16
  73. sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +0 -1
  74. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +0 -1
  75. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +0 -16
  76. sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +0 -1
  77. sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +0 -1
  78. sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +0 -3
  79. /sky/dashboard/out/_next/static/{aHej19bZyl4hoHgrzPCn7 → D5bjIfl4Ob3SV3LJz3CO0}/_ssgManifest.js +0 -0
  80. /sky/dashboard/out/_next/static/chunks/{573-f17bd89d9f9118b3.js → 573-82bd40a37af834f1.js} +0 -0
  81. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/entry_points.txt +0 -0
  82. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/licenses/LICENSE +0 -0
  83. {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/top_level.txt +0 -0
sky/global_user_state.py CHANGED
@@ -10,12 +10,17 @@ import json
 import os
 import pathlib
 import pickle
-import sqlite3
 import time
 import typing
 from typing import Any, Dict, List, Optional, Set, Tuple
 import uuid
 
+import sqlalchemy
+from sqlalchemy import exc as sqlalchemy_exc
+from sqlalchemy import orm
+from sqlalchemy.dialects import sqlite
+from sqlalchemy.ext import declarative
+
 from sky import models
 from sky import sky_logging
 from sky.skylet import constants
@@ -38,168 +43,215 @@ _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
 _DB_PATH = os.path.expanduser('~/.sky/state.db')
 pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
 
-
-def create_table(cursor, conn):
+_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(f'sqlite:///{_DB_PATH}')
+
+Base = declarative.declarative_base()
+
+config_table = sqlalchemy.Table(
+    'config',
+    Base.metadata,
+    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('value', sqlalchemy.Text),
+)
+
+user_table = sqlalchemy.Table(
+    'users',
+    Base.metadata,
+    sqlalchemy.Column('id', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+)
+
+cluster_table = sqlalchemy.Table(
+    'clusters',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+    sqlalchemy.Column('autostop', sqlalchemy.Integer, server_default='-1'),
+    sqlalchemy.Column('to_down', sqlalchemy.Integer, server_default='0'),
+    sqlalchemy.Column('metadata', sqlalchemy.Text, server_default='{}'),
+    sqlalchemy.Column('owner', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('storage_mounts_metadata',
+                      sqlalchemy.LargeBinary,
+                      server_default=None),
+    sqlalchemy.Column('cluster_ever_up', sqlalchemy.Integer,
+                      server_default='0'),
+    sqlalchemy.Column('status_updated_at',
+                      sqlalchemy.Integer,
+                      server_default=None),
+    sqlalchemy.Column('config_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('workspace',
+                      sqlalchemy.Text,
+                      server_default=constants.SKYPILOT_DEFAULT_WORKSPACE),
+)
+
+storage_table = sqlalchemy.Table(
+    'storage',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+)
+
+# Table for Cluster History
+# usage_intervals: List[Tuple[int, int]]
+# Specifies start and end timestamps of cluster.
+# When the last end time is None, the cluster is still UP.
+# Example: [(start1, end1), (start2, end2), (start3, None)]
+
+# requested_resources: Set[resource_lib.Resource]
+# Requested resources fetched from task that user specifies.
+
+# launched_resources: Optional[resources_lib.Resources]
+# Actual launched resources fetched from handle for cluster.
+
+# num_nodes: Optional[int] number of nodes launched.
+cluster_history_table = sqlalchemy.Table(
+    'cluster_history',
+    Base.metadata,
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+    sqlalchemy.Column('num_nodes', sqlalchemy.Integer),
+    sqlalchemy.Column('requested_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('launched_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('usage_intervals', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text),
+)
+
+
+def create_table():
     # Enable WAL mode to avoid locking issues.
     # See: issue #1441 and PR #1509
     # https://github.com/microsoft/WSL/issues/2395
     # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
     # This may cause the database locked problem from WSL issue #1441.
-    if not common_utils.is_wsl():
+    if (_SQLALCHEMY_ENGINE.dialect.name
+            == db_utils.SQLAlchemyDialect.SQLITE.value and
+            not common_utils.is_wsl()):
         try:
-            cursor.execute('PRAGMA journal_mode=WAL')
-        except sqlite3.OperationalError as e:
+            with orm.Session(_SQLALCHEMY_ENGINE) as session:
+                session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
+                session.commit()
+        except sqlalchemy_exc.OperationalError as e:
             if 'database is locked' not in str(e):
                 raise
             # If the database is locked, it is OK to continue, as the WAL mode
             # is not critical and is likely to be enabled by other processes.
 
-    # Table for Clusters
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS clusters (
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT,
-        autostop INTEGER DEFAULT -1,
-        metadata TEXT DEFAULT '{}',
-        to_down INTEGER DEFAULT 0,
-        owner TEXT DEFAULT null,
-        cluster_hash TEXT DEFAULT null,
-        storage_mounts_metadata BLOB DEFAULT null,
-        cluster_ever_up INTEGER DEFAULT 0,
-        status_updated_at INTEGER DEFAULT null,
-        config_hash TEXT DEFAULT null,
-        user_hash TEXT DEFAULT null,
-        workspace TEXT DEFAULT 'default')""")
-
-    # Table for Cluster History
-    # usage_intervals: List[Tuple[int, int]]
-    # Specifies start and end timestamps of cluster.
-    # When the last end time is None, the cluster is still UP.
-    # Example: [(start1, end1), (start2, end2), (start3, None)]
-
-    # requested_resources: Set[resource_lib.Resource]
-    # Requested resources fetched from task that user specifies.
-
-    # launched_resources: Optional[resources_lib.Resources]
-    # Actual launched resources fetched from handle for cluster.
-
-    # num_nodes: Optional[int] number of nodes launched.
-
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS cluster_history (
-        cluster_hash TEXT PRIMARY KEY,
-        name TEXT,
-        num_nodes int,
-        requested_resources BLOB,
-        launched_resources BLOB,
-        usage_intervals BLOB,
-        user_hash TEXT)""")
-    # Table for configs (e.g. enabled clouds)
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS config (
-        key TEXT PRIMARY KEY, value TEXT)""")
-    # Table for Storage
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS storage (
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT)""")
-    # Table for User
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS users (
-        id TEXT PRIMARY KEY,
-        name TEXT)""")
+    # Create tables if they don't exist
+    Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
+
     # For backward compatibility.
     # TODO(zhwu): Remove this function after all users have migrated to
     # the latest version of SkyPilot.
-    # Add autostop column to clusters table
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'autostop',
-                                 'INTEGER DEFAULT -1')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'metadata',
-                                 'TEXT DEFAULT \'{}\'')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'to_down',
-                                 'INTEGER DEFAULT 0')
-
-    # The cloud identity that created the cluster.
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'owner', 'TEXT')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'cluster_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters',
-                                 'storage_mounts_metadata', 'BLOB DEFAULT null')
-    db_utils.add_column_to_table(
-        cursor,
-        conn,
-        'clusters',
-        'cluster_ever_up',
-        'INTEGER DEFAULT 0',
-        # Set the value to 1 so that all the existing clusters before #2977
-        # are considered as ever up, i.e:
-        # existing cluster's default (null) -> 1;
-        # new cluster's default -> 0;
-        # This is conservative for the existing clusters: even if some INIT
-        # clusters were never really UP, setting it to 1 means they won't be
-        # auto-deleted during any failover.
-        value_to_replace_existing_entries=1)
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'status_updated_at',
-                                 'INTEGER DEFAULT null')
-    db_utils.add_column_to_table(
-        cursor,
-        conn,
-        'clusters',
-        'user_hash',
-        'TEXT DEFAULT null',
-        value_to_replace_existing_entries=common_utils.get_user_hash())
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'config_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'config_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'cluster_history', 'user_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(
-        cursor,
-        conn,
-        'clusters',
-        'workspace',
-        'TEXT DEFAULT \'default\'',
-        value_to_replace_existing_entries=constants.SKYPILOT_DEFAULT_WORKSPACE)
-    conn.commit()
-
-
-_DB = db_utils.SQLiteConn(_DB_PATH, create_table)
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # Add autostop column to clusters table
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'autostop',
+                                                'INTEGER DEFAULT -1')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'metadata',
+                                                'TEXT DEFAULT \'{}\'')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'to_down',
+                                                'INTEGER DEFAULT 0')
+
+        # The cloud identity that created the cluster.
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'owner',
+                                                'TEXT')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'cluster_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'storage_mounts_metadata',
+                                                'BLOB DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'cluster_ever_up',
+            'INTEGER DEFAULT 0',
+            # Set the value to 1 so that all the existing clusters before #2977
+            # are considered as ever up, i.e:
+            # existing cluster's default (null) -> 1;
+            # new cluster's default -> 0;
+            # This is conservative for the existing clusters: even if some INIT
+            # clusters were never really UP, setting it to 1 means they won't be
+            # auto-deleted during any failover.
+            value_to_replace_existing_entries=1)
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'status_updated_at',
+                                                'INTEGER DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'user_hash',
+            'TEXT DEFAULT null',
+            value_to_replace_existing_entries=common_utils.get_user_hash())
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'config_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'cluster_history',
+                                                'user_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'workspace',
+            'TEXT DEFAULT \'default\'',
+            value_to_replace_existing_entries=constants.
+            SKYPILOT_DEFAULT_WORKSPACE)
+        session.commit()
+
+
+create_table()
 
 
 def add_or_update_user(user: models.User):
     """Store the mapping from user hash to user name for display purposes."""
     if user.name is None:
         return
-    _DB.cursor.execute('INSERT OR REPLACE INTO users (id, name) VALUES (?, ?)',
-                       (user.id, user.name))
-    _DB.conn.commit()
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(user_table).values(id=user.id,
+                                                            name=user.name)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[user_table.c.id],
+                set_={user_table.c.name: user.name})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def get_user(user_id: str) -> models.User:
-    row = _DB.cursor.execute('SELECT id, name FROM users WHERE id=?',
-                             (user_id,)).fetchone()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(user_table).filter_by(id=user_id).first()
     if row is None:
         return models.User(id=user_id)
-    return models.User(id=row[0], name=row[1])
+    return models.User(id=row.id, name=row.name)
 
 
 def get_all_users() -> List[models.User]:
-    rows = _DB.cursor.execute('SELECT id, name FROM users').fetchall()
-    return [models.User(id=row[0], name=row[1]) for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(user_table).all()
+        return [models.User(id=row.id, name=row.name) for row in rows]
 
 
 def add_or_update_cluster(cluster_name: str,
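Note: the hunk above swaps hand-written DDL strings for a SQLAlchemy Core schema plus Base.metadata.create_all(). A minimal, self-contained sketch of that pattern, using a hypothetical demo table and path rather than SkyPilot's real ones:

    import sqlalchemy
    from sqlalchemy import orm
    from sqlalchemy.ext import declarative

    engine = sqlalchemy.create_engine('sqlite:////tmp/demo.db')
    Base = declarative.declarative_base()

    demo_table = sqlalchemy.Table(
        'demo',
        Base.metadata,
        sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
        sqlalchemy.Column('value', sqlalchemy.Text),
    )

    # Idempotent: inspects the database and only creates tables that are
    # missing, replacing the old CREATE TABLE IF NOT EXISTS strings.
    Base.metadata.create_all(bind=engine)

    # Dialect-specific statements (like the WAL pragma) go through text().
    with orm.Session(engine) as session:
        session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
        session.commit()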
@@ -257,145 +309,116 @@ def add_or_update_cluster(cluster_name: str,
     user_hash = common_utils.get_user_hash()
     active_workspace = skypilot_config.get_active_workspace()
 
-    _DB.cursor.execute(
-        'INSERT or REPLACE INTO clusters'
-        # All the fields need to exist here, even if they don't need
-        # be changed, as the INSERT OR REPLACE statement will replace
-        # the field of the existing row with the default value if not
-        # specified.
-        '(name, launched_at, handle, last_use, status, '
-        'autostop, to_down, metadata, owner, cluster_hash, '
-        'storage_mounts_metadata, cluster_ever_up, status_updated_at, '
-        'config_hash, user_hash, workspace) '
-        'VALUES ('
-        # name
-        '?, '
-        # launched_at
-        'COALESCE('
-        '?, (SELECT launched_at FROM clusters WHERE name=?)), '
-        # handle
-        '?, '
-        # last_use
-        'COALESCE('
-        '?, (SELECT last_use FROM clusters WHERE name=?)), '
-        # status
-        '?, '
-        # autostop
-        # Keep the old autostop value if it exists, otherwise set it to
-        # default -1.
-        'COALESCE('
-        '(SELECT autostop FROM clusters WHERE name=? AND status!=?), -1), '
-        # Keep the old to_down value if it exists, otherwise set it to
-        # default 0.
-        'COALESCE('
-        '(SELECT to_down FROM clusters WHERE name=? AND status!=?), 0),'
-        # Keep the old metadata value if it exists, otherwise set it to
-        # default {}.
-        'COALESCE('
-        '(SELECT metadata FROM clusters WHERE name=?), \'{}\'),'
-        # Keep the old owner value if it exists, otherwise set it to
-        # default null.
-        'COALESCE('
-        '(SELECT owner FROM clusters WHERE name=?), null),'
-        # cluster_hash
-        '?,'
-        # storage_mounts_metadata
-        'COALESCE('
-        '(SELECT storage_mounts_metadata FROM clusters WHERE name=?), null), '
-        # cluster_ever_up
-        '((SELECT cluster_ever_up FROM clusters WHERE name=?) OR ?), '
-        # status_updated_at
-        '?,'
-        # config_hash
-        'COALESCE(?, (SELECT config_hash FROM clusters WHERE name=?)),'
-        # user_hash: keep original user_hash if it exists
-        'COALESCE((SELECT user_hash FROM clusters WHERE name=?), ?),'
-        # keep original workspace if it exists
-        'COALESCE((SELECT workspace FROM clusters WHERE name=?), ?)'
-        ')',
-        (
-            # name
-            cluster_name,
-            # launched_at
-            cluster_launched_at,
-            cluster_name,
-            # handle
-            handle,
-            # last_use
-            last_use,
-            cluster_name,
-            # status
-            status.value,
-            # autostop
-            cluster_name,
-            status_lib.ClusterStatus.STOPPED.value,
-            # to_down
-            cluster_name,
-            status_lib.ClusterStatus.STOPPED.value,
-            # metadata
-            cluster_name,
-            # owner
-            cluster_name,
-            # cluster_hash
-            cluster_hash,
-            # storage_mounts_metadata
-            cluster_name,
-            # cluster_ever_up
-            cluster_name,
-            int(ready),
-            # status_updated_at
-            status_updated_at,
-            # config_hash
-            config_hash,
-            cluster_name,
-            # user_hash
-            cluster_name,
-            user_hash,
-            # workspace
-            cluster_name,
-            active_workspace,
-        ))
-
-    launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
-    launched_resources = getattr(cluster_handle, 'launched_resources', None)
-    _DB.cursor.execute(
-        'INSERT or REPLACE INTO cluster_history'
-        '(cluster_hash, name, num_nodes, requested_resources, '
-        'launched_resources, usage_intervals, user_hash) '
-        'VALUES ('
-        # hash
-        '?, '
-        # name
-        '?, '
-        # requested resources
-        '?, '
-        # launched resources
-        '?, '
-        # number of nodes
-        '?, '
-        # usage intervals
-        '?, '
-        # user_hash
-        '?'
-        ')',
-        (
-            # hash
-            cluster_hash,
-            # name
-            cluster_name,
-            # number of nodes
-            launched_nodes,
-            # requested resources
-            pickle.dumps(requested_resources),
-            # launched resources
-            pickle.dumps(launched_resources),
-            # usage intervals
-            pickle.dumps(usage_intervals),
-            # user_hash
-            user_hash,
-        ))
-
-    _DB.conn.commit()
+    conditional_values = {}
+    if is_launch:
+        conditional_values.update({
+            'launched_at': cluster_launched_at,
+            'last_use': last_use
+        })
+
+    if int(ready) == 1:
+        conditional_values.update({
+            'cluster_ever_up': 1,
+        })
+
+    if config_hash is not None:
+        conditional_values.update({
+            'config_hash': config_hash,
+        })
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # with_for_update() locks the row until commit() or rollback()
+        # is called, or until the code escapes the with block.
+        cluster_row = session.query(cluster_table).filter_by(
+            name=cluster_name).with_for_update().first()
+        if (not cluster_row or
+                cluster_row.status == status_lib.ClusterStatus.STOPPED.value):
+            conditional_values.update({
+                'autostop': -1,
+                'to_down': 0,
+            })
+        if not cluster_row or not cluster_row.user_hash:
+            conditional_values.update({
+                'user_hash': user_hash,
+            })
+        if not cluster_row or not cluster_row.workspace:
+            conditional_values.update({
+                'workspace': active_workspace,
+            })
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_table).values(
+                name=cluster_name,
+                **conditional_values,
+                handle=handle,
+                status=status.value,
+                # set metadata to server default ('{}')
+                # set owner to server default (null)
+                cluster_hash=cluster_hash,
+                # set storage_mounts_metadata to server default (null)
+                status_updated_at=status_updated_at,
+            )
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_table.c.name],
+                set_={
+                    **conditional_values,
+                    cluster_table.c.handle: handle,
+                    cluster_table.c.status: status.value,
+                    # do not update metadata value
+                    # do not update owner value
+                    cluster_table.c.cluster_hash: cluster_hash,
+                    # do not update storage_mounts_metadata
+                    cluster_table.c.status_updated_at: status_updated_at,
+                    # do not update user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+
+        # Modify cluster history table
+        launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
+        launched_resources = getattr(cluster_handle, 'launched_resources', None)
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_history_table).values(
+                cluster_hash=cluster_hash,
+                name=cluster_name,
+                num_nodes=launched_nodes,
+                requested_resources=pickle.dumps(requested_resources),
+                launched_resources=pickle.dumps(launched_resources),
+                usage_intervals=pickle.dumps(usage_intervals),
+                user_hash=user_hash)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_history_table.c.cluster_hash],
+                set_={
+                    cluster_history_table.c.name: cluster_name,
+                    cluster_history_table.c.num_nodes: launched_nodes,
+                    cluster_history_table.c.requested_resources:
+                        pickle.dumps(requested_resources),
+                    cluster_history_table.c.launched_resources:
+                        pickle.dumps(launched_resources),
+                    cluster_history_table.c.usage_intervals:
+                        pickle.dumps(usage_intervals),
+                    cluster_history_table.c.user_hash: user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
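Note: the rewritten add_or_update_cluster relies on SQLite's INSERT ... ON CONFLICT DO UPDATE (upsert) instead of the old INSERT OR REPLACE, so columns left out of the update keep their stored values rather than being reset to defaults. A hedged sketch of that primitive, reusing the hypothetical engine and demo_table from the note above:

    from sqlalchemy import orm
    from sqlalchemy.dialects import sqlite

    with orm.Session(engine) as session:
        insert_stmt = sqlite.insert(demo_table).values(key='k', value='v1')
        upsert = insert_stmt.on_conflict_do_update(
            index_elements=[demo_table.c.key],
            # Only columns listed in set_ are overwritten on conflict;
            # columns omitted here keep whatever is already stored.
            set_={demo_table.c.value: 'v1'})
        session.execute(upsert)
        session.commit()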
@@ -413,16 +436,18 @@ def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
 def update_cluster_handle(cluster_name: str,
                           cluster_handle: 'backends.ResourceHandle'):
     handle = pickle.dumps(cluster_handle)
-    _DB.cursor.execute('UPDATE clusters SET handle=(?) WHERE name=(?)',
-                       (handle, cluster_name))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.handle: handle})
+        session.commit()
 
 
 def update_last_use(cluster_name: str):
     """Updates the last used command for the cluster."""
-    _DB.cursor.execute('UPDATE clusters SET last_use=(?) WHERE name=(?)',
-                       (common_utils.get_current_command(), cluster_name))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.last_use: common_utils.get_current_command()})
+        session.commit()
 
 
 def remove_cluster(cluster_name: str, terminate: bool) -> None:
@@ -430,63 +455,73 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
     cluster_hash = _get_hash_for_existing_cluster(cluster_name)
     usage_intervals = _get_cluster_usage_intervals(cluster_hash)
 
-    # usage_intervals is not None and not empty
-    if usage_intervals:
-        assert cluster_hash is not None, cluster_name
-        start_time = usage_intervals.pop()[0]
-        end_time = int(time.time())
-        usage_intervals.append((start_time, end_time))
-        _set_cluster_usage_intervals(cluster_hash, usage_intervals)
-
-    if terminate:
-        _DB.cursor.execute('DELETE FROM clusters WHERE name=(?)',
-                           (cluster_name,))
-    else:
-        handle = get_handle_from_cluster_name(cluster_name)
-        if handle is None:
-            return
-        # Must invalidate IP list to avoid directly trying to ssh into a
-        # stopped VM, which leads to timeout.
-        if hasattr(handle, 'stable_internal_external_ips'):
-            handle = typing.cast('backends.CloudVmRayResourceHandle', handle)
-            handle.stable_internal_external_ips = None
-        current_time = int(time.time())
-        _DB.cursor.execute(
-            'UPDATE clusters SET handle=(?), status=(?), '
-            'status_updated_at=(?) WHERE name=(?)', (
-                pickle.dumps(handle),
-                status_lib.ClusterStatus.STOPPED.value,
-                current_time,
-                cluster_name,
-            ))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # usage_intervals is not None and not empty
+        if usage_intervals:
+            assert cluster_hash is not None, cluster_name
+            start_time = usage_intervals.pop()[0]
+            end_time = int(time.time())
+            usage_intervals.append((start_time, end_time))
+            _set_cluster_usage_intervals(cluster_hash, usage_intervals)
+
+        if terminate:
+            session.query(cluster_table).filter_by(name=cluster_name).delete()
+        else:
+            handle = get_handle_from_cluster_name(cluster_name)
+            if handle is None:
+                return
+            # Must invalidate IP list to avoid directly trying to ssh into a
+            # stopped VM, which leads to timeout.
+            if hasattr(handle, 'stable_internal_external_ips'):
+                handle = typing.cast('backends.CloudVmRayResourceHandle',
+                                     handle)
+                handle.stable_internal_external_ips = None
+            current_time = int(time.time())
+            session.query(cluster_table).filter_by(name=cluster_name).update({
+                cluster_table.c.handle: pickle.dumps(handle),
+                cluster_table.c.status: status_lib.ClusterStatus.STOPPED.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
 
 
 def get_handle_from_cluster_name(
         cluster_name: str) -> Optional['backends.ResourceHandle']:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    rows = _DB.cursor.execute('SELECT handle FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (handle,) in rows:
-        return pickle.loads(handle)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None:
+            return None
+        return pickle.loads(row.handle)
 
 
 def get_glob_cluster_names(cluster_name: str) -> List[str]:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    rows = _DB.cursor.execute('SELECT name FROM clusters WHERE name GLOB (?)',
-                              (cluster_name,))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(cluster_table).filter(
+                cluster_table.c.name.op('GLOB')(cluster_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+        return [row.name for row in rows]
 
 
 def set_cluster_status(cluster_name: str,
                        status: status_lib.ClusterStatus) -> None:
     current_time = int(time.time())
-    _DB.cursor.execute(
-        'UPDATE clusters SET status=(?), status_updated_at=(?) WHERE name=(?)',
-        (status.value, current_time, cluster_name))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.status: status.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
        raise ValueError(f'Cluster {cluster_name} not found.')
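Note: GLOB is a SQLite-specific operator with no portable equivalent (hence the dialect branch and the Postgres TODO); SQLAlchemy has no built-in GLOB, so the code emits it verbatim through .op(). Sketch against the hypothetical demo_table from the earlier note:

    with orm.Session(engine) as session:
        # Emits: SELECT ... FROM demo WHERE demo."key" GLOB 'my-cluster*'
        rows = session.query(demo_table).filter(
            demo_table.c.key.op('GLOB')('my-cluster*')).all()
        names = [row.key for row in rows]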
@@ -494,46 +529,40 @@ def set_cluster_status(cluster_name: str,
 
 def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
                                to_down: bool) -> None:
-    _DB.cursor.execute(
-        'UPDATE clusters SET autostop=(?), to_down=(?) WHERE name=(?)', (
-            idle_minutes,
-            int(to_down),
-            cluster_name,
-        ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.autostop: idle_minutes,
+                cluster_table.c.to_down: int(to_down)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
-    rows = _DB.cursor.execute('SELECT launched_at FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (launch_time,) in rows:
-        if launch_time is None:
-            return None
-        return int(launch_time)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.launched_at is None:
+            return None
+        return int(row.launched_at)
 
 
 def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute('SELECT metadata FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (metadata,) in rows:
-        if metadata is None:
-            return None
-        return json.loads(metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.metadata is None:
+            return None
+        return json.loads(row.metadata)
 
 
 def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
-    _DB.cursor.execute('UPDATE clusters SET metadata=(?) WHERE name=(?)', (
-        json.dumps(metadata),
-        cluster_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.metadata: json.dumps(metadata)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
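Note: the setters above depend on Query.update() returning the number of rows matched by the filter, which preserves the old cursor.rowcount idiom used to detect a missing cluster. Sketch, same hypothetical table:

    with orm.Session(engine) as session:
        # update() returns the matched row count, usable after the session
        # closes since it is a plain int.
        count = session.query(demo_table).filter_by(key='k').update(
            {demo_table.c.value: 'v2'})
        session.commit()
    assert count <= 1, count
    if count == 0:
        raise ValueError('Row not found.')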
@@ -541,25 +570,22 @@ def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
 
 def get_cluster_storage_mounts_metadata(
         cluster_name: str) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT storage_mounts_metadata FROM clusters WHERE name=(?)',
-        (cluster_name,))
-    for (storage_mounts_metadata,) in rows:
-        if storage_mounts_metadata is None:
-            return None
-        return pickle.loads(storage_mounts_metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.storage_mounts_metadata is None:
+            return None
+        return pickle.loads(row.storage_mounts_metadata)
 
 
 def set_cluster_storage_mounts_metadata(
         cluster_name: str, storage_mounts_metadata: Dict[str, Any]) -> None:
-    _DB.cursor.execute(
-        'UPDATE clusters SET storage_mounts_metadata=(?) WHERE name=(?)', (
-            pickle.dumps(storage_mounts_metadata),
-            cluster_name,
-        ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.storage_mounts_metadata:
+                    pickle.dumps(storage_mounts_metadata)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -570,14 +596,12 @@ def _get_cluster_usage_intervals(
 ) -> Optional[List[Tuple[int, Optional[int]]]]:
     if cluster_hash is None:
         return None
-    rows = _DB.cursor.execute(
-        'SELECT usage_intervals FROM cluster_history WHERE cluster_hash=(?)',
-        (cluster_hash,))
-    for (usage_intervals,) in rows:
-        if usage_intervals is None:
-            return None
-        return pickle.loads(usage_intervals)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+        if row is None or row.usage_intervals is None:
+            return None
+        return pickle.loads(row.usage_intervals)
 
 
 def _get_cluster_launch_time(cluster_hash: str) -> Optional[int]:
@@ -609,15 +633,13 @@ def _get_cluster_duration(cluster_hash: str) -> int:
 def _set_cluster_usage_intervals(
         cluster_hash: str, usage_intervals: List[Tuple[int,
                                                        Optional[int]]]) -> None:
-    _DB.cursor.execute(
-        'UPDATE cluster_history SET usage_intervals=(?) WHERE cluster_hash=(?)',
-        (
-            pickle.dumps(usage_intervals),
-            cluster_hash,
-        ))
-
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).update({
+                cluster_history_table.c.usage_intervals:
+                    pickle.dumps(usage_intervals)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster hash {cluster_hash} not found.')
@@ -628,38 +650,38 @@ def set_owner_identity_for_cluster(cluster_name: str,
     if owner_identity is None:
         return
     owner_identity_str = json.dumps(owner_identity)
-    _DB.cursor.execute('UPDATE clusters SET owner=(?) WHERE name=(?)',
-                       (owner_identity_str, cluster_name))
-
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.owner: owner_identity_str})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
-    rows = _DB.cursor.execute(
-        'SELECT cluster_hash FROM clusters WHERE name=(?)', (cluster_name,))
-    for (cluster_hash,) in rows:
-        if cluster_hash is None:
-            return None
-        return cluster_hash
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.cluster_hash is None:
+            return None
+        return row.cluster_hash
 
 
 def get_launched_resources_from_cluster_hash(
         cluster_hash: str) -> Optional[Tuple[int, Any]]:
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+        if row is None:
+            return None
+        num_nodes = row.num_nodes
+        launched_resources = row.launched_resources
 
-    rows = _DB.cursor.execute(
-        'SELECT num_nodes, launched_resources '
-        'FROM cluster_history WHERE cluster_hash=(?)', (cluster_hash,))
-    for (num_nodes, launched_resources) in rows:
-        if num_nodes is None or launched_resources is None:
-            return None
-        launched_resources = pickle.loads(launched_resources)
-        return num_nodes, launched_resources
-    return None
+    if num_nodes is None or launched_resources is None:
+        return None
+    launched_resources = pickle.loads(launched_resources)
+    return num_nodes, launched_resources
 
 
 def _load_owner(record_owner: Optional[str]) -> Optional[List[str]]:
@@ -693,76 +715,62 @@ def _load_storage_mounts_metadata(
 @context_utils.cancellation_guard
 def get_cluster_from_name(
         cluster_name: Optional[str]) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT name, launched_at, handle, last_use, status, autostop, '
-        'metadata, to_down, owner, cluster_hash, storage_mounts_metadata, '
-        'cluster_ever_up, status_updated_at, config_hash, user_hash, workspace '
-        'FROM clusters WHERE name=(?)', (cluster_name,)).fetchall()
-    for row in rows:
-        # Explicitly specify the number of fields to unpack, so that
-        # we can add new fields to the database in the future without
-        # breaking the previous code.
-        (name, launched_at, handle, last_use, status, autostop, metadata,
-         to_down, owner, cluster_hash, storage_mounts_metadata, cluster_ever_up,
-         status_updated_at, config_hash, user_hash, workspace) = row
-        user_hash = _get_user_hash_or_current_user(user_hash)
-        # TODO: use namedtuple instead of dict
-        record = {
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.ClusterStatus[status],
-            'autostop': autostop,
-            'to_down': bool(to_down),
-            'owner': _load_owner(owner),
-            'metadata': json.loads(metadata),
-            'cluster_hash': cluster_hash,
-            'storage_mounts_metadata':
-                _load_storage_mounts_metadata(storage_mounts_metadata),
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
-            'user_hash': user_hash,
-            'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
-            'workspace': workspace,
-        }
-        return record
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None:
+            return None
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
+        # TODO: use namedtuple instead of dict
+        record = {
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.ClusterStatus[row.status],
+            'autostop': row.autostop,
+            'to_down': bool(row.to_down),
+            'owner': _load_owner(row.owner),
+            'metadata': json.loads(row.metadata),
+            'cluster_hash': row.cluster_hash,
+            'storage_mounts_metadata': _load_storage_mounts_metadata(
+                row.storage_mounts_metadata),
+            'cluster_ever_up': bool(row.cluster_ever_up),
+            'status_updated_at': row.status_updated_at,
+            'user_hash': user_hash,
+            'user_name': get_user(user_hash).name,
+            'config_hash': row.config_hash,
+            'workspace': row.workspace,
+        }
+        return record
 
 
 def get_clusters() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'select name, launched_at, handle, last_use, status, autostop, '
-        'metadata, to_down, owner, cluster_hash, storage_mounts_metadata, '
-        'cluster_ever_up, status_updated_at, config_hash, user_hash, workspace '
-        'from clusters order by launched_at desc').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).order_by(
+            sqlalchemy.desc(cluster_table.c.launched_at)).all()
     records = []
     for row in rows:
-        (name, launched_at, handle, last_use, status, autostop, metadata,
-         to_down, owner, cluster_hash, storage_mounts_metadata, cluster_ever_up,
-         status_updated_at, config_hash, user_hash, workspace) = row
-        user_hash = _get_user_hash_or_current_user(user_hash)
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
         # TODO: use namedtuple instead of dict
         record = {
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.ClusterStatus[status],
-            'autostop': autostop,
-            'to_down': bool(to_down),
-            'owner': _load_owner(owner),
-            'metadata': json.loads(metadata),
-            'cluster_hash': cluster_hash,
-            'storage_mounts_metadata':
-                _load_storage_mounts_metadata(storage_mounts_metadata),
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.ClusterStatus[row.status],
+            'autostop': row.autostop,
+            'to_down': bool(row.to_down),
+            'owner': _load_owner(row.owner),
+            'metadata': json.loads(row.metadata),
+            'cluster_hash': row.cluster_hash,
+            'storage_mounts_metadata': _load_storage_mounts_metadata(
+                row.storage_mounts_metadata),
+            'cluster_ever_up': bool(row.cluster_ever_up),
+            'status_updated_at': row.status_updated_at,
             'user_hash': user_hash,
             'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
-            'workspace': workspace,
+            'config_hash': row.config_hash,
+            'workspace': row.workspace,
         }
 
         records.append(record)
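Note: the readers above also switch from positional tuple unpacking (which breaks whenever a column is added, hence the old "explicitly specify the number of fields" comment) to named attribute access on the returned rows. A small sketch, again reusing the hypothetical demo_table:

    with orm.Session(engine) as session:
        row = session.query(demo_table).filter_by(key='k').first()
    if row is not None:
        # Named access stays valid as columns are added; no row[0], row[1].
        print(row.key, row.value)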
@@ -770,43 +778,30 @@ def get_clusters() -> List[Dict[str, Any]]:
 
 
 def get_clusters_from_history() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT ch.cluster_hash, ch.name, ch.num_nodes, '
-        'ch.launched_resources, ch.usage_intervals, clusters.status, '
-        'ch.user_hash '
-        'FROM cluster_history ch '
-        'LEFT OUTER JOIN clusters '
-        'ON ch.cluster_hash=clusters.cluster_hash ').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(
+            cluster_history_table.join(cluster_table,
+                                       cluster_history_table.c.cluster_hash ==
+                                       cluster_table.c.cluster_hash,
+                                       isouter=True)).all()
 
     # '(cluster_hash, name, num_nodes, requested_resources, '
     # 'launched_resources, usage_intervals) '
     records = []
-
     for row in rows:
         # TODO: use namedtuple instead of dict
-
-        (
-            cluster_hash,
-            name,
-            num_nodes,
-            launched_resources,
-            usage_intervals,
-            status,
-            user_hash,
-        ) = row[:7]
-        user_hash = _get_user_hash_or_current_user(user_hash)
-
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
+        status = row.status
         if status is not None:
             status = status_lib.ClusterStatus[status]
-
         record = {
-            'name': name,
-            'launched_at': _get_cluster_launch_time(cluster_hash),
-            'duration': _get_cluster_duration(cluster_hash),
-            'num_nodes': num_nodes,
-            'resources': pickle.loads(launched_resources),
-            'cluster_hash': cluster_hash,
-            'usage_intervals': pickle.loads(usage_intervals),
+            'name': row.name,
+            'launched_at': _get_cluster_launch_time(row.cluster_hash),
+            'duration': _get_cluster_duration(row.cluster_hash),
+            'num_nodes': row.num_nodes,
+            'resources': pickle.loads(row.launched_resources),
+            'cluster_hash': row.cluster_hash,
+            'usage_intervals': pickle.loads(row.usage_intervals),
             'status': status,
             'user_hash': user_hash,
         }
@@ -819,30 +814,29 @@ def get_clusters_from_history() -> List[Dict[str, Any]]:
 
 
 def get_cluster_names_start_with(starts_with: str) -> List[str]:
-    rows = _DB.cursor.execute('SELECT name FROM clusters WHERE name LIKE (?)',
-                              (f'{starts_with}%',))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).filter(
+            cluster_table.c.name.like(f'{starts_with}%')).all()
+        return [row.name for row in rows]
 
 
 def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
                               workspace: str) -> List['clouds.Cloud']:
-    # The table contains the cached enabled clouds for each workspace.
-    rows = _DB.cursor.execute(
-        'SELECT value FROM config WHERE key = ?',
-        (_get_enabled_clouds_key(cloud_capability, workspace),))
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(config_table).filter_by(
+            key=_get_enabled_clouds_key(cloud_capability, workspace)).first()
     ret = []
-    for (value,) in rows:
-        ret = json.loads(value)
-        break
+    if row:
+        ret = json.loads(row.value)
     enabled_clouds: List['clouds.Cloud'] = []
     for c in ret:
         try:
             cloud = registry.CLOUD_REGISTRY.from_str(c)
         except ValueError:
-            # Handle the case for the clouds whose support has been removed from
-            # SkyPilot, e.g., 'local' was a cloud in the past and may be stored
-            # in the database for users before #3037. We should ignore removed
-            # clouds and continue.
+            # Handle the case for the clouds whose support has been
+            # removed from SkyPilot, e.g., 'local' was a cloud in the past
+            # and may be stored in the database for users before #3037.
+            # We should ignore removed clouds and continue.
             continue
         if cloud is not None:
             enabled_clouds.append(cloud)
@@ -852,10 +846,25 @@ def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
 def set_enabled_clouds(enabled_clouds: List[str],
                        cloud_capability: 'cloud.CloudCapability',
                        workspace: str) -> None:
-    _DB.cursor.execute('INSERT OR REPLACE INTO config VALUES (?, ?)',
-                       (_get_enabled_clouds_key(cloud_capability, workspace),
-                        json.dumps(enabled_clouds)))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(config_table).values(
+                key=_get_enabled_clouds_key(cloud_capability, workspace),
+                value=json.dumps(enabled_clouds))
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[config_table.c.key],
+                set_={config_table.c.value: json.dumps(enabled_clouds)})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def _get_enabled_clouds_key(cloud_capability: 'cloud.CloudCapability',
@@ -876,26 +885,48 @@ def add_or_update_storage(storage_name: str,
     if not status_check(storage_status):
         raise ValueError(f'Error in updating global state. Storage Status '
                          f'{storage_status} is passed in incorrectly')
-    _DB.cursor.execute('INSERT OR REPLACE INTO storage VALUES (?, ?, ?, ?, ?)',
-                       (storage_name, storage_launched_at, handle, last_use,
-                        storage_status.value))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(storage_table).values(
+                name=storage_name,
+                handle=handle,
+                last_use=last_use,
+                launched_at=storage_launched_at,
+                status=storage_status.value)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[storage_table.c.name],
+                set_={
+                    storage_table.c.handle: handle,
+                    storage_table.c.last_use: last_use,
+                    storage_table.c.launched_at: storage_launched_at,
+                    storage_table.c.status: storage_status.value
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def remove_storage(storage_name: str):
     """Removes Storage from Database"""
-    _DB.cursor.execute('DELETE FROM storage WHERE name=(?)', (storage_name,))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(storage_table).filter_by(name=storage_name).delete()
+        session.commit()
 
 
 def set_storage_status(storage_name: str,
                        status: status_lib.StorageStatus) -> None:
-    _DB.cursor.execute('UPDATE storage SET status=(?) WHERE name=(?)', (
-        status.value,
-        storage_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update({storage_table.c.status: status.value})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage {storage_name} not found.')
@@ -903,21 +934,20 @@ def set_storage_status(storage_name: str,
 
 def get_storage_status(storage_name: str) -> Optional[status_lib.StorageStatus]:
     assert storage_name is not None, 'storage_name cannot be None'
-    rows = _DB.cursor.execute('SELECT status FROM storage WHERE name=(?)',
-                              (storage_name,))
-    for (status,) in rows:
-        return status_lib.StorageStatus[status]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+    if row:
+        return status_lib.StorageStatus[row.status]
     return None
 
 
 def set_storage_handle(storage_name: str,
                        handle: 'Storage.StorageMetadata') -> None:
-    _DB.cursor.execute('UPDATE storage SET handle=(?) WHERE name=(?)', (
-        pickle.dumps(handle),
-        storage_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update(
+                {storage_table.c.handle: pickle.dumps(handle)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage{storage_name} not found.')
@@ -927,38 +957,48 @@ def get_handle_from_storage_name(
     storage_name: Optional[str]) -> Optional['Storage.StorageMetadata']:
     if storage_name is None:
         return None
-    rows = _DB.cursor.execute('SELECT handle FROM storage WHERE name=(?)',
-                              (storage_name,))
-    for (handle,) in rows:
-        if handle is None:
-            return None
-        return pickle.loads(handle)
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+    if row:
+        return pickle.loads(row.handle)
     return None
 
 
 def get_glob_storage_name(storage_name: str) -> List[str]:
     assert storage_name is not None, 'storage_name cannot be None'
-    rows = _DB.cursor.execute('SELECT name FROM storage WHERE name GLOB (?)',
-                              (storage_name,))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(storage_table).filter(
+                storage_table.c.name.op('GLOB')(storage_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+        return [row.name for row in rows]
 
 
 def get_storage_names_start_with(starts_with: str) -> List[str]:
-    rows = _DB.cursor.execute('SELECT name FROM storage WHERE name LIKE (?)',
-                              (f'{starts_with}%',))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).filter(
+            storage_table.c.name.like(f'{starts_with}%')).all()
+        return [row.name for row in rows]
 
 
 def get_storage() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute('SELECT * FROM storage')
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).all()
     records = []
-    for name, launched_at, handle, last_use, status in rows:
+    for row in rows:
         # TODO: use namedtuple instead of dict
         records.append({
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.StorageStatus[status],
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.StorageStatus[row.status],
         })
     return records
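Note: in both the old and new code, handles and resources are stored as pickled blobs (BLOB columns before, sqlalchemy.LargeBinary now); only the access pattern around them changed. A hedged round-trip sketch, reusing the hypothetical engine and Base from the first note:

    import pickle

    blob_table = sqlalchemy.Table(
        'blobs', Base.metadata,
        sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
        sqlalchemy.Column('handle', sqlalchemy.LargeBinary))
    Base.metadata.create_all(bind=engine)

    with orm.Session(engine) as session:
        # The dict stands in for a real ResourceHandle object.
        session.execute(blob_table.insert().values(
            name='c1', handle=pickle.dumps({'ips': None})))
        session.commit()
        row = session.query(blob_table).filter_by(name='c1').first()
        restored = pickle.loads(row.handle)  # bytes back to the object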