skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +62 -45
  3. sky/backends/cloud_vm_ray_backend.py +3 -1
  4. sky/check.py +335 -170
  5. sky/cli.py +56 -13
  6. sky/client/cli.py +56 -13
  7. sky/client/sdk.py +54 -10
  8. sky/clouds/gcp.py +19 -3
  9. sky/core.py +5 -2
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
  15. sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
  16. sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
  17. sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
  25. sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
  29. sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
  34. sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  36. sky/dashboard/out/clusters/[cluster].html +1 -1
  37. sky/dashboard/out/clusters.html +1 -1
  38. sky/dashboard/out/index.html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/dashboard/out/users.html +1 -0
  43. sky/dashboard/out/workspace/new.html +1 -0
  44. sky/dashboard/out/workspaces/[name].html +1 -0
  45. sky/dashboard/out/workspaces.html +1 -0
  46. sky/data/storage.py +1 -1
  47. sky/global_user_state.py +606 -543
  48. sky/jobs/constants.py +1 -1
  49. sky/jobs/server/core.py +72 -56
  50. sky/jobs/state.py +26 -5
  51. sky/jobs/utils.py +65 -13
  52. sky/optimizer.py +6 -3
  53. sky/provision/fluidstack/instance.py +1 -0
  54. sky/serve/server/core.py +9 -6
  55. sky/server/html/token_page.html +6 -1
  56. sky/server/requests/executor.py +1 -0
  57. sky/server/requests/payloads.py +28 -0
  58. sky/server/server.py +59 -5
  59. sky/setup_files/dependencies.py +1 -0
  60. sky/skylet/constants.py +4 -1
  61. sky/skypilot_config.py +107 -11
  62. sky/utils/cli_utils/status_utils.py +18 -8
  63. sky/utils/db_utils.py +53 -0
  64. sky/utils/kubernetes/config_map_utils.py +133 -0
  65. sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
  66. sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
  67. sky/utils/kubernetes/ssh-tunnel.sh +20 -28
  68. sky/utils/log_utils.py +4 -0
  69. sky/utils/schemas.py +54 -0
  70. sky/workspaces/__init__.py +0 -0
  71. sky/workspaces/core.py +295 -0
  72. sky/workspaces/server.py +62 -0
  73. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
  74. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
  75. sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
  77. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  78. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
  79. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
  81. sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
  82. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
  85. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
  87. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  90. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
  91. /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
  92. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
  93. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
  94. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
  95. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/global_user_state.py CHANGED
@@ -10,14 +10,20 @@ import json
 import os
 import pathlib
 import pickle
-import sqlite3
 import time
 import typing
 from typing import Any, Dict, List, Optional, Set, Tuple
 import uuid
 
+import sqlalchemy
+from sqlalchemy import exc as sqlalchemy_exc
+from sqlalchemy import orm
+from sqlalchemy.dialects import sqlite
+from sqlalchemy.ext import declarative
+
 from sky import models
 from sky import sky_logging
+from sky.skylet import constants
 from sky.utils import common_utils
 from sky.utils import context_utils
 from sky.utils import db_utils
@@ -37,159 +43,215 @@ _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
 _DB_PATH = os.path.expanduser('~/.sky/state.db')
 pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
 
-
-def create_table(cursor, conn):
+_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(f'sqlite:///{_DB_PATH}')
+
+Base = declarative.declarative_base()
+
+config_table = sqlalchemy.Table(
+    'config',
+    Base.metadata,
+    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('value', sqlalchemy.Text),
+)
+
+user_table = sqlalchemy.Table(
+    'users',
+    Base.metadata,
+    sqlalchemy.Column('id', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+)
+
+cluster_table = sqlalchemy.Table(
+    'clusters',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+    sqlalchemy.Column('autostop', sqlalchemy.Integer, server_default='-1'),
+    sqlalchemy.Column('to_down', sqlalchemy.Integer, server_default='0'),
+    sqlalchemy.Column('metadata', sqlalchemy.Text, server_default='{}'),
+    sqlalchemy.Column('owner', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('storage_mounts_metadata',
+                      sqlalchemy.LargeBinary,
+                      server_default=None),
+    sqlalchemy.Column('cluster_ever_up', sqlalchemy.Integer,
+                      server_default='0'),
+    sqlalchemy.Column('status_updated_at',
+                      sqlalchemy.Integer,
+                      server_default=None),
+    sqlalchemy.Column('config_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text, server_default=None),
+    sqlalchemy.Column('workspace',
+                      sqlalchemy.Text,
+                      server_default=constants.SKYPILOT_DEFAULT_WORKSPACE),
+)
+
+storage_table = sqlalchemy.Table(
+    'storage',
+    Base.metadata,
+    sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('launched_at', sqlalchemy.Integer),
+    sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('last_use', sqlalchemy.Text),
+    sqlalchemy.Column('status', sqlalchemy.Text),
+)
+
+# Table for Cluster History
+# usage_intervals: List[Tuple[int, int]]
+#  Specifies start and end timestamps of cluster.
+#  When the last end time is None, the cluster is still UP.
+#  Example: [(start1, end1), (start2, end2), (start3, None)]
+
+# requested_resources: Set[resource_lib.Resource]
+#  Requested resources fetched from task that user specifies.
+
+# launched_resources: Optional[resources_lib.Resources]
+#  Actual launched resources fetched from handle for cluster.
+
+# num_nodes: Optional[int] number of nodes launched.
+cluster_history_table = sqlalchemy.Table(
+    'cluster_history',
+    Base.metadata,
+    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, primary_key=True),
+    sqlalchemy.Column('name', sqlalchemy.Text),
+    sqlalchemy.Column('num_nodes', sqlalchemy.Integer),
+    sqlalchemy.Column('requested_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('launched_resources', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('usage_intervals', sqlalchemy.LargeBinary),
+    sqlalchemy.Column('user_hash', sqlalchemy.Text),
+)
+
+
+def create_table():
     # Enable WAL mode to avoid locking issues.
     # See: issue #1441 and PR #1509
     # https://github.com/microsoft/WSL/issues/2395
     # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
     # This may cause the database locked problem from WSL issue #1441.
-    if not common_utils.is_wsl():
+    if (_SQLALCHEMY_ENGINE.dialect.name
+            == db_utils.SQLAlchemyDialect.SQLITE.value and
+            not common_utils.is_wsl()):
         try:
-            cursor.execute('PRAGMA journal_mode=WAL')
-        except sqlite3.OperationalError as e:
+            with orm.Session(_SQLALCHEMY_ENGINE) as session:
+                session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
+                session.commit()
+        except sqlalchemy_exc.OperationalError as e:
             if 'database is locked' not in str(e):
                 raise
             # If the database is locked, it is OK to continue, as the WAL mode
             # is not critical and is likely to be enabled by other processes.
 
-    # Table for Clusters
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS clusters (
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT,
-        autostop INTEGER DEFAULT -1,
-        metadata TEXT DEFAULT '{}',
-        to_down INTEGER DEFAULT 0,
-        owner TEXT DEFAULT null,
-        cluster_hash TEXT DEFAULT null,
-        storage_mounts_metadata BLOB DEFAULT null,
-        cluster_ever_up INTEGER DEFAULT 0,
-        status_updated_at INTEGER DEFAULT null,
-        config_hash TEXT DEFAULT null,
-        user_hash TEXT DEFAULT null)""")
-
-    # Table for Cluster History
-    # usage_intervals: List[Tuple[int, int]]
-    #  Specifies start and end timestamps of cluster.
-    #  When the last end time is None, the cluster is still UP.
-    #  Example: [(start1, end1), (start2, end2), (start3, None)]
-
-    # requested_resources: Set[resource_lib.Resource]
-    #  Requested resources fetched from task that user specifies.
-
-    # launched_resources: Optional[resources_lib.Resources]
-    #  Actual launched resources fetched from handle for cluster.
-
-    # num_nodes: Optional[int] number of nodes launched.
-
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS cluster_history (
-        cluster_hash TEXT PRIMARY KEY,
-        name TEXT,
-        num_nodes int,
-        requested_resources BLOB,
-        launched_resources BLOB,
-        usage_intervals BLOB,
-        user_hash TEXT)""")
-    # Table for configs (e.g. enabled clouds)
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS config (
-        key TEXT PRIMARY KEY, value TEXT)""")
-    # Table for Storage
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS storage (
-        name TEXT PRIMARY KEY,
-        launched_at INTEGER,
-        handle BLOB,
-        last_use TEXT,
-        status TEXT)""")
-    # Table for User
-    cursor.execute("""\
-        CREATE TABLE IF NOT EXISTS users (
-        id TEXT PRIMARY KEY,
-        name TEXT)""")
+    # Create tables if they don't exist
+    Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
+
     # For backward compatibility.
     # TODO(zhwu): Remove this function after all users have migrated to
     # the latest version of SkyPilot.
-    # Add autostop column to clusters table
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'autostop',
-                                 'INTEGER DEFAULT -1')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'metadata',
-                                 'TEXT DEFAULT \'{}\'')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'to_down',
-                                 'INTEGER DEFAULT 0')
-
-    # The cloud identity that created the cluster.
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'owner', 'TEXT')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'cluster_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters',
-                                 'storage_mounts_metadata', 'BLOB DEFAULT null')
-    db_utils.add_column_to_table(
-        cursor,
-        conn,
-        'clusters',
-        'cluster_ever_up',
-        'INTEGER DEFAULT 0',
-        # Set the value to 1 so that all the existing clusters before #2977
-        # are considered as ever up, i.e:
-        #   existing cluster's default (null) -> 1;
-        #   new cluster's default -> 0;
-        # This is conservative for the existing clusters: even if some INIT
-        # clusters were never really UP, setting it to 1 means they won't be
-        # auto-deleted during any failover.
-        value_to_replace_existing_entries=1)
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'status_updated_at',
-                                 'INTEGER DEFAULT null')
-    db_utils.add_column_to_table(
-        cursor,
-        conn,
-        'clusters',
-        'user_hash',
-        'TEXT DEFAULT null',
-        value_to_replace_existing_entries=common_utils.get_user_hash())
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'config_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'clusters', 'config_hash',
-                                 'TEXT DEFAULT null')
-
-    db_utils.add_column_to_table(cursor, conn, 'cluster_history', 'user_hash',
-                                 'TEXT DEFAULT null')
-    conn.commit()
-
-
-_DB = db_utils.SQLiteConn(_DB_PATH, create_table)
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # Add autostop column to clusters table
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'autostop',
+                                                'INTEGER DEFAULT -1')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'metadata',
+                                                'TEXT DEFAULT \'{}\'')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'to_down',
+                                                'INTEGER DEFAULT 0')
+
+        # The cloud identity that created the cluster.
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters', 'owner',
+                                                'TEXT')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'cluster_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'storage_mounts_metadata',
+                                                'BLOB DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'cluster_ever_up',
+            'INTEGER DEFAULT 0',
+            # Set the value to 1 so that all the existing clusters before #2977
+            # are considered as ever up, i.e:
+            #   existing cluster's default (null) -> 1;
+            #   new cluster's default -> 0;
+            # This is conservative for the existing clusters: even if some INIT
+            # clusters were never really UP, setting it to 1 means they won't be
+            # auto-deleted during any failover.
+            value_to_replace_existing_entries=1)
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'status_updated_at',
+                                                'INTEGER DEFAULT null')
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'user_hash',
+            'TEXT DEFAULT null',
+            value_to_replace_existing_entries=common_utils.get_user_hash())
+        db_utils.add_column_to_table_sqlalchemy(session, 'clusters',
+                                                'config_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(session, 'cluster_history',
+                                                'user_hash',
+                                                'TEXT DEFAULT null')
+
+        db_utils.add_column_to_table_sqlalchemy(
+            session,
+            'clusters',
+            'workspace',
+            'TEXT DEFAULT \'default\'',
+            value_to_replace_existing_entries=constants.
+            SKYPILOT_DEFAULT_WORKSPACE)
+        session.commit()
+
+
+create_table()
 
 
 def add_or_update_user(user: models.User):
     """Store the mapping from user hash to user name for display purposes."""
     if user.name is None:
         return
-    _DB.cursor.execute('INSERT OR REPLACE INTO users (id, name) VALUES (?, ?)',
-                       (user.id, user.name))
-    _DB.conn.commit()
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(user_table).values(id=user.id,
+                                                            name=user.name)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[user_table.c.id],
+                set_={user_table.c.name: user.name})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def get_user(user_id: str) -> models.User:
-    row = _DB.cursor.execute('SELECT id, name FROM users WHERE id=?',
-                             (user_id,)).fetchone()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(user_table).filter_by(id=user_id).first()
     if row is None:
         return models.User(id=user_id)
-    return models.User(id=row[0], name=row[1])
+    return models.User(id=row.id, name=row.name)
 
 
 def get_all_users() -> List[models.User]:
-    rows = _DB.cursor.execute('SELECT id, name FROM users').fetchall()
-    return [models.User(id=row[0], name=row[1]) for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(user_table).all()
+    return [models.User(id=row.id, name=row.name) for row in rows]
 
 
 def add_or_update_cluster(cluster_name: str,
@@ -209,6 +271,9 @@ def add_or_update_cluster(cluster_name: str,
         is_launch: if the cluster is firstly launched. If True, the launched_at
             and last_use will be updated. Otherwise, use the old value.
     """
+    # TODO(zhwu): have to be imported here to avoid circular import.
+    from sky import skypilot_config  # pylint: disable=import-outside-toplevel
+
     # FIXME: launched_at will be changed when `sky launch -c` is called.
     handle = pickle.dumps(cluster_handle)
     cluster_launched_at = int(time.time()) if is_launch else None
@@ -242,142 +307,118 @@ def add_or_update_cluster(cluster_name: str,
         usage_intervals.append((cluster_launched_at, None))
 
     user_hash = common_utils.get_user_hash()
+    active_workspace = skypilot_config.get_active_workspace()
 
-    _DB.cursor.execute(
-        'INSERT or REPLACE INTO clusters'
-        # All the fields need to exist here, even if they don't need
-        # be changed, as the INSERT OR REPLACE statement will replace
-        # the field of the existing row with the default value if not
-        # specified.
-        '(name, launched_at, handle, last_use, status, '
-        'autostop, to_down, metadata, owner, cluster_hash, '
-        'storage_mounts_metadata, cluster_ever_up, status_updated_at, '
-        'config_hash, user_hash) '
-        'VALUES ('
-        # name
-        '?, '
-        # launched_at
-        'COALESCE('
-        '?, (SELECT launched_at FROM clusters WHERE name=?)), '
-        # handle
-        '?, '
-        # last_use
-        'COALESCE('
-        '?, (SELECT last_use FROM clusters WHERE name=?)), '
-        # status
-        '?, '
-        # autostop
-        # Keep the old autostop value if it exists, otherwise set it to
-        # default -1.
-        'COALESCE('
-        '(SELECT autostop FROM clusters WHERE name=? AND status!=?), -1), '
-        # Keep the old to_down value if it exists, otherwise set it to
-        # default 0.
-        'COALESCE('
-        '(SELECT to_down FROM clusters WHERE name=? AND status!=?), 0),'
-        # Keep the old metadata value if it exists, otherwise set it to
-        # default {}.
-        'COALESCE('
-        '(SELECT metadata FROM clusters WHERE name=?), \'{}\'),'
-        # Keep the old owner value if it exists, otherwise set it to
-        # default null.
-        'COALESCE('
-        '(SELECT owner FROM clusters WHERE name=?), null),'
-        # cluster_hash
-        '?,'
-        # storage_mounts_metadata
-        'COALESCE('
-        '(SELECT storage_mounts_metadata FROM clusters WHERE name=?), null), '
-        # cluster_ever_up
-        '((SELECT cluster_ever_up FROM clusters WHERE name=?) OR ?), '
-        # status_updated_at
-        '?,'
-        # config_hash
-        'COALESCE(?, (SELECT config_hash FROM clusters WHERE name=?)),'
-        # user_hash: keep original user_hash if it exists
-        'COALESCE('
-        '(SELECT user_hash FROM clusters WHERE name=?), ?)'
-        ')',
-        (
-            # name
-            cluster_name,
-            # launched_at
-            cluster_launched_at,
-            cluster_name,
-            # handle
-            handle,
-            # last_use
-            last_use,
-            cluster_name,
-            # status
-            status.value,
-            # autostop
-            cluster_name,
-            status_lib.ClusterStatus.STOPPED.value,
-            # to_down
-            cluster_name,
-            status_lib.ClusterStatus.STOPPED.value,
-            # metadata
-            cluster_name,
-            # owner
-            cluster_name,
-            # cluster_hash
-            cluster_hash,
-            # storage_mounts_metadata
-            cluster_name,
-            # cluster_ever_up
-            cluster_name,
-            int(ready),
-            # status_updated_at
-            status_updated_at,
-            # config_hash
-            config_hash,
-            cluster_name,
-            # user_hash
-            cluster_name,
-            user_hash,
-        ))
-
-    launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
-    launched_resources = getattr(cluster_handle, 'launched_resources', None)
-    _DB.cursor.execute(
-        'INSERT or REPLACE INTO cluster_history'
-        '(cluster_hash, name, num_nodes, requested_resources, '
-        'launched_resources, usage_intervals, user_hash) '
-        'VALUES ('
-        # hash
-        '?, '
-        # name
-        '?, '
-        # requested resources
-        '?, '
-        # launched resources
-        '?, '
-        # number of nodes
-        '?, '
-        # usage intervals
-        '?, '
-        # user_hash
-        '?'
-        ')',
-        (
-            # hash
-            cluster_hash,
-            # name
-            cluster_name,
-            # number of nodes
-            launched_nodes,
-            # requested resources
-            pickle.dumps(requested_resources),
-            # launched resources
-            pickle.dumps(launched_resources),
-            # usage intervals
-            pickle.dumps(usage_intervals),
-            # user_hash
-            user_hash,
-        ))
-
-    _DB.conn.commit()
+    conditional_values = {}
+    if is_launch:
+        conditional_values.update({
+            'launched_at': cluster_launched_at,
+            'last_use': last_use
+        })
+
+    if int(ready) == 1:
+        conditional_values.update({
+            'cluster_ever_up': 1,
+        })
+
+    if config_hash is not None:
+        conditional_values.update({
+            'config_hash': config_hash,
+        })
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # with_for_update() locks the row until commit() or rollback()
+        # is called, or until the code escapes the with block.
+        cluster_row = session.query(cluster_table).filter_by(
+            name=cluster_name).with_for_update().first()
+        if (not cluster_row or
+                cluster_row.status == status_lib.ClusterStatus.STOPPED.value):
+            conditional_values.update({
+                'autostop': -1,
+                'to_down': 0,
+            })
+        if not cluster_row or not cluster_row.user_hash:
+            conditional_values.update({
+                'user_hash': user_hash,
+            })
+        if not cluster_row or not cluster_row.workspace:
+            conditional_values.update({
+                'workspace': active_workspace,
+            })
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_table).values(
+                name=cluster_name,
+                **conditional_values,
+                handle=handle,
+                status=status.value,
+                # set metadata to server default ('{}')
+                # set owner to server default (null)
+                cluster_hash=cluster_hash,
+                # set storage_mounts_metadata to server default (null)
+                status_updated_at=status_updated_at,
+            )
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_table.c.name],
+                set_={
+                    **conditional_values,
+                    cluster_table.c.handle: handle,
+                    cluster_table.c.status: status.value,
+                    # do not update metadata value
+                    # do not update owner value
+                    cluster_table.c.cluster_hash: cluster_hash,
+                    # do not update storage_mounts_metadata
+                    cluster_table.c.status_updated_at: status_updated_at,
+                    # do not update user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+
+        # Modify cluster history table
+        launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
+        launched_resources = getattr(cluster_handle, 'launched_resources', None)
+
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(cluster_history_table).values(
+                cluster_hash=cluster_hash,
+                name=cluster_name,
+                num_nodes=launched_nodes,
+                requested_resources=pickle.dumps(requested_resources),
+                launched_resources=pickle.dumps(launched_resources),
+                usage_intervals=pickle.dumps(usage_intervals),
+                user_hash=user_hash)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[cluster_history_table.c.cluster_hash],
+                set_={
+                    cluster_history_table.c.name: cluster_name,
+                    cluster_history_table.c.num_nodes: launched_nodes,
+                    cluster_history_table.c.requested_resources:
+                        pickle.dumps(requested_resources),
+                    cluster_history_table.c.launched_resources:
+                        pickle.dumps(launched_resources),
+                    cluster_history_table.c.usage_intervals:
+                        pickle.dumps(usage_intervals),
+                    cluster_history_table.c.user_hash: user_hash
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
@@ -395,16 +436,18 @@ def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
 def update_cluster_handle(cluster_name: str,
                           cluster_handle: 'backends.ResourceHandle'):
     handle = pickle.dumps(cluster_handle)
-    _DB.cursor.execute('UPDATE clusters SET handle=(?) WHERE name=(?)',
-                       (handle, cluster_name))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.handle: handle})
+        session.commit()
 
 
 def update_last_use(cluster_name: str):
     """Updates the last used command for the cluster."""
-    _DB.cursor.execute('UPDATE clusters SET last_use=(?) WHERE name=(?)',
-                       (common_utils.get_current_command(), cluster_name))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(cluster_table).filter_by(name=cluster_name).update(
+            {cluster_table.c.last_use: common_utils.get_current_command()})
+        session.commit()
 
 
 def remove_cluster(cluster_name: str, terminate: bool) -> None:
@@ -412,63 +455,73 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
     cluster_hash = _get_hash_for_existing_cluster(cluster_name)
    usage_intervals = _get_cluster_usage_intervals(cluster_hash)
 
-    # usage_intervals is not None and not empty
-    if usage_intervals:
-        assert cluster_hash is not None, cluster_name
-        start_time = usage_intervals.pop()[0]
-        end_time = int(time.time())
-        usage_intervals.append((start_time, end_time))
-        _set_cluster_usage_intervals(cluster_hash, usage_intervals)
-
-    if terminate:
-        _DB.cursor.execute('DELETE FROM clusters WHERE name=(?)',
-                           (cluster_name,))
-    else:
-        handle = get_handle_from_cluster_name(cluster_name)
-        if handle is None:
-            return
-        # Must invalidate IP list to avoid directly trying to ssh into a
-        # stopped VM, which leads to timeout.
-        if hasattr(handle, 'stable_internal_external_ips'):
-            handle = typing.cast('backends.CloudVmRayResourceHandle', handle)
-            handle.stable_internal_external_ips = None
-        current_time = int(time.time())
-        _DB.cursor.execute(
-            'UPDATE clusters SET handle=(?), status=(?), '
-            'status_updated_at=(?) WHERE name=(?)', (
-                pickle.dumps(handle),
-                status_lib.ClusterStatus.STOPPED.value,
-                current_time,
-                cluster_name,
-            ))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        # usage_intervals is not None and not empty
+        if usage_intervals:
+            assert cluster_hash is not None, cluster_name
+            start_time = usage_intervals.pop()[0]
+            end_time = int(time.time())
+            usage_intervals.append((start_time, end_time))
+            _set_cluster_usage_intervals(cluster_hash, usage_intervals)
+
+        if terminate:
+            session.query(cluster_table).filter_by(name=cluster_name).delete()
+        else:
+            handle = get_handle_from_cluster_name(cluster_name)
+            if handle is None:
+                return
+            # Must invalidate IP list to avoid directly trying to ssh into a
+            # stopped VM, which leads to timeout.
+            if hasattr(handle, 'stable_internal_external_ips'):
+                handle = typing.cast('backends.CloudVmRayResourceHandle',
+                                     handle)
+                handle.stable_internal_external_ips = None
+            current_time = int(time.time())
+            session.query(cluster_table).filter_by(name=cluster_name).update({
+                cluster_table.c.handle: pickle.dumps(handle),
+                cluster_table.c.status: status_lib.ClusterStatus.STOPPED.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
 
 
 def get_handle_from_cluster_name(
         cluster_name: str) -> Optional['backends.ResourceHandle']:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    rows = _DB.cursor.execute('SELECT handle FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (handle,) in rows:
-        return pickle.loads(handle)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None:
+            return None
+        return pickle.loads(row.handle)
 
 
 def get_glob_cluster_names(cluster_name: str) -> List[str]:
     assert cluster_name is not None, 'cluster_name cannot be None'
-    rows = _DB.cursor.execute('SELECT name FROM clusters WHERE name GLOB (?)',
-                              (cluster_name,))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(cluster_table).filter(
+                cluster_table.c.name.op('GLOB')(cluster_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+    return [row.name for row in rows]
 
 
 def set_cluster_status(cluster_name: str,
                        status: status_lib.ClusterStatus) -> None:
     current_time = int(time.time())
-    _DB.cursor.execute(
-        'UPDATE clusters SET status=(?), status_updated_at=(?) WHERE name=(?)',
-        (status.value, current_time, cluster_name))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.status: status.value,
+                cluster_table.c.status_updated_at: current_time
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -476,46 +529,40 @@ def set_cluster_status(cluster_name: str,
 
 def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
                                to_down: bool) -> None:
-    _DB.cursor.execute(
-        'UPDATE clusters SET autostop=(?), to_down=(?) WHERE name=(?)', (
-            idle_minutes,
-            int(to_down),
-            cluster_name,
-        ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.autostop: idle_minutes,
+                cluster_table.c.to_down: int(to_down)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
-    rows = _DB.cursor.execute('SELECT launched_at FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (launch_time,) in rows:
-        if launch_time is None:
-            return None
-        return int(launch_time)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.launched_at is None:
+            return None
+        return int(row.launched_at)
 
 
 def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute('SELECT metadata FROM clusters WHERE name=(?)',
-                              (cluster_name,))
-    for (metadata,) in rows:
-        if metadata is None:
-            return None
-        return json.loads(metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.metadata is None:
+            return None
+        return json.loads(row.metadata)
 
 
 def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
-    _DB.cursor.execute('UPDATE clusters SET metadata=(?) WHERE name=(?)', (
-        json.dumps(metadata),
-        cluster_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.metadata: json.dumps(metadata)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -523,25 +570,22 @@ def set_cluster_info(cluster_name: str, metadata: Dict[str, Any]) -> None:
 
 def get_cluster_storage_mounts_metadata(
         cluster_name: str) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT storage_mounts_metadata FROM clusters WHERE name=(?)',
-        (cluster_name,))
-    for (storage_mounts_metadata,) in rows:
-        if storage_mounts_metadata is None:
-            return None
-        return pickle.loads(storage_mounts_metadata)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.storage_mounts_metadata is None:
+            return None
+        return pickle.loads(row.storage_mounts_metadata)
 
 
 def set_cluster_storage_mounts_metadata(
         cluster_name: str, storage_mounts_metadata: Dict[str, Any]) -> None:
-    _DB.cursor.execute(
-        'UPDATE clusters SET storage_mounts_metadata=(?) WHERE name=(?)', (
-            pickle.dumps(storage_mounts_metadata),
-            cluster_name,
-        ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update({
+                cluster_table.c.storage_mounts_metadata:
+                    pickle.dumps(storage_mounts_metadata)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
@@ -552,14 +596,12 @@ def _get_cluster_usage_intervals(
 ) -> Optional[List[Tuple[int, Optional[int]]]]:
     if cluster_hash is None:
         return None
-    rows = _DB.cursor.execute(
-        'SELECT usage_intervals FROM cluster_history WHERE cluster_hash=(?)',
-        (cluster_hash,))
-    for (usage_intervals,) in rows:
-        if usage_intervals is None:
-            return None
-        return pickle.loads(usage_intervals)
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+        if row is None or row.usage_intervals is None:
+            return None
+        return pickle.loads(row.usage_intervals)
 
 
 def _get_cluster_launch_time(cluster_hash: str) -> Optional[int]:
@@ -591,15 +633,13 @@ def _get_cluster_duration(cluster_hash: str) -> int:
 def _set_cluster_usage_intervals(
         cluster_hash: str, usage_intervals: List[Tuple[int,
                                                        Optional[int]]]) -> None:
-    _DB.cursor.execute(
-        'UPDATE cluster_history SET usage_intervals=(?) WHERE cluster_hash=(?)',
-        (
-            pickle.dumps(usage_intervals),
-            cluster_hash,
-        ))
-
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).update({
+                cluster_history_table.c.usage_intervals:
+                    pickle.dumps(usage_intervals)
+            })
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster hash {cluster_hash} not found.')
@@ -610,38 +650,38 @@ def set_owner_identity_for_cluster(cluster_name: str,
     if owner_identity is None:
         return
     owner_identity_str = json.dumps(owner_identity)
-    _DB.cursor.execute('UPDATE clusters SET owner=(?) WHERE name=(?)',
-                       (owner_identity_str, cluster_name))
-
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(cluster_table).filter_by(
+            name=cluster_name).update(
+                {cluster_table.c.owner: owner_identity_str})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Cluster {cluster_name} not found.')
 
 
 def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
-    rows = _DB.cursor.execute(
-        'SELECT cluster_hash FROM clusters WHERE name=(?)', (cluster_name,))
-    for (cluster_hash,) in rows:
-        if cluster_hash is None:
-            return None
-        return cluster_hash
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None or row.cluster_hash is None:
+            return None
+        return row.cluster_hash
 
 
 def get_launched_resources_from_cluster_hash(
         cluster_hash: str) -> Optional[Tuple[int, Any]]:
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_history_table).filter_by(
+            cluster_hash=cluster_hash).first()
+        if row is None:
+            return None
+        num_nodes = row.num_nodes
+        launched_resources = row.launched_resources
 
-    rows = _DB.cursor.execute(
-        'SELECT num_nodes, launched_resources '
-        'FROM cluster_history WHERE cluster_hash=(?)', (cluster_hash,))
-    for (num_nodes, launched_resources) in rows:
-        if num_nodes is None or launched_resources is None:
-            return None
-        launched_resources = pickle.loads(launched_resources)
-        return num_nodes, launched_resources
-    return None
+    if num_nodes is None or launched_resources is None:
+        return None
+    launched_resources = pickle.loads(launched_resources)
+    return num_nodes, launched_resources
@@ -675,74 +715,62 @@ def _load_storage_mounts_metadata(
 @context_utils.cancellation_guard
 def get_cluster_from_name(
         cluster_name: Optional[str]) -> Optional[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT name, launched_at, handle, last_use, status, autostop, '
-        'metadata, to_down, owner, cluster_hash, storage_mounts_metadata, '
-        'cluster_ever_up, status_updated_at, config_hash, user_hash '
-        'FROM clusters WHERE name=(?)', (cluster_name,)).fetchall()
-    for row in rows:
-        # Explicitly specify the number of fields to unpack, so that
-        # we can add new fields to the database in the future without
-        # breaking the previous code.
-        (name, launched_at, handle, last_use, status, autostop, metadata,
-         to_down, owner, cluster_hash, storage_mounts_metadata, cluster_ever_up,
-         status_updated_at, config_hash, user_hash) = row
-        user_hash = _get_user_hash_or_current_user(user_hash)
-        # TODO: use namedtuple instead of dict
-        record = {
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.ClusterStatus[status],
-            'autostop': autostop,
-            'to_down': bool(to_down),
-            'owner': _load_owner(owner),
-            'metadata': json.loads(metadata),
-            'cluster_hash': cluster_hash,
-            'storage_mounts_metadata':
-                _load_storage_mounts_metadata(storage_mounts_metadata),
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
-            'user_hash': user_hash,
-            'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
-        }
-        return record
-    return None
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(cluster_table).filter_by(name=cluster_name).first()
+        if row is None:
+            return None
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
+        # TODO: use namedtuple instead of dict
+        record = {
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.ClusterStatus[row.status],
+            'autostop': row.autostop,
+            'to_down': bool(row.to_down),
+            'owner': _load_owner(row.owner),
+            'metadata': json.loads(row.metadata),
+            'cluster_hash': row.cluster_hash,
+            'storage_mounts_metadata': _load_storage_mounts_metadata(
+                row.storage_mounts_metadata),
+            'cluster_ever_up': bool(row.cluster_ever_up),
+            'status_updated_at': row.status_updated_at,
+            'user_hash': user_hash,
+            'user_name': get_user(user_hash).name,
+            'config_hash': row.config_hash,
+            'workspace': row.workspace,
+        }
+        return record
 
 
 def get_clusters() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'select name, launched_at, handle, last_use, status, autostop, '
-        'metadata, to_down, owner, cluster_hash, storage_mounts_metadata, '
-        'cluster_ever_up, status_updated_at, config_hash, user_hash '
-        'from clusters order by launched_at desc').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).order_by(
+            sqlalchemy.desc(cluster_table.c.launched_at)).all()
     records = []
     for row in rows:
-        (name, launched_at, handle, last_use, status, autostop, metadata,
-         to_down, owner, cluster_hash, storage_mounts_metadata, cluster_ever_up,
-         status_updated_at, config_hash, user_hash) = row
-        user_hash = _get_user_hash_or_current_user(user_hash)
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
         # TODO: use namedtuple instead of dict
         record = {
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.ClusterStatus[status],
-            'autostop': autostop,
-            'to_down': bool(to_down),
-            'owner': _load_owner(owner),
-            'metadata': json.loads(metadata),
-            'cluster_hash': cluster_hash,
-            'storage_mounts_metadata':
-                _load_storage_mounts_metadata(storage_mounts_metadata),
-            'cluster_ever_up': bool(cluster_ever_up),
-            'status_updated_at': status_updated_at,
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.ClusterStatus[row.status],
+            'autostop': row.autostop,
+            'to_down': bool(row.to_down),
+            'owner': _load_owner(row.owner),
+            'metadata': json.loads(row.metadata),
+            'cluster_hash': row.cluster_hash,
+            'storage_mounts_metadata': _load_storage_mounts_metadata(
+                row.storage_mounts_metadata),
+            'cluster_ever_up': bool(row.cluster_ever_up),
+            'status_updated_at': row.status_updated_at,
             'user_hash': user_hash,
             'user_name': get_user(user_hash).name,
-            'config_hash': config_hash,
+            'config_hash': row.config_hash,
+            'workspace': row.workspace,
         }
 
         records.append(record)
@@ -750,43 +778,30 @@ def get_clusters() -> List[Dict[str, Any]]:
 
 
 def get_clusters_from_history() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute(
-        'SELECT ch.cluster_hash, ch.name, ch.num_nodes, '
-        'ch.launched_resources, ch.usage_intervals, clusters.status, '
-        'ch.user_hash '
-        'FROM cluster_history ch '
-        'LEFT OUTER JOIN clusters '
-        'ON ch.cluster_hash=clusters.cluster_hash ').fetchall()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(
+            cluster_history_table.join(cluster_table,
+                                       cluster_history_table.c.cluster_hash ==
+                                       cluster_table.c.cluster_hash,
+                                       isouter=True)).all()
 
     # '(cluster_hash, name, num_nodes, requested_resources, '
     # 'launched_resources, usage_intervals) '
     records = []
-
     for row in rows:
         # TODO: use namedtuple instead of dict
-
-        (
-            cluster_hash,
-            name,
-            num_nodes,
-            launched_resources,
-            usage_intervals,
-            status,
-            user_hash,
-        ) = row[:7]
-        user_hash = _get_user_hash_or_current_user(user_hash)
-
+        user_hash = _get_user_hash_or_current_user(row.user_hash)
+        status = row.status
         if status is not None:
             status = status_lib.ClusterStatus[status]
-
         record = {
-            'name': name,
-            'launched_at': _get_cluster_launch_time(cluster_hash),
-            'duration': _get_cluster_duration(cluster_hash),
-            'num_nodes': num_nodes,
-            'resources': pickle.loads(launched_resources),
-            'cluster_hash': cluster_hash,
-            'usage_intervals': pickle.loads(usage_intervals),
+            'name': row.name,
+            'launched_at': _get_cluster_launch_time(row.cluster_hash),
+            'duration': _get_cluster_duration(row.cluster_hash),
+            'num_nodes': row.num_nodes,
+            'resources': pickle.loads(row.launched_resources),
+            'cluster_hash': row.cluster_hash,
+            'usage_intervals': pickle.loads(row.usage_intervals),
             'status': status,
             'user_hash': user_hash,
         }
@@ -799,29 +814,29 @@ def get_clusters_from_history() -> List[Dict[str, Any]]:
 
 
 def get_cluster_names_start_with(starts_with: str) -> List[str]:
-    rows = _DB.cursor.execute('SELECT name FROM clusters WHERE name LIKE (?)',
-                              (f'{starts_with}%',))
-    return [row[0] for row in rows]
-
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_table).filter(
+            cluster_table.c.name.like(f'{starts_with}%')).all()
+    return [row.name for row in rows]
 
-def get_cached_enabled_clouds(
-        cloud_capability: 'cloud.CloudCapability') -> List['clouds.Cloud']:
 
-    rows = _DB.cursor.execute('SELECT value FROM config WHERE key = ?',
-                              (_get_capability_key(cloud_capability),))
+def get_cached_enabled_clouds(cloud_capability: 'cloud.CloudCapability',
+                              workspace: str) -> List['clouds.Cloud']:
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(config_table).filter_by(
+            key=_get_enabled_clouds_key(cloud_capability, workspace)).first()
     ret = []
-    for (value,) in rows:
-        ret = json.loads(value)
-        break
+    if row:
+        ret = json.loads(row.value)
     enabled_clouds: List['clouds.Cloud'] = []
     for c in ret:
         try:
             cloud = registry.CLOUD_REGISTRY.from_str(c)
         except ValueError:
-            # Handle the case for the clouds whose support has been removed from
-            # SkyPilot, e.g., 'local' was a cloud in the past and may be stored
-            # in the database for users before #3037. We should ignore removed
-            # clouds and continue.
+            # Handle the case for the clouds whose support has been
+            # removed from SkyPilot, e.g., 'local' was a cloud in the past
+            # and may be stored in the database for users before #3037.
+            # We should ignore removed clouds and continue.
             continue
         if cloud is not None:
             enabled_clouds.append(cloud)
@@ -829,15 +844,32 @@ def get_cached_enabled_clouds(
 
 
 def set_enabled_clouds(enabled_clouds: List[str],
-                       cloud_capability: 'cloud.CloudCapability') -> None:
-    _DB.cursor.execute(
-        'INSERT OR REPLACE INTO config VALUES (?, ?)',
-        (_get_capability_key(cloud_capability), json.dumps(enabled_clouds)))
-    _DB.conn.commit()
-
-
-def _get_capability_key(cloud_capability: 'cloud.CloudCapability') -> str:
-    return _ENABLED_CLOUDS_KEY_PREFIX + cloud_capability.value
+                       cloud_capability: 'cloud.CloudCapability',
+                       workspace: str) -> None:
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(config_table).values(
+                key=_get_enabled_clouds_key(cloud_capability, workspace),
+                value=json.dumps(enabled_clouds))
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[config_table.c.key],
+                set_={config_table.c.value: json.dumps(enabled_clouds)})
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
+
+
+def _get_enabled_clouds_key(cloud_capability: 'cloud.CloudCapability',
+                            workspace: str) -> str:
+    return _ENABLED_CLOUDS_KEY_PREFIX + workspace + '_' + cloud_capability.value
 
 
 def add_or_update_storage(storage_name: str,
@@ -853,26 +885,48 @@ def add_or_update_storage(storage_name: str,
     if not status_check(storage_status):
         raise ValueError(f'Error in updating global state. Storage Status '
                          f'{storage_status} is passed in incorrectly')
-    _DB.cursor.execute('INSERT OR REPLACE INTO storage VALUES (?, ?, ?, ?, ?)',
-                       (storage_name, storage_launched_at, handle, last_use,
-                        storage_status.value))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            insert_stmnt = sqlite.insert(storage_table).values(
+                name=storage_name,
+                handle=handle,
+                last_use=last_use,
+                launched_at=storage_launched_at,
+                status=storage_status.value)
+            do_update_stmt = insert_stmnt.on_conflict_do_update(
+                index_elements=[storage_table.c.name],
+                set_={
+                    storage_table.c.handle: handle,
+                    storage_table.c.last_use: last_use,
+                    storage_table.c.launched_at: storage_launched_at,
+                    storage_table.c.status: storage_status.value
+                })
+            session.execute(do_update_stmt)
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        else:
+            session.rollback()
+            raise ValueError('Unsupported database dialect')
+        session.commit()
 
 
 def remove_storage(storage_name: str):
     """Removes Storage from Database"""
-    _DB.cursor.execute('DELETE FROM storage WHERE name=(?)', (storage_name,))
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        session.query(storage_table).filter_by(name=storage_name).delete()
+        session.commit()
 
 
 def set_storage_status(storage_name: str,
                        status: status_lib.StorageStatus) -> None:
-    _DB.cursor.execute('UPDATE storage SET status=(?) WHERE name=(?)', (
-        status.value,
-        storage_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update({storage_table.c.status: status.value})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage {storage_name} not found.')
@@ -880,21 +934,20 @@ def set_storage_status(storage_name: str,
 
 def get_storage_status(storage_name: str) -> Optional[status_lib.StorageStatus]:
     assert storage_name is not None, 'storage_name cannot be None'
-    rows = _DB.cursor.execute('SELECT status FROM storage WHERE name=(?)',
-                              (storage_name,))
-    for (status,) in rows:
-        return status_lib.StorageStatus[status]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+        if row:
+            return status_lib.StorageStatus[row.status]
     return None
 
 
 def set_storage_handle(storage_name: str,
                        handle: 'Storage.StorageMetadata') -> None:
-    _DB.cursor.execute('UPDATE storage SET handle=(?) WHERE name=(?)', (
-        pickle.dumps(handle),
-        storage_name,
-    ))
-    count = _DB.cursor.rowcount
-    _DB.conn.commit()
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        count = session.query(storage_table).filter_by(
+            name=storage_name).update(
+                {storage_table.c.handle: pickle.dumps(handle)})
+        session.commit()
     assert count <= 1, count
     if count == 0:
         raise ValueError(f'Storage{storage_name} not found.')
@@ -904,38 +957,48 @@ def get_handle_from_storage_name(
         storage_name: Optional[str]) -> Optional['Storage.StorageMetadata']:
     if storage_name is None:
         return None
-    rows = _DB.cursor.execute('SELECT handle FROM storage WHERE name=(?)',
-                              (storage_name,))
-    for (handle,) in rows:
-        if handle is None:
-            return None
-        return pickle.loads(handle)
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        row = session.query(storage_table).filter_by(name=storage_name).first()
+        if row:
+            return pickle.loads(row.handle)
     return None
 
 
 def get_glob_storage_name(storage_name: str) -> List[str]:
     assert storage_name is not None, 'storage_name cannot be None'
-    rows = _DB.cursor.execute('SELECT name FROM storage WHERE name GLOB (?)',
-                              (storage_name,))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        if (_SQLALCHEMY_ENGINE.dialect.name ==
+                db_utils.SQLAlchemyDialect.SQLITE.value):
+            rows = session.query(storage_table).filter(
+                storage_table.c.name.op('GLOB')(storage_name)).all()
+        elif (_SQLALCHEMY_ENGINE.dialect.name ==
+              db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+            # TODO(syang) support postgres dialect
+            # postgres does not support GLOB
+            raise ValueError('Unsupported database dialect')
+        else:
+            raise ValueError('Unsupported database dialect')
+    return [row.name for row in rows]
 
 
 def get_storage_names_start_with(starts_with: str) -> List[str]:
-    rows = _DB.cursor.execute('SELECT name FROM storage WHERE name LIKE (?)',
-                              (f'{starts_with}%',))
-    return [row[0] for row in rows]
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).filter(
+            storage_table.c.name.like(f'{starts_with}%')).all()
+    return [row.name for row in rows]
 
 
 def get_storage() -> List[Dict[str, Any]]:
-    rows = _DB.cursor.execute('SELECT * FROM storage')
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(storage_table).all()
     records = []
-    for name, launched_at, handle, last_use, status in rows:
+    for row in rows:
         # TODO: use namedtuple instead of dict
         records.append({
-            'name': name,
-            'launched_at': launched_at,
-            'handle': pickle.loads(handle),
-            'last_use': last_use,
-            'status': status_lib.StorageStatus[status],
+            'name': row.name,
+            'launched_at': row.launched_at,
+            'handle': pickle.loads(row.handle),
+            'last_use': row.last_use,
+            'status': status_lib.StorageStatus[row.status],
        })
    return records
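
Note on the pattern above: the rewritten global_user_state.py replaces sqlite3's INSERT OR REPLACE with SQLAlchemy's SQLite upsert, which updates only the listed columns instead of deleting the row and re-inserting it with defaults for unspecified fields. Below is a minimal, self-contained sketch of that idiom; it is not code from the package (the in-memory engine, table, and values are illustrative, and SQLAlchemy >= 1.4 is assumed).

import sqlalchemy
from sqlalchemy import orm
from sqlalchemy.dialects import sqlite

# Illustrative in-memory engine and table; the real module builds these
# from ~/.sky/state.db and the tables shown in the diff above.
engine = sqlalchemy.create_engine('sqlite:///:memory:')
metadata = sqlalchemy.MetaData()
user_table = sqlalchemy.Table(
    'users',
    metadata,
    sqlalchemy.Column('id', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('name', sqlalchemy.Text),
)
metadata.create_all(engine)


def add_or_update_user(user_id: str, name: str) -> None:
    # Emits INSERT ... ON CONFLICT(id) DO UPDATE SET name = ?, so an
    # existing row is updated in place rather than replaced wholesale.
    insert_stmnt = sqlite.insert(user_table).values(id=user_id, name=name)
    do_update_stmt = insert_stmnt.on_conflict_do_update(
        index_elements=[user_table.c.id],
        set_={user_table.c.name: name})
    with orm.Session(engine) as session:
        session.execute(do_update_stmt)
        session.commit()


add_or_update_user('abc123', 'alice')
add_or_update_user('abc123', 'bob')  # same id: name is updated, no new row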