skypilot-nightly 1.0.0.dev20250623__py3-none-any.whl → 1.0.0.dev20250625__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/admin_policy.py +16 -5
- sky/backends/__init__.py +2 -1
- sky/backends/backend_utils.py +38 -11
- sky/backends/cloud_vm_ray_backend.py +52 -18
- sky/client/cli/command.py +264 -25
- sky/client/sdk.py +119 -85
- sky/clouds/aws.py +10 -7
- sky/clouds/azure.py +10 -7
- sky/clouds/cloud.py +2 -0
- sky/clouds/cudo.py +2 -0
- sky/clouds/do.py +10 -7
- sky/clouds/fluidstack.py +2 -0
- sky/clouds/gcp.py +10 -7
- sky/clouds/hyperbolic.py +10 -7
- sky/clouds/ibm.py +2 -0
- sky/clouds/kubernetes.py +27 -9
- sky/clouds/lambda_cloud.py +10 -7
- sky/clouds/nebius.py +10 -7
- sky/clouds/oci.py +10 -7
- sky/clouds/paperspace.py +10 -7
- sky/clouds/runpod.py +10 -7
- sky/clouds/scp.py +10 -7
- sky/clouds/vast.py +10 -7
- sky/clouds/vsphere.py +2 -0
- sky/core.py +89 -15
- sky/dag.py +14 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
- sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +16 -0
- sky/dashboard/out/_next/static/chunks/37-1f1e94f5a561202a.js +6 -0
- sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +6 -0
- sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
- sky/dashboard/out/_next/static/chunks/{513.211357a2914a34b2.js → 513.309df9e18a9ff005.js} +1 -1
- sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
- sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +1 -0
- sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +6 -0
- sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
- sky/dashboard/out/_next/static/chunks/856-cdf66268ec878d0c.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +1 -0
- sky/dashboard/out/_next/static/chunks/969-d3a0b53f728d280a.js +1 -0
- sky/dashboard/out/_next/static/chunks/989-db34c16ad7ea6155.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-c416e87d5c2715cf.js → _app-0ef7418d1a3822f3.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-32ce4f49f2261f55.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-26da173e20af16e4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-ce29e7420385563d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c4ff1ec05e2f3daf.js → [name]-0b4c662a25e4747a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-862b120406461b10.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +1 -0
- sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +2 -4
- sky/exceptions.py +26 -0
- sky/execution.py +5 -0
- sky/global_user_state.py +263 -20
- sky/jobs/client/sdk.py +13 -12
- sky/jobs/controller.py +5 -1
- sky/jobs/scheduler.py +4 -3
- sky/jobs/server/core.py +121 -51
- sky/jobs/state.py +15 -0
- sky/jobs/utils.py +114 -8
- sky/models.py +16 -0
- sky/provision/__init__.py +26 -0
- sky/provision/kubernetes/__init__.py +3 -0
- sky/provision/kubernetes/instance.py +38 -77
- sky/provision/kubernetes/utils.py +52 -2
- sky/provision/kubernetes/volume.py +147 -0
- sky/resources.py +20 -76
- sky/serve/client/sdk.py +13 -13
- sky/serve/server/core.py +5 -1
- sky/server/common.py +40 -5
- sky/server/constants.py +5 -1
- sky/server/metrics.py +105 -0
- sky/server/requests/executor.py +30 -14
- sky/server/requests/payloads.py +22 -3
- sky/server/requests/requests.py +59 -2
- sky/server/rest.py +152 -0
- sky/server/server.py +70 -19
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +8 -3
- sky/server/uvicorn.py +153 -13
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +19 -14
- sky/task.py +141 -43
- sky/templates/jobs-controller.yaml.j2 +12 -1
- sky/templates/kubernetes-ray.yml.j2 +31 -2
- sky/users/permission.py +2 -0
- sky/utils/admin_policy_utils.py +5 -1
- sky/utils/cli_utils/status_utils.py +25 -17
- sky/utils/command_runner.py +118 -12
- sky/utils/command_runner.pyi +57 -0
- sky/utils/common_utils.py +9 -1
- sky/utils/context.py +3 -1
- sky/utils/controller_utils.py +1 -2
- sky/utils/resources_utils.py +66 -0
- sky/utils/rich_utils.py +6 -0
- sky/utils/schemas.py +180 -38
- sky/utils/status_lib.py +10 -0
- sky/utils/validator.py +11 -1
- sky/volumes/__init__.py +0 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +64 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +199 -0
- sky/volumes/server/server.py +85 -0
- sky/volumes/utils.py +158 -0
- sky/volumes/volume.py +198 -0
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/RECORD +139 -123
- sky/dashboard/out/_next/static/F4kiZ6Zh72jA6HzZ3ncFo/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
- sky/dashboard/out/_next/static/chunks/37-3a4d77ad62932eaf.js +0 -6
- sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +0 -6
- sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +0 -1
- sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
- sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
- sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
- sky/dashboard/out/_next/static/chunks/856-c2c39c0912285e54.js +0 -1
- sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-1493ac755eadeb35.js +0 -1
- sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +0 -1
- sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-36bc0962129f72df.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters-82a651dbad53ec6e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-cf490d1fa38f3740.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-336ab80e270ce2ce.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-0263b00d6a10e64a.js +0 -1
- sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +0 -3
- /sky/dashboard/out/_next/static/{F4kiZ6Zh72jA6HzZ3ncFo → ZWdSYkqVe3WjnFR8ocqoG}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{843-b3040e493f6e7947.js → 843-07d25a7e64462fd8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{973-db3c97c2bfbceb65.js → 973-5b5019ba333e8d62.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250623.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/top_level.txt +0 -0
sky/global_user_state.py
CHANGED
@@ -111,6 +111,23 @@ storage_table = sqlalchemy.Table(
|
|
111
111
|
sqlalchemy.Column('status', sqlalchemy.Text),
|
112
112
|
)
|
113
113
|
|
114
|
+
volume_table = sqlalchemy.Table(
|
115
|
+
'volumes',
|
116
|
+
Base.metadata,
|
117
|
+
sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
|
118
|
+
sqlalchemy.Column('launched_at', sqlalchemy.Integer),
|
119
|
+
sqlalchemy.Column('handle', sqlalchemy.LargeBinary),
|
120
|
+
sqlalchemy.Column('user_hash', sqlalchemy.Text, server_default=None),
|
121
|
+
sqlalchemy.Column('workspace',
|
122
|
+
sqlalchemy.Text,
|
123
|
+
server_default=constants.SKYPILOT_DEFAULT_WORKSPACE),
|
124
|
+
sqlalchemy.Column('last_attached_at',
|
125
|
+
sqlalchemy.Integer,
|
126
|
+
server_default=None),
|
127
|
+
sqlalchemy.Column('last_use', sqlalchemy.Text),
|
128
|
+
sqlalchemy.Column('status', sqlalchemy.Text),
|
129
|
+
)
|
130
|
+
|
114
131
|
# Table for Cluster History
|
115
132
|
# usage_intervals: List[Tuple[int, int]]
|
116
133
|
# Specifies start and end timestamps of cluster.
|
@@ -134,6 +151,12 @@ cluster_history_table = sqlalchemy.Table(
|
|
134
151
|
sqlalchemy.Column('launched_resources', sqlalchemy.LargeBinary),
|
135
152
|
sqlalchemy.Column('usage_intervals', sqlalchemy.LargeBinary),
|
136
153
|
sqlalchemy.Column('user_hash', sqlalchemy.Text),
|
154
|
+
sqlalchemy.Column('last_creation_yaml',
|
155
|
+
sqlalchemy.Text,
|
156
|
+
server_default=None),
|
157
|
+
sqlalchemy.Column('last_creation_command',
|
158
|
+
sqlalchemy.Text,
|
159
|
+
server_default=None),
|
137
160
|
)
|
138
161
|
|
139
162
|
ssh_key_table = sqlalchemy.Table(
|
@@ -308,6 +331,21 @@ def create_table():
|
|
308
331
|
'password',
|
309
332
|
sqlalchemy.Text(),
|
310
333
|
default_statement='DEFAULT NULL')
|
334
|
+
|
335
|
+
db_utils.add_column_to_table_sqlalchemy(
|
336
|
+
session,
|
337
|
+
'cluster_history',
|
338
|
+
'last_creation_yaml',
|
339
|
+
sqlalchemy.Text(),
|
340
|
+
default_statement='DEFAULT NULL')
|
341
|
+
|
342
|
+
db_utils.add_column_to_table_sqlalchemy(
|
343
|
+
session,
|
344
|
+
'cluster_history',
|
345
|
+
'last_creation_command',
|
346
|
+
sqlalchemy.Text(),
|
347
|
+
default_statement='DEFAULT NULL')
|
348
|
+
|
311
349
|
session.commit()
|
312
350
|
|
313
351
|
|
@@ -597,6 +635,14 @@ def add_or_update_cluster(cluster_name: str,
|
|
597
635
|
# Modify cluster history table
|
598
636
|
launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
|
599
637
|
launched_resources = getattr(cluster_handle, 'launched_resources', None)
|
638
|
+
creation_info = {}
|
639
|
+
if conditional_values.get('last_creation_yaml') is not None:
|
640
|
+
creation_info = {
|
641
|
+
'last_creation_yaml':
|
642
|
+
conditional_values.get('last_creation_yaml'),
|
643
|
+
'last_creation_command':
|
644
|
+
conditional_values.get('last_creation_command'),
|
645
|
+
}
|
600
646
|
|
601
647
|
insert_stmnt = insert_func(cluster_history_table).values(
|
602
648
|
cluster_hash=cluster_hash,
|
@@ -605,7 +651,9 @@ def add_or_update_cluster(cluster_name: str,
|
|
605
651
|
requested_resources=pickle.dumps(requested_resources),
|
606
652
|
launched_resources=pickle.dumps(launched_resources),
|
607
653
|
usage_intervals=pickle.dumps(usage_intervals),
|
608
|
-
user_hash=user_hash
|
654
|
+
user_hash=user_hash,
|
655
|
+
**creation_info,
|
656
|
+
)
|
609
657
|
do_update_stmt = insert_stmnt.on_conflict_do_update(
|
610
658
|
index_elements=[cluster_history_table.c.cluster_hash],
|
611
659
|
set_={
|
@@ -617,7 +665,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
617
665
|
pickle.dumps(launched_resources),
|
618
666
|
cluster_history_table.c.usage_intervals:
|
619
667
|
pickle.dumps(usage_intervals),
|
620
|
-
cluster_history_table.c.user_hash: user_hash
|
668
|
+
cluster_history_table.c.user_hash: user_hash,
|
669
|
+
**creation_info,
|
621
670
|
})
|
622
671
|
session.execute(do_update_stmt)
|
623
672
|
|
@@ -1027,40 +1076,122 @@ def get_clusters() -> List[Dict[str, Any]]:
|
|
1027
1076
|
|
1028
1077
|
|
1029
1078
|
@_init_db
|
1030
|
-
def get_clusters_from_history(
|
1079
|
+
def get_clusters_from_history(
|
1080
|
+
days: Optional[int] = None) -> List[Dict[str, Any]]:
|
1081
|
+
"""Get cluster reports from history.
|
1082
|
+
|
1083
|
+
Args:
|
1084
|
+
days: If specified, only include historical clusters (those not
|
1085
|
+
currently active) that were last used within the past 'days'
|
1086
|
+
days. Active clusters are always included regardless of this
|
1087
|
+
parameter.
|
1088
|
+
|
1089
|
+
Returns:
|
1090
|
+
List of cluster records with history information.
|
1091
|
+
"""
|
1031
1092
|
assert _SQLALCHEMY_ENGINE is not None
|
1032
1093
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1094
|
+
# Explicitly select columns from both tables to avoid ambiguity
|
1095
|
+
query = session.query(
|
1096
|
+
cluster_history_table.c.cluster_hash, cluster_history_table.c.name,
|
1097
|
+
cluster_history_table.c.num_nodes,
|
1098
|
+
cluster_history_table.c.requested_resources,
|
1099
|
+
cluster_history_table.c.launched_resources,
|
1100
|
+
cluster_history_table.c.usage_intervals,
|
1101
|
+
cluster_history_table.c.user_hash,
|
1102
|
+
cluster_history_table.c.last_creation_yaml,
|
1103
|
+
cluster_history_table.c.last_creation_command,
|
1104
|
+
cluster_table.c.status, cluster_table.c.workspace,
|
1105
|
+
cluster_table.c.status_updated_at).select_from(
|
1106
|
+
cluster_history_table.join(cluster_table,
|
1107
|
+
cluster_history_table.c.cluster_hash
|
1108
|
+
== cluster_table.c.cluster_hash,
|
1109
|
+
isouter=True))
|
1110
|
+
|
1111
|
+
rows = query.all()
|
1112
|
+
|
1113
|
+
# Prepare filtering parameters
|
1114
|
+
cutoff_time = None
|
1115
|
+
if days is not None:
|
1116
|
+
cutoff_time = int(time.time()) - (days * 24 * 60 * 60)
|
1117
|
+
|
1041
1118
|
records = []
|
1042
1119
|
for row in rows:
|
1043
|
-
# TODO: use namedtuple instead of dict
|
1044
1120
|
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1121
|
+
launched_at = _get_cluster_launch_time(row.cluster_hash)
|
1122
|
+
duration = _get_cluster_duration(row.cluster_hash)
|
1123
|
+
|
1124
|
+
# Parse status
|
1125
|
+
status = None
|
1126
|
+
if row.status:
|
1127
|
+
status = status_lib.ClusterStatus[row.status]
|
1128
|
+
|
1129
|
+
# Apply filtering: always include active clusters, filter historical
|
1130
|
+
# ones by time
|
1131
|
+
if cutoff_time is not None and status is None: # Historical cluster
|
1132
|
+
# For historical clusters, check if they were used recently
|
1133
|
+
# Use the most recent activity from usage_intervals to determine
|
1134
|
+
# last use
|
1135
|
+
usage_intervals = []
|
1136
|
+
if row.usage_intervals:
|
1137
|
+
try:
|
1138
|
+
usage_intervals = pickle.loads(row.usage_intervals)
|
1139
|
+
except (pickle.PickleError, AttributeError):
|
1140
|
+
usage_intervals = []
|
1141
|
+
|
1142
|
+
# Find the most recent activity time from usage_intervals
|
1143
|
+
last_activity_time = None
|
1144
|
+
if usage_intervals:
|
1145
|
+
# Get the end time of the last interval (or start time if
|
1146
|
+
# still running)
|
1147
|
+
last_interval = usage_intervals[-1]
|
1148
|
+
last_activity_time = (last_interval[1] if last_interval[1]
|
1149
|
+
is not None else last_interval[0])
|
1150
|
+
|
1151
|
+
# Skip historical clusters that haven't been used recently
|
1152
|
+
if last_activity_time is None or last_activity_time < cutoff_time:
|
1153
|
+
continue
|
1154
|
+
|
1155
|
+
# Parse launched resources safely
|
1156
|
+
launched_resources = None
|
1157
|
+
if row.launched_resources:
|
1158
|
+
try:
|
1159
|
+
launched_resources = pickle.loads(row.launched_resources)
|
1160
|
+
except (pickle.PickleError, AttributeError):
|
1161
|
+
launched_resources = None
|
1162
|
+
|
1163
|
+
# Parse usage intervals safely
|
1164
|
+
usage_intervals = []
|
1165
|
+
if row.usage_intervals:
|
1166
|
+
try:
|
1167
|
+
usage_intervals = pickle.loads(row.usage_intervals)
|
1168
|
+
except (pickle.PickleError, AttributeError):
|
1169
|
+
usage_intervals = []
|
1170
|
+
|
1171
|
+
# Get user name from user hash
|
1172
|
+
user = get_user(user_hash)
|
1173
|
+
user_name = user.name if user is not None else None
|
1174
|
+
|
1048
1175
|
record = {
|
1049
1176
|
'name': row.name,
|
1050
|
-
'launched_at':
|
1051
|
-
'duration':
|
1177
|
+
'launched_at': launched_at,
|
1178
|
+
'duration': duration,
|
1052
1179
|
'num_nodes': row.num_nodes,
|
1053
|
-
'resources':
|
1180
|
+
'resources': launched_resources,
|
1054
1181
|
'cluster_hash': row.cluster_hash,
|
1055
|
-
'usage_intervals':
|
1182
|
+
'usage_intervals': usage_intervals,
|
1056
1183
|
'status': status,
|
1057
1184
|
'user_hash': user_hash,
|
1185
|
+
'user_name': user_name,
|
1186
|
+
'workspace': row.workspace,
|
1187
|
+
'last_creation_yaml': row.last_creation_yaml,
|
1188
|
+
'last_creation_command': row.last_creation_command,
|
1058
1189
|
}
|
1059
1190
|
|
1060
1191
|
records.append(record)
|
1061
1192
|
|
1062
1193
|
# sort by launch time, descending in recency
|
1063
|
-
records = sorted(records, key=lambda record: -record['launched_at'])
|
1194
|
+
records = sorted(records, key=lambda record: -(record['launched_at'] or 0))
|
1064
1195
|
return records
|
1065
1196
|
|
1066
1197
|
|
@@ -1312,6 +1443,118 @@ def get_storage() -> List[Dict[str, Any]]:
|
|
1312
1443
|
return records
|
1313
1444
|
|
1314
1445
|
|
1446
|
+
@_init_db
|
1447
|
+
def get_volume_names_start_with(starts_with: str) -> List[str]:
|
1448
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1449
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1450
|
+
rows = session.query(volume_table).filter(
|
1451
|
+
volume_table.c.name.like(f'{starts_with}%')).all()
|
1452
|
+
return [row.name for row in rows]
|
1453
|
+
|
1454
|
+
|
1455
|
+
@_init_db
|
1456
|
+
def get_volumes() -> List[Dict[str, Any]]:
|
1457
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1458
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1459
|
+
rows = session.query(volume_table).all()
|
1460
|
+
records = []
|
1461
|
+
for row in rows:
|
1462
|
+
records.append({
|
1463
|
+
'name': row.name,
|
1464
|
+
'launched_at': row.launched_at,
|
1465
|
+
'handle': pickle.loads(row.handle),
|
1466
|
+
'user_hash': row.user_hash,
|
1467
|
+
'workspace': row.workspace,
|
1468
|
+
'last_attached_at': row.last_attached_at,
|
1469
|
+
'last_use': row.last_use,
|
1470
|
+
'status': status_lib.VolumeStatus[row.status],
|
1471
|
+
})
|
1472
|
+
return records
|
1473
|
+
|
1474
|
+
|
1475
|
+
@_init_db
|
1476
|
+
def get_volume_by_name(name: str) -> Optional[Dict[str, Any]]:
|
1477
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1478
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1479
|
+
row = session.query(volume_table).filter_by(name=name).first()
|
1480
|
+
if row:
|
1481
|
+
return {
|
1482
|
+
'name': row.name,
|
1483
|
+
'launched_at': row.launched_at,
|
1484
|
+
'handle': pickle.loads(row.handle),
|
1485
|
+
'user_hash': row.user_hash,
|
1486
|
+
'workspace': row.workspace,
|
1487
|
+
'last_attached_at': row.last_attached_at,
|
1488
|
+
'last_use': row.last_use,
|
1489
|
+
'status': status_lib.VolumeStatus[row.status],
|
1490
|
+
}
|
1491
|
+
return None
|
1492
|
+
|
1493
|
+
|
1494
|
+
@_init_db
|
1495
|
+
def add_volume(name: str, config: models.VolumeConfig,
|
1496
|
+
status: status_lib.VolumeStatus) -> None:
|
1497
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1498
|
+
volume_launched_at = int(time.time())
|
1499
|
+
handle = pickle.dumps(config)
|
1500
|
+
last_use = common_utils.get_current_command()
|
1501
|
+
user_hash = common_utils.get_current_user().id
|
1502
|
+
active_workspace = skypilot_config.get_active_workspace()
|
1503
|
+
|
1504
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1505
|
+
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
1506
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
1507
|
+
insert_func = sqlite.insert
|
1508
|
+
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
1509
|
+
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
1510
|
+
insert_func = postgresql.insert
|
1511
|
+
else:
|
1512
|
+
raise ValueError('Unsupported database dialect')
|
1513
|
+
insert_stmnt = insert_func(volume_table).values(
|
1514
|
+
name=name,
|
1515
|
+
launched_at=volume_launched_at,
|
1516
|
+
handle=handle,
|
1517
|
+
user_hash=user_hash,
|
1518
|
+
workspace=active_workspace,
|
1519
|
+
last_attached_at=None,
|
1520
|
+
last_use=last_use,
|
1521
|
+
status=status.value,
|
1522
|
+
)
|
1523
|
+
do_update_stmt = insert_stmnt.on_conflict_do_nothing()
|
1524
|
+
session.execute(do_update_stmt)
|
1525
|
+
session.commit()
|
1526
|
+
|
1527
|
+
|
1528
|
+
@_init_db
|
1529
|
+
def update_volume(name: str, last_attached_at: int,
|
1530
|
+
status: status_lib.VolumeStatus) -> None:
|
1531
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1532
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1533
|
+
session.query(volume_table).filter_by(name=name).update({
|
1534
|
+
volume_table.c.last_attached_at: last_attached_at,
|
1535
|
+
volume_table.c.status: status.value,
|
1536
|
+
})
|
1537
|
+
session.commit()
|
1538
|
+
|
1539
|
+
|
1540
|
+
@_init_db
|
1541
|
+
def update_volume_status(name: str, status: status_lib.VolumeStatus) -> None:
|
1542
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1543
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1544
|
+
session.query(volume_table).filter_by(name=name).update({
|
1545
|
+
volume_table.c.status: status.value,
|
1546
|
+
})
|
1547
|
+
session.commit()
|
1548
|
+
|
1549
|
+
|
1550
|
+
@_init_db
|
1551
|
+
def delete_volume(name: str) -> None:
|
1552
|
+
assert _SQLALCHEMY_ENGINE is not None
|
1553
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
1554
|
+
session.query(volume_table).filter_by(name=name).delete()
|
1555
|
+
session.commit()
|
1556
|
+
|
1557
|
+
|
1315
1558
|
@_init_db
|
1316
1559
|
def get_ssh_keys(user_hash: str) -> Tuple[str, str, bool]:
|
1317
1560
|
assert _SQLALCHEMY_ENGINE is not None
|
sky/jobs/client/sdk.py
CHANGED
@@ -7,10 +7,10 @@ import webbrowser
|
|
7
7
|
import click
|
8
8
|
|
9
9
|
from sky import sky_logging
|
10
|
-
from sky.adaptors import common as adaptors_common
|
11
10
|
from sky.client import common as client_common
|
12
11
|
from sky.client import sdk
|
13
12
|
from sky.server import common as server_common
|
13
|
+
from sky.server import rest
|
14
14
|
from sky.server.requests import payloads
|
15
15
|
from sky.skylet import constants
|
16
16
|
from sky.usage import usage_lib
|
@@ -22,11 +22,7 @@ from sky.utils import dag_utils
|
|
22
22
|
if typing.TYPE_CHECKING:
|
23
23
|
import io
|
24
24
|
|
25
|
-
import requests
|
26
|
-
|
27
25
|
import sky
|
28
|
-
else:
|
29
|
-
requests = adaptors_common.LazyImport('requests')
|
30
26
|
|
31
27
|
logger = sky_logging.init_logger(__name__)
|
32
28
|
|
@@ -49,7 +45,6 @@ def launch(
|
|
49
45
|
task: sky.Task, or sky.Dag (experimental; 1-task only) to launch as a
|
50
46
|
managed job.
|
51
47
|
name: Name of the managed job.
|
52
|
-
priority: Priority of the managed job.
|
53
48
|
_need_confirmation: (Internal only) Whether to show a confirmation
|
54
49
|
prompt before launching the job.
|
55
50
|
|
@@ -87,7 +82,7 @@ def launch(
|
|
87
82
|
task=dag_str,
|
88
83
|
name=name,
|
89
84
|
)
|
90
|
-
response =
|
85
|
+
response = rest.post(
|
91
86
|
f'{server_common.get_server_url()}/jobs/launch',
|
92
87
|
json=json.loads(body.model_dump_json()),
|
93
88
|
timeout=(5, None),
|
@@ -147,7 +142,7 @@ def queue(refresh: bool,
|
|
147
142
|
all_users=all_users,
|
148
143
|
job_ids=job_ids,
|
149
144
|
)
|
150
|
-
response =
|
145
|
+
response = rest.post(
|
151
146
|
f'{server_common.get_server_url()}/jobs/queue',
|
152
147
|
json=json.loads(body.model_dump_json()),
|
153
148
|
timeout=(5, None),
|
@@ -187,7 +182,7 @@ def cancel(
|
|
187
182
|
all=all,
|
188
183
|
all_users=all_users,
|
189
184
|
)
|
190
|
-
response =
|
185
|
+
response = rest.post(
|
191
186
|
f'{server_common.get_server_url()}/jobs/cancel',
|
192
187
|
json=json.loads(body.model_dump_json()),
|
193
188
|
timeout=(5, None),
|
@@ -198,6 +193,7 @@ def cancel(
|
|
198
193
|
|
199
194
|
@usage_lib.entrypoint
|
200
195
|
@server_common.check_server_healthy_or_start
|
196
|
+
@rest.retry_on_server_unavailable()
|
201
197
|
def tail_logs(name: Optional[str] = None,
|
202
198
|
job_id: Optional[int] = None,
|
203
199
|
follow: bool = True,
|
@@ -237,7 +233,7 @@ def tail_logs(name: Optional[str] = None,
|
|
237
233
|
refresh=refresh,
|
238
234
|
tail=tail,
|
239
235
|
)
|
240
|
-
response =
|
236
|
+
response = rest.post(
|
241
237
|
f'{server_common.get_server_url()}/jobs/logs',
|
242
238
|
json=json.loads(body.model_dump_json()),
|
243
239
|
stream=True,
|
@@ -245,7 +241,12 @@ def tail_logs(name: Optional[str] = None,
|
|
245
241
|
cookies=server_common.get_api_cookie_jar(),
|
246
242
|
)
|
247
243
|
request_id = server_common.get_request_id(response)
|
248
|
-
|
244
|
+
# Log request is idempotent when tail is 0, thus can resume previous
|
245
|
+
# streaming point on retry.
|
246
|
+
return sdk.stream_response(request_id=request_id,
|
247
|
+
response=response,
|
248
|
+
output_stream=output_stream,
|
249
|
+
resumable=(tail == 0))
|
249
250
|
|
250
251
|
|
251
252
|
@usage_lib.entrypoint
|
@@ -282,7 +283,7 @@ def download_logs(
|
|
282
283
|
controller=controller,
|
283
284
|
local_dir=local_dir,
|
284
285
|
)
|
285
|
-
response =
|
286
|
+
response = rest.post(
|
286
287
|
f'{server_common.get_server_url()}/jobs/download_logs',
|
287
288
|
json=json.loads(body.model_dump_json()),
|
288
289
|
timeout=(5, None),
|
sky/jobs/controller.py
CHANGED
@@ -603,7 +603,11 @@ def _cleanup(job_id: int, dag_yaml: str):
|
|
603
603
|
# mounts.
|
604
604
|
for file_mount in (task.file_mounts or {}).values():
|
605
605
|
try:
|
606
|
-
|
606
|
+
# For consolidation mode, there is no two-hop file mounts
|
607
|
+
# and the file path here represents the real user data.
|
608
|
+
# We skip the cleanup for consolidation mode.
|
609
|
+
if (not data_utils.is_cloud_store_url(file_mount) and
|
610
|
+
not managed_job_utils.is_consolidation_mode()):
|
607
611
|
path = os.path.expanduser(file_mount)
|
608
612
|
if os.path.isdir(path):
|
609
613
|
shutil.rmtree(path)
|
sky/jobs/scheduler.py
CHANGED
@@ -40,6 +40,7 @@ from argparse import ArgumentParser
|
|
40
40
|
import contextlib
|
41
41
|
from functools import lru_cache
|
42
42
|
import os
|
43
|
+
import sys
|
43
44
|
import time
|
44
45
|
import typing
|
45
46
|
|
@@ -89,12 +90,12 @@ def _start_controller(job_id: int, dag_yaml_path: str,
|
|
89
90
|
activate_python_env_cmd = (f'{constants.ACTIVATE_SKY_REMOTE_PYTHON_ENV};')
|
90
91
|
source_environment_cmd = (f'source {env_file_path};'
|
91
92
|
if env_file_path else '')
|
92
|
-
run_controller_cmd = ('
|
93
|
+
run_controller_cmd = (f'{sys.executable} -u -m sky.jobs.controller '
|
93
94
|
f'{dag_yaml_path} --job-id {job_id};')
|
94
95
|
|
95
96
|
# If the command line here is changed, please also update
|
96
|
-
# utils._controller_process_alive. `--job-id X`
|
97
|
-
# the
|
97
|
+
# utils._controller_process_alive. The substring `--job-id X`
|
98
|
+
# should be in the command.
|
98
99
|
run_cmd = (f'{activate_python_env_cmd}'
|
99
100
|
f'{source_environment_cmd}'
|
100
101
|
f'{run_controller_cmd}')
|