skypilot-nightly 1.0.0.dev20251021__py3-none-any.whl → 1.0.0.dev20251022__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +5 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/IgACOQPupLbX9z-RYVEDx/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-ec6f902ffb865853.js +11 -0
- sky/dashboard/out/_next/static/chunks/2755.9b1e69c921b5a870.js +26 -0
- sky/dashboard/out/_next/static/chunks/3015-d014dc5b9412fade.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3294.1fafbf42b3bcebff.js → 3294.998db87cd52a1238.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.483a3dda2d52f26e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{1121-d0782b9251f0fcd3.js → 4282-d2f3ef2fbf78e347.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-5c94d394259cdb6e.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0389e2cb52412db3.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.14326e329484b57e.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-8f058b0346db2aff.js → [job]-602eeead010ec1d6.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-18b334dedbd9f6f2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-57221ec2e4e01076.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-44ce535a0a0ad4ec.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-872e6a00165534f4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-0dc34cf9a8710a9f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-3a543725492fb896.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-d2af9d22e87cc4ba.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-9ad108cd67d16d96.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-6fc994fa1ee6c6bf.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-919e3c01ab6b2633.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +117 -17
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +4 -2
- sky/jobs/server/server.py +11 -11
- sky/jobs/state.py +307 -55
- sky/jobs/utils.py +248 -144
- sky/schemas/api/responses.py +2 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/serve/server/server.py +7 -7
- sky/server/common.py +1 -13
- sky/server/requests/executor.py +20 -20
- sky/server/requests/payloads.py +3 -0
- sky/server/requests/requests.py +12 -19
- sky/server/requests/serializers/encoders.py +3 -3
- sky/server/server.py +34 -34
- sky/setup_files/alembic.ini +4 -0
- sky/skylet/services.py +5 -5
- sky/skypilot_config.py +87 -75
- sky/ssh_node_pools/server.py +4 -4
- sky/users/permission.py +4 -0
- sky/utils/db/db_utils.py +11 -3
- sky/utils/db/migration_utils.py +7 -3
- sky/volumes/server/server.py +3 -3
- sky/workspaces/server.py +6 -6
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/METADATA +36 -35
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/RECORD +73 -72
- sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
- sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-7e0e8f06bb2f881c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
- sky/dashboard/out/_next/static/chunks/webpack-66f23594d38c7f16.js +0 -1
- sky/dashboard/out/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{jDc1PlRsl9Cc5FQUMLBu8 → IgACOQPupLbX9z-RYVEDx}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-df9f87fcb7f24292.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-e5c9ce6a24fc0de4.js → [job]-8677af16befde039.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-e020fd69dbe76cea.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251021.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/top_level.txt +0 -0
sky/jobs/state.py
CHANGED
|
@@ -10,8 +10,7 @@ import sqlite3
|
|
|
10
10
|
import threading
|
|
11
11
|
import time
|
|
12
12
|
import typing
|
|
13
|
-
from typing import
|
|
14
|
-
Union)
|
|
13
|
+
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple, Union
|
|
15
14
|
import urllib.parse
|
|
16
15
|
|
|
17
16
|
import colorama
|
|
@@ -315,41 +314,42 @@ async def _describe_task_transition_failure(session: sql_async.AsyncSession,
|
|
|
315
314
|
# by joining the spot and job_info tables.
|
|
316
315
|
def _get_jobs_dict(r: 'row.RowMapping') -> Dict[str, Any]:
|
|
317
316
|
return {
|
|
318
|
-
'_job_id': r
|
|
319
|
-
'_task_name': r
|
|
320
|
-
'resources': r
|
|
321
|
-
'submitted_at': r
|
|
322
|
-
'status': r
|
|
323
|
-
'run_timestamp': r
|
|
324
|
-
'start_at': r
|
|
325
|
-
'end_at': r
|
|
326
|
-
'last_recovered_at': r
|
|
327
|
-
'recovery_count': r
|
|
328
|
-
'job_duration': r
|
|
329
|
-
'failure_reason': r
|
|
330
|
-
'job_id': r
|
|
331
|
-
|
|
332
|
-
'
|
|
333
|
-
'
|
|
334
|
-
'
|
|
335
|
-
'
|
|
317
|
+
'_job_id': r.get('job_id'), # from spot table
|
|
318
|
+
'_task_name': r.get('job_name'), # deprecated, from spot table
|
|
319
|
+
'resources': r.get('resources'),
|
|
320
|
+
'submitted_at': r.get('submitted_at'),
|
|
321
|
+
'status': r.get('status'),
|
|
322
|
+
'run_timestamp': r.get('run_timestamp'),
|
|
323
|
+
'start_at': r.get('start_at'),
|
|
324
|
+
'end_at': r.get('end_at'),
|
|
325
|
+
'last_recovered_at': r.get('last_recovered_at'),
|
|
326
|
+
'recovery_count': r.get('recovery_count'),
|
|
327
|
+
'job_duration': r.get('job_duration'),
|
|
328
|
+
'failure_reason': r.get('failure_reason'),
|
|
329
|
+
'job_id': r.get(spot_table.c.spot_job_id
|
|
330
|
+
), # ambiguous, use table.column
|
|
331
|
+
'task_id': r.get('task_id'),
|
|
332
|
+
'task_name': r.get('task_name'),
|
|
333
|
+
'specs': r.get('specs'),
|
|
334
|
+
'local_log_file': r.get('local_log_file'),
|
|
335
|
+
'metadata': r.get('metadata'),
|
|
336
336
|
# columns from job_info table (some may be None for legacy jobs)
|
|
337
|
-
'_job_info_job_id': r
|
|
338
|
-
|
|
339
|
-
'job_name': r
|
|
340
|
-
'schedule_state': r
|
|
341
|
-
'controller_pid': r
|
|
342
|
-
'dag_yaml_path': r
|
|
343
|
-
'env_file_path': r
|
|
344
|
-
'user_hash': r
|
|
345
|
-
'workspace': r
|
|
346
|
-
'priority': r
|
|
347
|
-
'entrypoint': r
|
|
348
|
-
'original_user_yaml_path': r
|
|
349
|
-
'pool': r
|
|
350
|
-
'current_cluster_name': r
|
|
351
|
-
'job_id_on_pool_cluster': r
|
|
352
|
-
'pool_hash': r
|
|
337
|
+
'_job_info_job_id': r.get(job_info_table.c.spot_job_id
|
|
338
|
+
), # ambiguous, use table.column
|
|
339
|
+
'job_name': r.get('name'), # from job_info table
|
|
340
|
+
'schedule_state': r.get('schedule_state'),
|
|
341
|
+
'controller_pid': r.get('controller_pid'),
|
|
342
|
+
'dag_yaml_path': r.get('dag_yaml_path'),
|
|
343
|
+
'env_file_path': r.get('env_file_path'),
|
|
344
|
+
'user_hash': r.get('user_hash'),
|
|
345
|
+
'workspace': r.get('workspace'),
|
|
346
|
+
'priority': r.get('priority'),
|
|
347
|
+
'entrypoint': r.get('entrypoint'),
|
|
348
|
+
'original_user_yaml_path': r.get('original_user_yaml_path'),
|
|
349
|
+
'pool': r.get('pool'),
|
|
350
|
+
'current_cluster_name': r.get('current_cluster_name'),
|
|
351
|
+
'job_id_on_pool_cluster': r.get('job_id_on_pool_cluster'),
|
|
352
|
+
'pool_hash': r.get('pool_hash'),
|
|
353
353
|
}
|
|
354
354
|
|
|
355
355
|
|
|
@@ -1200,6 +1200,277 @@ def get_managed_jobs(job_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
|
1200
1200
|
return jobs
|
|
1201
1201
|
|
|
1202
1202
|
|
|
1203
|
+
def _map_response_field_to_db_column(field: str):
|
|
1204
|
+
"""Map the response field name to an actual SQLAlchemy ColumnElement.
|
|
1205
|
+
|
|
1206
|
+
This ensures we never pass plain strings to SQLAlchemy 2.0 APIs like
|
|
1207
|
+
Select.with_only_columns().
|
|
1208
|
+
"""
|
|
1209
|
+
# Explicit aliases differing from actual DB column names
|
|
1210
|
+
alias_mapping = {
|
|
1211
|
+
'_job_id': spot_table.c.job_id, # spot.job_id
|
|
1212
|
+
'_task_name': spot_table.c.job_name, # deprecated, from spot table
|
|
1213
|
+
'job_id': spot_table.c.spot_job_id, # public job id -> spot.spot_job_id
|
|
1214
|
+
'_job_info_job_id': job_info_table.c.spot_job_id,
|
|
1215
|
+
'job_name': job_info_table.c.name, # public job name -> job_info.name
|
|
1216
|
+
}
|
|
1217
|
+
if field in alias_mapping:
|
|
1218
|
+
return alias_mapping[field]
|
|
1219
|
+
|
|
1220
|
+
# Try direct match on the `spot` table columns
|
|
1221
|
+
if field in spot_table.c:
|
|
1222
|
+
return spot_table.c[field]
|
|
1223
|
+
|
|
1224
|
+
# Try direct match on the `job_info` table columns
|
|
1225
|
+
if field in job_info_table.c:
|
|
1226
|
+
return job_info_table.c[field]
|
|
1227
|
+
|
|
1228
|
+
raise ValueError(f'Unknown field: {field}')
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
@_init_db
|
|
1232
|
+
def get_managed_jobs_total() -> int:
|
|
1233
|
+
"""Get the total number of managed jobs."""
|
|
1234
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1235
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1236
|
+
result = session.execute(
|
|
1237
|
+
sqlalchemy.select(sqlalchemy.func.count() # pylint: disable=not-callable
|
|
1238
|
+
).select_from(spot_table)).fetchone()
|
|
1239
|
+
return result[0] if result else 0
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
@_init_db
|
|
1243
|
+
def get_managed_jobs_highest_priority() -> int:
|
|
1244
|
+
"""Get the highest priority of the managed jobs."""
|
|
1245
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1246
|
+
query = sqlalchemy.select(sqlalchemy.func.max(
|
|
1247
|
+
job_info_table.c.priority)).where(
|
|
1248
|
+
sqlalchemy.and_(
|
|
1249
|
+
job_info_table.c.schedule_state.in_([
|
|
1250
|
+
ManagedJobScheduleState.LAUNCHING.value,
|
|
1251
|
+
ManagedJobScheduleState.ALIVE_BACKOFF.value,
|
|
1252
|
+
ManagedJobScheduleState.WAITING.value,
|
|
1253
|
+
ManagedJobScheduleState.ALIVE_WAITING.value,
|
|
1254
|
+
]),
|
|
1255
|
+
job_info_table.c.priority.is_not(None),
|
|
1256
|
+
))
|
|
1257
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1258
|
+
priority = session.execute(query).fetchone()
|
|
1259
|
+
return priority[0] if priority and priority[
|
|
1260
|
+
0] is not None else constants.MIN_PRIORITY
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
def build_managed_jobs_with_filters_no_status_query(
|
|
1264
|
+
fields: Optional[List[str]] = None,
|
|
1265
|
+
job_ids: Optional[List[int]] = None,
|
|
1266
|
+
accessible_workspaces: Optional[List[str]] = None,
|
|
1267
|
+
workspace_match: Optional[str] = None,
|
|
1268
|
+
name_match: Optional[str] = None,
|
|
1269
|
+
pool_match: Optional[str] = None,
|
|
1270
|
+
user_hashes: Optional[List[Optional[str]]] = None,
|
|
1271
|
+
skip_finished: bool = False,
|
|
1272
|
+
count_only: bool = False,
|
|
1273
|
+
status_count: bool = False,
|
|
1274
|
+
) -> sqlalchemy.Select:
|
|
1275
|
+
"""Build a query to get managed jobs from the database with filters."""
|
|
1276
|
+
# Join spot and job_info tables to get the job name for each task.
|
|
1277
|
+
# We use LEFT OUTER JOIN mainly for backward compatibility, as for an
|
|
1278
|
+
# existing controller before #1982, the job_info table may not exist,
|
|
1279
|
+
# and all the managed jobs created before will not be present in the
|
|
1280
|
+
# job_info.
|
|
1281
|
+
# Note: we will get the user_hash here, but don't try to call
|
|
1282
|
+
# global_user_state.get_user() on it. This runs on the controller, which may
|
|
1283
|
+
# not have the user info. Prefer to do it on the API server side.
|
|
1284
|
+
if count_only:
|
|
1285
|
+
query = sqlalchemy.select(sqlalchemy.func.count().label('count')) # pylint: disable=not-callable
|
|
1286
|
+
elif status_count:
|
|
1287
|
+
query = sqlalchemy.select(spot_table.c.status,
|
|
1288
|
+
sqlalchemy.func.count().label('count')) # pylint: disable=not-callable
|
|
1289
|
+
else:
|
|
1290
|
+
query = sqlalchemy.select(spot_table, job_info_table)
|
|
1291
|
+
query = query.select_from(
|
|
1292
|
+
spot_table.outerjoin(
|
|
1293
|
+
job_info_table,
|
|
1294
|
+
spot_table.c.spot_job_id == job_info_table.c.spot_job_id))
|
|
1295
|
+
if skip_finished:
|
|
1296
|
+
# Filter out finished jobs at the DB level. If a multi-task job is
|
|
1297
|
+
# partially finished, include all its tasks. We do this by first
|
|
1298
|
+
# selecting job_ids that have at least one non-terminal task, then
|
|
1299
|
+
# restricting the main query to those job_ids.
|
|
1300
|
+
terminal_status_values = [
|
|
1301
|
+
s.value for s in ManagedJobStatus.terminal_statuses()
|
|
1302
|
+
]
|
|
1303
|
+
non_terminal_job_ids_subquery = (sqlalchemy.select(
|
|
1304
|
+
spot_table.c.spot_job_id).where(
|
|
1305
|
+
sqlalchemy.or_(
|
|
1306
|
+
spot_table.c.status.is_(None),
|
|
1307
|
+
sqlalchemy.not_(
|
|
1308
|
+
spot_table.c.status.in_(terminal_status_values)),
|
|
1309
|
+
)).distinct())
|
|
1310
|
+
query = query.where(
|
|
1311
|
+
spot_table.c.spot_job_id.in_(non_terminal_job_ids_subquery))
|
|
1312
|
+
if not count_only and not status_count and fields:
|
|
1313
|
+
# Resolve requested field names to explicit ColumnElements from
|
|
1314
|
+
# the joined tables.
|
|
1315
|
+
selected_columns = [_map_response_field_to_db_column(f) for f in fields]
|
|
1316
|
+
query = query.with_only_columns(*selected_columns)
|
|
1317
|
+
if job_ids is not None:
|
|
1318
|
+
query = query.where(spot_table.c.spot_job_id.in_(job_ids))
|
|
1319
|
+
if accessible_workspaces is not None:
|
|
1320
|
+
query = query.where(
|
|
1321
|
+
job_info_table.c.workspace.in_(accessible_workspaces))
|
|
1322
|
+
if workspace_match is not None:
|
|
1323
|
+
query = query.where(
|
|
1324
|
+
job_info_table.c.workspace.like(f'%{workspace_match}%'))
|
|
1325
|
+
if name_match is not None:
|
|
1326
|
+
query = query.where(job_info_table.c.name.like(f'%{name_match}%'))
|
|
1327
|
+
if pool_match is not None:
|
|
1328
|
+
query = query.where(job_info_table.c.pool.like(f'%{pool_match}%'))
|
|
1329
|
+
if user_hashes is not None:
|
|
1330
|
+
query = query.where(job_info_table.c.user_hash.in_(user_hashes))
|
|
1331
|
+
return query
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
def build_managed_jobs_with_filters_query(
|
|
1335
|
+
fields: Optional[List[str]] = None,
|
|
1336
|
+
job_ids: Optional[List[int]] = None,
|
|
1337
|
+
accessible_workspaces: Optional[List[str]] = None,
|
|
1338
|
+
workspace_match: Optional[str] = None,
|
|
1339
|
+
name_match: Optional[str] = None,
|
|
1340
|
+
pool_match: Optional[str] = None,
|
|
1341
|
+
user_hashes: Optional[List[Optional[str]]] = None,
|
|
1342
|
+
statuses: Optional[List[str]] = None,
|
|
1343
|
+
skip_finished: bool = False,
|
|
1344
|
+
count_only: bool = False,
|
|
1345
|
+
) -> sqlalchemy.Select:
|
|
1346
|
+
"""Build a query to get managed jobs from the database with filters."""
|
|
1347
|
+
query = build_managed_jobs_with_filters_no_status_query(
|
|
1348
|
+
fields=fields,
|
|
1349
|
+
job_ids=job_ids,
|
|
1350
|
+
accessible_workspaces=accessible_workspaces,
|
|
1351
|
+
workspace_match=workspace_match,
|
|
1352
|
+
name_match=name_match,
|
|
1353
|
+
pool_match=pool_match,
|
|
1354
|
+
user_hashes=user_hashes,
|
|
1355
|
+
skip_finished=skip_finished,
|
|
1356
|
+
count_only=count_only,
|
|
1357
|
+
)
|
|
1358
|
+
if statuses is not None:
|
|
1359
|
+
query = query.where(spot_table.c.status.in_(statuses))
|
|
1360
|
+
return query
|
|
1361
|
+
|
|
1362
|
+
|
|
1363
|
+
@_init_db
|
|
1364
|
+
def get_status_count_with_filters(
|
|
1365
|
+
fields: Optional[List[str]] = None,
|
|
1366
|
+
job_ids: Optional[List[int]] = None,
|
|
1367
|
+
accessible_workspaces: Optional[List[str]] = None,
|
|
1368
|
+
workspace_match: Optional[str] = None,
|
|
1369
|
+
name_match: Optional[str] = None,
|
|
1370
|
+
pool_match: Optional[str] = None,
|
|
1371
|
+
user_hashes: Optional[List[Optional[str]]] = None,
|
|
1372
|
+
skip_finished: bool = False,
|
|
1373
|
+
) -> Dict[str, int]:
|
|
1374
|
+
"""Get the status count of the managed jobs with filters."""
|
|
1375
|
+
query = build_managed_jobs_with_filters_no_status_query(
|
|
1376
|
+
fields=fields,
|
|
1377
|
+
job_ids=job_ids,
|
|
1378
|
+
accessible_workspaces=accessible_workspaces,
|
|
1379
|
+
workspace_match=workspace_match,
|
|
1380
|
+
name_match=name_match,
|
|
1381
|
+
pool_match=pool_match,
|
|
1382
|
+
user_hashes=user_hashes,
|
|
1383
|
+
skip_finished=skip_finished,
|
|
1384
|
+
status_count=True,
|
|
1385
|
+
)
|
|
1386
|
+
query = query.group_by(spot_table.c.status)
|
|
1387
|
+
results: Dict[str, int] = {}
|
|
1388
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1389
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1390
|
+
rows = session.execute(query).fetchall()
|
|
1391
|
+
for status_value, count in rows:
|
|
1392
|
+
# status_value is already a string (enum value)
|
|
1393
|
+
results[str(status_value)] = int(count)
|
|
1394
|
+
return results
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
@_init_db
|
|
1398
|
+
def get_managed_jobs_with_filters(
|
|
1399
|
+
fields: Optional[List[str]] = None,
|
|
1400
|
+
job_ids: Optional[List[int]] = None,
|
|
1401
|
+
accessible_workspaces: Optional[List[str]] = None,
|
|
1402
|
+
workspace_match: Optional[str] = None,
|
|
1403
|
+
name_match: Optional[str] = None,
|
|
1404
|
+
pool_match: Optional[str] = None,
|
|
1405
|
+
user_hashes: Optional[List[Optional[str]]] = None,
|
|
1406
|
+
statuses: Optional[List[str]] = None,
|
|
1407
|
+
skip_finished: bool = False,
|
|
1408
|
+
page: Optional[int] = None,
|
|
1409
|
+
limit: Optional[int] = None,
|
|
1410
|
+
) -> Tuple[List[Dict[str, Any]], int]:
|
|
1411
|
+
"""Get managed jobs from the database with filters."""
|
|
1412
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1413
|
+
|
|
1414
|
+
count_query = build_managed_jobs_with_filters_query(
|
|
1415
|
+
fields=None,
|
|
1416
|
+
job_ids=job_ids,
|
|
1417
|
+
accessible_workspaces=accessible_workspaces,
|
|
1418
|
+
workspace_match=workspace_match,
|
|
1419
|
+
name_match=name_match,
|
|
1420
|
+
pool_match=pool_match,
|
|
1421
|
+
user_hashes=user_hashes,
|
|
1422
|
+
statuses=statuses,
|
|
1423
|
+
skip_finished=skip_finished,
|
|
1424
|
+
count_only=True,
|
|
1425
|
+
)
|
|
1426
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1427
|
+
total = session.execute(count_query).fetchone()[0]
|
|
1428
|
+
|
|
1429
|
+
query = build_managed_jobs_with_filters_query(
|
|
1430
|
+
fields=fields,
|
|
1431
|
+
job_ids=job_ids,
|
|
1432
|
+
accessible_workspaces=accessible_workspaces,
|
|
1433
|
+
workspace_match=workspace_match,
|
|
1434
|
+
name_match=name_match,
|
|
1435
|
+
pool_match=pool_match,
|
|
1436
|
+
user_hashes=user_hashes,
|
|
1437
|
+
statuses=statuses,
|
|
1438
|
+
skip_finished=skip_finished,
|
|
1439
|
+
)
|
|
1440
|
+
query = query.order_by(spot_table.c.spot_job_id.desc(),
|
|
1441
|
+
spot_table.c.task_id.asc())
|
|
1442
|
+
if page is not None and limit is not None:
|
|
1443
|
+
query = query.offset((page - 1) * limit).limit(limit)
|
|
1444
|
+
rows = None
|
|
1445
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1446
|
+
rows = session.execute(query).fetchall()
|
|
1447
|
+
jobs = []
|
|
1448
|
+
for row in rows:
|
|
1449
|
+
job_dict = _get_jobs_dict(row._mapping) # pylint: disable=protected-access
|
|
1450
|
+
job_dict['status'] = ManagedJobStatus(job_dict['status'])
|
|
1451
|
+
if job_dict.get('schedule_state') is not None:
|
|
1452
|
+
job_dict['schedule_state'] = ManagedJobScheduleState(
|
|
1453
|
+
job_dict['schedule_state'])
|
|
1454
|
+
if job_dict.get('job_name') is None:
|
|
1455
|
+
job_dict['job_name'] = job_dict.get('task_name')
|
|
1456
|
+
if job_dict.get('metadata') is not None:
|
|
1457
|
+
job_dict['metadata'] = json.loads(job_dict['metadata'])
|
|
1458
|
+
|
|
1459
|
+
# Add user YAML content for managed jobs.
|
|
1460
|
+
yaml_path = job_dict.get('original_user_yaml_path')
|
|
1461
|
+
if (not fields or 'user_yaml' in fields) and yaml_path:
|
|
1462
|
+
try:
|
|
1463
|
+
with open(yaml_path, 'r', encoding='utf-8') as f:
|
|
1464
|
+
job_dict['user_yaml'] = f.read()
|
|
1465
|
+
except (FileNotFoundError, IOError, OSError):
|
|
1466
|
+
job_dict['user_yaml'] = None
|
|
1467
|
+
else:
|
|
1468
|
+
job_dict['user_yaml'] = None
|
|
1469
|
+
|
|
1470
|
+
jobs.append(job_dict)
|
|
1471
|
+
return jobs, total
|
|
1472
|
+
|
|
1473
|
+
|
|
1203
1474
|
@_init_db
|
|
1204
1475
|
def get_task_name(job_id: int, task_id: int) -> str:
|
|
1205
1476
|
"""Get the task name of a job."""
|
|
@@ -1278,25 +1549,6 @@ def get_pool_from_job_id(job_id: int) -> Optional[str]:
|
|
|
1278
1549
|
return pool[0] if pool else None
|
|
1279
1550
|
|
|
1280
1551
|
|
|
1281
|
-
@_init_db
|
|
1282
|
-
def get_pool_and_submit_info_from_job_ids(
|
|
1283
|
-
job_ids: Set[int]
|
|
1284
|
-
) -> Dict[int, Tuple[Optional[str], Optional[str], Optional[int]]]:
|
|
1285
|
-
"""Get the pool, cluster name, and job id on pool from job id"""
|
|
1286
|
-
assert _SQLALCHEMY_ENGINE is not None
|
|
1287
|
-
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1288
|
-
rows = session.execute(
|
|
1289
|
-
sqlalchemy.select(
|
|
1290
|
-
job_info_table.c.spot_job_id, job_info_table.c.pool,
|
|
1291
|
-
job_info_table.c.current_cluster_name,
|
|
1292
|
-
job_info_table.c.job_id_on_pool_cluster).where(
|
|
1293
|
-
job_info_table.c.spot_job_id.in_(job_ids))).fetchall()
|
|
1294
|
-
return {
|
|
1295
|
-
job_id: (pool, cluster_name, job_id_on_pool_cluster)
|
|
1296
|
-
for job_id, pool, cluster_name, job_id_on_pool_cluster in rows
|
|
1297
|
-
}
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
1552
|
@_init_db
|
|
1301
1553
|
def set_current_cluster_name(job_id: int, current_cluster_name: str) -> None:
|
|
1302
1554
|
"""Set the current cluster name for a job."""
|