skypilot-nightly 1.0.0.dev20250612__py3-none-any.whl → 1.0.0.dev20250613__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +3 -2
- sky/backends/backend_utils.py +8 -2
- sky/benchmark/benchmark_state.py +2 -1
- sky/catalog/data_fetchers/fetch_aws.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +1 -1
- sky/check.py +2 -1
- sky/cli.py +1 -1
- sky/client/cli.py +1 -1
- sky/clouds/cloud.py +1 -1
- sky/clouds/gcp.py +1 -1
- sky/clouds/kubernetes.py +8 -2
- sky/clouds/ssh.py +7 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-208a9812ab4f61c9.js → webpack-5c3e6471d04780c6.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +2 -2
- sky/jobs/state.py +43 -44
- sky/provision/common.py +1 -1
- sky/provision/gcp/config.py +1 -1
- sky/provision/kubernetes/instance.py +2 -1
- sky/provision/kubernetes/utils.py +60 -13
- sky/resources.py +2 -2
- sky/serve/serve_state.py +81 -15
- sky/server/requests/preconditions.py +1 -1
- sky/server/requests/requests.py +11 -6
- sky/skylet/configs.py +26 -19
- sky/skylet/job_lib.py +3 -5
- sky/task.py +1 -1
- sky/templates/kubernetes-ray.yml.j2 +1 -1
- sky/utils/common_utils.py +6 -0
- sky/utils/context.py +1 -1
- sky/utils/infra_utils.py +1 -1
- sky/utils/kubernetes/generate_kubeconfig.sh +1 -1
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/RECORD +54 -54
- /sky/dashboard/out/_next/static/{G3DXdMFu2Jzd-Dody9iq1 → UdgJCk2sZFLJgFJW_qiWG}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{G3DXdMFu2Jzd-Dody9iq1 → UdgJCk2sZFLJgFJW_qiWG}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250612.dist-info → skypilot_nightly-1.0.0.dev20250613.dist-info}/top_level.txt +0 -0
sky/serve/serve_state.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
"""The database for services information."""
|
2
2
|
import collections
|
3
3
|
import enum
|
4
|
+
import functools
|
4
5
|
import json
|
5
6
|
import pathlib
|
6
7
|
import pickle
|
7
8
|
import sqlite3
|
9
|
+
import threading
|
8
10
|
import typing
|
9
11
|
from typing import Any, Dict, List, Optional, Tuple
|
10
12
|
|
@@ -18,19 +20,6 @@ if typing.TYPE_CHECKING:
|
|
18
20
|
from sky.serve import service_spec
|
19
21
|
|
20
22
|
|
21
|
-
def _get_db_path() -> str:
|
22
|
-
"""Workaround to collapse multi-step Path ops for type checker.
|
23
|
-
Ensures _DB_PATH is str, avoiding Union[Path, str] inference.
|
24
|
-
"""
|
25
|
-
path = pathlib.Path(constants.SKYSERVE_METADATA_DIR) / 'services.db'
|
26
|
-
path = path.expanduser().absolute()
|
27
|
-
path.parents[0].mkdir(parents=True, exist_ok=True)
|
28
|
-
return str(path)
|
29
|
-
|
30
|
-
|
31
|
-
_DB_PATH: str = _get_db_path()
|
32
|
-
|
33
|
-
|
34
23
|
def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
|
35
24
|
"""Creates the service and replica tables if they do not exist."""
|
36
25
|
|
@@ -82,7 +71,37 @@ def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
|
|
82
71
|
conn.commit()
|
83
72
|
|
84
73
|
|
85
|
-
|
74
|
+
def _get_db_path() -> str:
    """Return the absolute location of the services database as a string.

    The Path operations are kept inside this helper so that the caller
    receives a plain ``str`` (the type checker would otherwise infer
    ``Union[Path, str]`` for the module-level path variable).

    Side effect: the directory that will contain ``services.db`` is created
    if it does not exist yet.
    """
    db_file = (pathlib.Path(constants.SKYSERVE_METADATA_DIR) /
               'services.db').expanduser().absolute()
    # Make sure the containing directory exists before the DB is opened.
    db_file.parent.mkdir(parents=True, exist_ok=True)
    return str(db_file)
|
82
|
+
|
83
|
+
|
84
|
+
_DB_PATH = None
|
85
|
+
_db_init_lock = threading.Lock()
|
86
|
+
|
87
|
+
|
88
|
+
def init_db(func):
    """Decorator that initializes the services database on first use.

    The first call to any decorated function resolves the database path via
    ``_get_db_path()`` and calls ``db_utils.SQLiteConn(path, create_table)``
    (presumably this opens the DB and runs ``create_table`` -- confirm in
    ``db_utils``). Later calls skip straight to ``func``.

    Args:
        func: The function that needs ``_DB_PATH`` to be set before it runs.

    Returns:
        A wrapper with the same signature and return value as ``func``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        global _DB_PATH
        # Fast path (no lock): initialization has already completed.
        if _DB_PATH is not None:
            return func(*args, **kwargs)
        with _db_init_lock:
            # Re-check under the lock: another thread may have finished the
            # initialization while we were waiting.
            if _DB_PATH is None:
                db_path = _get_db_path()
                db_utils.SQLiteConn(db_path, create_table)
                # Publish the path only AFTER the SQLiteConn call returned.
                # The fast path above treats a non-None _DB_PATH as
                # "initialization done", so setting it earlier would let a
                # concurrent caller run `func` before initialization
                # finished, and would permanently skip initialization if
                # SQLiteConn raised.
                _DB_PATH = db_path
        return func(*args, **kwargs)

    return wrapper
|
103
|
+
|
104
|
+
|
86
105
|
_UNIQUE_CONSTRAINT_FAILED_ERROR_MSG = 'UNIQUE constraint failed: services.name'
|
87
106
|
|
88
107
|
|
@@ -247,6 +266,7 @@ _SERVICE_STATUS_TO_COLOR = {
|
|
247
266
|
}
|
248
267
|
|
249
268
|
|
269
|
+
@init_db
|
250
270
|
def add_service(name: str, controller_job_id: int, policy: str,
|
251
271
|
requested_resources_str: str, load_balancing_policy: str,
|
252
272
|
status: ServiceStatus, tls_encrypted: bool) -> bool:
|
@@ -256,6 +276,7 @@ def add_service(name: str, controller_job_id: int, policy: str,
|
|
256
276
|
True if the service is added successfully, False if the service already
|
257
277
|
exists.
|
258
278
|
"""
|
279
|
+
assert _DB_PATH is not None
|
259
280
|
try:
|
260
281
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
261
282
|
cursor.execute(
|
@@ -275,15 +296,19 @@ def add_service(name: str, controller_job_id: int, policy: str,
|
|
275
296
|
return True
|
276
297
|
|
277
298
|
|
299
|
+
@init_db
|
278
300
|
def remove_service(service_name: str) -> None:
|
279
301
|
"""Removes a service from the database."""
|
302
|
+
assert _DB_PATH is not None
|
280
303
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
281
304
|
cursor.execute("""\
|
282
305
|
DELETE FROM services WHERE name=(?)""", (service_name,))
|
283
306
|
|
284
307
|
|
308
|
+
@init_db
|
285
309
|
def set_service_uptime(service_name: str, uptime: int) -> None:
|
286
310
|
"""Sets the uptime of a service."""
|
311
|
+
assert _DB_PATH is not None
|
287
312
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
288
313
|
cursor.execute(
|
289
314
|
"""\
|
@@ -291,11 +316,13 @@ def set_service_uptime(service_name: str, uptime: int) -> None:
|
|
291
316
|
uptime=(?) WHERE name=(?)""", (uptime, service_name))
|
292
317
|
|
293
318
|
|
319
|
+
@init_db
|
294
320
|
def set_service_status_and_active_versions(
|
295
321
|
service_name: str,
|
296
322
|
status: ServiceStatus,
|
297
323
|
active_versions: Optional[List[int]] = None) -> None:
|
298
324
|
"""Sets the service status."""
|
325
|
+
assert _DB_PATH is not None
|
299
326
|
vars_to_set = 'status=(?)'
|
300
327
|
values: Tuple[str, ...] = (status.value, service_name)
|
301
328
|
if active_versions is not None:
|
@@ -308,9 +335,11 @@ def set_service_status_and_active_versions(
|
|
308
335
|
{vars_to_set} WHERE name=(?)""", values)
|
309
336
|
|
310
337
|
|
338
|
+
@init_db
|
311
339
|
def set_service_controller_port(service_name: str,
|
312
340
|
controller_port: int) -> None:
|
313
341
|
"""Sets the controller port of a service."""
|
342
|
+
assert _DB_PATH is not None
|
314
343
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
315
344
|
cursor.execute(
|
316
345
|
"""\
|
@@ -319,9 +348,11 @@ def set_service_controller_port(service_name: str,
|
|
319
348
|
(controller_port, service_name))
|
320
349
|
|
321
350
|
|
351
|
+
@init_db
|
322
352
|
def set_service_load_balancer_port(service_name: str,
|
323
353
|
load_balancer_port: int) -> None:
|
324
354
|
"""Sets the load balancer port of a service."""
|
355
|
+
assert _DB_PATH is not None
|
325
356
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
326
357
|
cursor.execute(
|
327
358
|
"""\
|
@@ -355,8 +386,10 @@ def _get_service_from_row(row) -> Dict[str, Any]:
|
|
355
386
|
}
|
356
387
|
|
357
388
|
|
389
|
+
@init_db
|
358
390
|
def get_services() -> List[Dict[str, Any]]:
|
359
391
|
"""Get all existing service records."""
|
392
|
+
assert _DB_PATH is not None
|
360
393
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
361
394
|
rows = cursor.execute('SELECT v.max_version, s.* FROM services s '
|
362
395
|
'JOIN ('
|
@@ -369,8 +402,10 @@ def get_services() -> List[Dict[str, Any]]:
|
|
369
402
|
return records
|
370
403
|
|
371
404
|
|
405
|
+
@init_db
|
372
406
|
def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
|
373
407
|
"""Get all existing service records."""
|
408
|
+
assert _DB_PATH is not None
|
374
409
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
375
410
|
rows = cursor.execute(
|
376
411
|
'SELECT v.max_version, s.* FROM services s '
|
@@ -384,8 +419,10 @@ def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
|
|
384
419
|
return None
|
385
420
|
|
386
421
|
|
422
|
+
@init_db
|
387
423
|
def get_service_versions(service_name: str) -> List[int]:
|
388
424
|
"""Gets all versions of a service."""
|
425
|
+
assert _DB_PATH is not None
|
389
426
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
390
427
|
rows = cursor.execute(
|
391
428
|
"""\
|
@@ -394,6 +431,7 @@ def get_service_versions(service_name: str) -> List[int]:
|
|
394
431
|
return [row[0] for row in rows]
|
395
432
|
|
396
433
|
|
434
|
+
@init_db
|
397
435
|
def get_glob_service_names(
|
398
436
|
service_names: Optional[List[str]] = None) -> List[str]:
|
399
437
|
"""Get service names matching the glob patterns.
|
@@ -405,6 +443,7 @@ def get_glob_service_names(
|
|
405
443
|
Returns:
|
406
444
|
A list of non-duplicated service names.
|
407
445
|
"""
|
446
|
+
assert _DB_PATH is not None
|
408
447
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
409
448
|
if service_names is None:
|
410
449
|
rows = cursor.execute('SELECT name FROM services').fetchall()
|
@@ -419,9 +458,11 @@ def get_glob_service_names(
|
|
419
458
|
|
420
459
|
|
421
460
|
# === Replica functions ===
|
461
|
+
@init_db
|
422
462
|
def add_or_update_replica(service_name: str, replica_id: int,
|
423
463
|
replica_info: 'replica_managers.ReplicaInfo') -> None:
|
424
464
|
"""Adds a replica to the database."""
|
465
|
+
assert _DB_PATH is not None
|
425
466
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
426
467
|
cursor.execute(
|
427
468
|
"""\
|
@@ -431,8 +472,10 @@ def add_or_update_replica(service_name: str, replica_id: int,
|
|
431
472
|
(service_name, replica_id, pickle.dumps(replica_info)))
|
432
473
|
|
433
474
|
|
475
|
+
@init_db
|
434
476
|
def remove_replica(service_name: str, replica_id: int) -> None:
|
435
477
|
"""Removes a replica from the database."""
|
478
|
+
assert _DB_PATH is not None
|
436
479
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
437
480
|
cursor.execute(
|
438
481
|
"""\
|
@@ -441,10 +484,12 @@ def remove_replica(service_name: str, replica_id: int) -> None:
|
|
441
484
|
AND replica_id=(?)""", (service_name, replica_id))
|
442
485
|
|
443
486
|
|
487
|
+
@init_db
|
444
488
|
def get_replica_info_from_id(
|
445
489
|
service_name: str,
|
446
490
|
replica_id: int) -> Optional['replica_managers.ReplicaInfo']:
|
447
491
|
"""Gets a replica info from the database."""
|
492
|
+
assert _DB_PATH is not None
|
448
493
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
449
494
|
rows = cursor.execute(
|
450
495
|
"""\
|
@@ -456,9 +501,11 @@ def get_replica_info_from_id(
|
|
456
501
|
return None
|
457
502
|
|
458
503
|
|
504
|
+
@init_db
|
459
505
|
def get_replica_infos(
|
460
506
|
service_name: str) -> List['replica_managers.ReplicaInfo']:
|
461
507
|
"""Gets all replica infos of a service."""
|
508
|
+
assert _DB_PATH is not None
|
462
509
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
463
510
|
rows = cursor.execute(
|
464
511
|
"""\
|
@@ -467,8 +514,10 @@ def get_replica_infos(
|
|
467
514
|
return [pickle.loads(row[0]) for row in rows]
|
468
515
|
|
469
516
|
|
517
|
+
@init_db
|
470
518
|
def total_number_provisioning_replicas() -> int:
|
471
519
|
"""Returns the total number of provisioning replicas."""
|
520
|
+
assert _DB_PATH is not None
|
472
521
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
473
522
|
rows = cursor.execute('SELECT replica_info FROM replicas').fetchall()
|
474
523
|
provisioning_count = 0
|
@@ -488,9 +537,10 @@ def get_replicas_at_status(
|
|
488
537
|
|
489
538
|
|
490
539
|
# === Version functions ===
|
540
|
+
@init_db
|
491
541
|
def add_version(service_name: str) -> int:
|
492
542
|
"""Adds a version to the database."""
|
493
|
-
|
543
|
+
assert _DB_PATH is not None
|
494
544
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
495
545
|
cursor.execute(
|
496
546
|
"""\
|
@@ -507,8 +557,10 @@ def add_version(service_name: str) -> int:
|
|
507
557
|
return inserted_version
|
508
558
|
|
509
559
|
|
560
|
+
@init_db
|
510
561
|
def add_or_update_version(service_name: str, version: int,
|
511
562
|
spec: 'service_spec.SkyServiceSpec') -> None:
|
563
|
+
assert _DB_PATH is not None
|
512
564
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
513
565
|
cursor.execute(
|
514
566
|
"""\
|
@@ -517,8 +569,10 @@ def add_or_update_version(service_name: str, version: int,
|
|
517
569
|
VALUES (?, ?, ?)""", (service_name, version, pickle.dumps(spec)))
|
518
570
|
|
519
571
|
|
572
|
+
@init_db
|
520
573
|
def remove_service_versions(service_name: str) -> None:
|
521
574
|
"""Removes a replica from the database."""
|
575
|
+
assert _DB_PATH is not None
|
522
576
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
523
577
|
cursor.execute(
|
524
578
|
"""\
|
@@ -526,9 +580,11 @@ def remove_service_versions(service_name: str) -> None:
|
|
526
580
|
WHERE service_name=(?)""", (service_name,))
|
527
581
|
|
528
582
|
|
583
|
+
@init_db
|
529
584
|
def get_spec(service_name: str,
|
530
585
|
version: int) -> Optional['service_spec.SkyServiceSpec']:
|
531
586
|
"""Gets spec from the database."""
|
587
|
+
assert _DB_PATH is not None
|
532
588
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
533
589
|
rows = cursor.execute(
|
534
590
|
"""\
|
@@ -540,8 +596,10 @@ def get_spec(service_name: str,
|
|
540
596
|
return None
|
541
597
|
|
542
598
|
|
599
|
+
@init_db
|
543
600
|
def delete_version(service_name: str, version: int) -> None:
|
544
601
|
"""Deletes a version from the database."""
|
602
|
+
assert _DB_PATH is not None
|
545
603
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
546
604
|
cursor.execute(
|
547
605
|
"""\
|
@@ -550,8 +608,10 @@ def delete_version(service_name: str, version: int) -> None:
|
|
550
608
|
AND version=(?)""", (service_name, version))
|
551
609
|
|
552
610
|
|
611
|
+
@init_db
|
553
612
|
def delete_all_versions(service_name: str) -> None:
|
554
613
|
"""Deletes all versions from the database."""
|
614
|
+
assert _DB_PATH is not None
|
555
615
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
556
616
|
cursor.execute(
|
557
617
|
"""\
|
@@ -559,7 +619,9 @@ def delete_all_versions(service_name: str) -> None:
|
|
559
619
|
WHERE service_name=(?)""", (service_name,))
|
560
620
|
|
561
621
|
|
622
|
+
@init_db
|
562
623
|
def get_latest_version(service_name: str) -> Optional[int]:
|
624
|
+
assert _DB_PATH is not None
|
563
625
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
564
626
|
rows = cursor.execute(
|
565
627
|
"""\
|
@@ -570,8 +632,10 @@ def get_latest_version(service_name: str) -> Optional[int]:
|
|
570
632
|
return rows[0][0]
|
571
633
|
|
572
634
|
|
635
|
+
@init_db
|
573
636
|
def get_service_controller_port(service_name: str) -> int:
|
574
637
|
"""Gets the controller port of a service."""
|
638
|
+
assert _DB_PATH is not None
|
575
639
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
576
640
|
cursor.execute('SELECT controller_port FROM services WHERE name = ?',
|
577
641
|
(service_name,))
|
@@ -581,8 +645,10 @@ def get_service_controller_port(service_name: str) -> int:
|
|
581
645
|
return row[0]
|
582
646
|
|
583
647
|
|
648
|
+
@init_db
|
584
649
|
def get_service_load_balancer_port(service_name: str) -> int:
|
585
650
|
"""Gets the load balancer port of a service."""
|
651
|
+
assert _DB_PATH is not None
|
586
652
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
587
653
|
cursor.execute('SELECT load_balancer_port FROM services WHERE name = ?',
|
588
654
|
(service_name,))
|
@@ -169,6 +169,6 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
169
169
|
include_request_names=['sky.launch', 'sky.start'],
|
170
170
|
cluster_names=[self.cluster_name])
|
171
171
|
if len(requests) == 0:
|
172
|
-
# No
|
172
|
+
# No running or pending tasks, the start process is done.
|
173
173
|
return True, None
|
174
174
|
return False, f'Waiting for cluster {self.cluster_name} to be UP.'
|
sky/server/requests/requests.py
CHANGED
@@ -9,6 +9,7 @@ import pathlib
|
|
9
9
|
import shutil
|
10
10
|
import signal
|
11
11
|
import sqlite3
|
12
|
+
import threading
|
12
13
|
import time
|
13
14
|
import traceback
|
14
15
|
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple
|
@@ -392,10 +393,6 @@ def kill_requests(request_ids: Optional[List[str]] = None,
|
|
392
393
|
return cancelled_request_ids
|
393
394
|
|
394
395
|
|
395
|
-
_DB_PATH = os.path.expanduser(server_constants.API_SERVER_REQUEST_DB_PATH)
|
396
|
-
pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
|
397
|
-
|
398
|
-
|
399
396
|
def create_table(cursor, conn):
|
400
397
|
# Enable WAL mode to avoid locking issues.
|
401
398
|
# See: issue #1441 and PR #1509
|
@@ -433,6 +430,7 @@ def create_table(cursor, conn):
|
|
433
430
|
|
434
431
|
|
435
432
|
_DB = None
|
433
|
+
_init_db_lock = threading.Lock()
|
436
434
|
|
437
435
|
|
438
436
|
def init_db(func):
|
@@ -441,8 +439,15 @@ def init_db(func):
|
|
441
439
|
@functools.wraps(func)
|
442
440
|
def wrapper(*args, **kwargs):
|
443
441
|
global _DB
|
444
|
-
if _DB is None:
|
445
|
-
|
442
|
+
if _DB is not None:
|
443
|
+
return func(*args, **kwargs)
|
444
|
+
with _init_db_lock:
|
445
|
+
if _DB is None:
|
446
|
+
db_path = os.path.expanduser(
|
447
|
+
server_constants.API_SERVER_REQUEST_DB_PATH)
|
448
|
+
pathlib.Path(db_path).parents[0].mkdir(parents=True,
|
449
|
+
exist_ok=True)
|
450
|
+
_DB = db_utils.SQLiteConn(db_path, create_table)
|
446
451
|
return func(*args, **kwargs)
|
447
452
|
|
448
453
|
return wrapper
|
sky/skylet/configs.py
CHANGED
@@ -2,17 +2,16 @@
|
|
2
2
|
import functools
|
3
3
|
import os
|
4
4
|
import pathlib
|
5
|
+
import threading
|
5
6
|
from typing import Callable, Optional, Union
|
6
7
|
|
7
8
|
from sky.utils import db_utils
|
8
9
|
|
9
|
-
_DB_PATH =
|
10
|
-
|
10
|
+
_DB_PATH = None
|
11
|
+
_db_init_lock = threading.Lock()
|
11
12
|
|
12
|
-
_table_created = False
|
13
13
|
|
14
|
-
|
15
|
-
def ensure_table(func: Callable):
|
14
|
+
def init_db(func: Callable):
|
16
15
|
"""Ensure the table exists before calling the function.
|
17
16
|
|
18
17
|
Since this module will be imported whenever `sky` is imported (due to
|
@@ -24,25 +23,32 @@ def ensure_table(func: Callable):
|
|
24
23
|
|
25
24
|
@functools.wraps(func)
|
26
25
|
def wrapper(*args, **kwargs):
|
27
|
-
global
|
28
|
-
if not
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
26
|
+
global _DB_PATH
|
27
|
+
if _DB_PATH is not None:
|
28
|
+
return func(*args, **kwargs)
|
29
|
+
|
30
|
+
with _db_init_lock:
|
31
|
+
if _DB_PATH is None:
|
32
|
+
_DB_PATH = os.path.expanduser('~/.sky/skylet_config.db')
|
33
|
+
os.makedirs(pathlib.Path(_DB_PATH).parents[0], exist_ok=True)
|
34
|
+
with db_utils.safe_cursor(
|
35
|
+
_DB_PATH
|
36
|
+
) as c: # Call it 'c' to avoid pylint complaining.
|
37
|
+
# Use WAL mode to avoid locking problem in #1507.
|
38
|
+
# Reference: https://stackoverflow.com/a/39265148
|
39
|
+
c.execute('PRAGMA journal_mode=WAL')
|
40
|
+
c.execute("""\
|
41
|
+
CREATE TABLE IF NOT EXISTS config (
|
42
|
+
key TEXT PRIMARY KEY,
|
43
|
+
value TEXT)""")
|
39
44
|
return func(*args, **kwargs)
|
40
45
|
|
41
46
|
return wrapper
|
42
47
|
|
43
48
|
|
44
|
-
@
|
49
|
+
@init_db
|
45
50
|
def get_config(key: str) -> Optional[bytes]:
|
51
|
+
assert _DB_PATH is not None
|
46
52
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
47
53
|
rows = cursor.execute('SELECT value FROM config WHERE key = ?', (key,))
|
48
54
|
for (value,) in rows:
|
@@ -50,8 +56,9 @@ def get_config(key: str) -> Optional[bytes]:
|
|
50
56
|
return None
|
51
57
|
|
52
58
|
|
53
|
-
@
|
59
|
+
@init_db
|
54
60
|
def set_config(key: str, value: Union[bytes, str]) -> None:
|
61
|
+
assert _DB_PATH is not None
|
55
62
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
56
63
|
cursor.execute(
|
57
64
|
"""\
|
sky/skylet/job_lib.py
CHANGED
@@ -64,10 +64,6 @@ class JobInfoLoc(enum.IntEnum):
|
|
64
64
|
PID = 9
|
65
65
|
|
66
66
|
|
67
|
-
_DB_PATH = os.path.expanduser('~/.sky/jobs.db')
|
68
|
-
os.makedirs(pathlib.Path(_DB_PATH).parents[0], exist_ok=True)
|
69
|
-
|
70
|
-
|
71
67
|
def create_table(cursor, conn):
|
72
68
|
# Enable WAL mode to avoid locking issues.
|
73
69
|
# See: issue #3863, #1441 and PR #1509
|
@@ -136,7 +132,9 @@ def init_db(func):
|
|
136
132
|
|
137
133
|
with _db_init_lock:
|
138
134
|
if _DB is None:
|
139
|
-
|
135
|
+
db_path = os.path.expanduser('~/.sky/jobs.db')
|
136
|
+
os.makedirs(pathlib.Path(db_path).parents[0], exist_ok=True)
|
137
|
+
_DB = db_utils.SQLiteConn(db_path, create_table)
|
140
138
|
return func(*args, **kwargs)
|
141
139
|
|
142
140
|
return wrapper
|
sky/task.py
CHANGED
@@ -911,7 +911,7 @@ class Task:
|
|
911
911
|
|
912
912
|
Different from set_file_mounts(), this function updates into the
|
913
913
|
existing file_mounts (calls ``dict.update()``), rather than
|
914
|
-
|
914
|
+
overwriting it.
|
915
915
|
|
916
916
|
This should be called before provisioning in order to take effect.
|
917
917
|
|
@@ -740,7 +740,7 @@ available_node_types:
|
|
740
740
|
spec:
|
741
741
|
securityContext:
|
742
742
|
fsGroup: 1000
|
743
|
-
# To prevent the home dir provided by the docker image from being
|
743
|
+
# To prevent the home dir provided by the docker image from being overridden by pvc mounting,
|
744
744
|
# we use initContainers to copy it first to /mnt/home, which will later be mounted to home dir.
|
745
745
|
initContainers:
|
746
746
|
- name: init-copy-home
|
sky/utils/common_utils.py
CHANGED
@@ -1017,3 +1017,9 @@ def _get_cgroup_memory_limit() -> Optional[int]:
|
|
1017
1017
|
def _is_cgroup_v2() -> bool:
|
1018
1018
|
"""Return True if the environment is running cgroup v2."""
|
1019
1019
|
return os.path.isfile('/sys/fs/cgroup/cgroup.controllers')
|
1020
|
+
|
1021
|
+
|
1022
|
+
def removeprefix(string: str, prefix: str) -> str:
    """Return ``string`` without a leading ``prefix``.

    If ``string`` does not start with ``prefix`` it is returned unchanged.
    Only one leading occurrence is stripped.
    """
    return string[len(prefix):] if string.startswith(prefix) else string
|
sky/utils/context.py
CHANGED
@@ -19,7 +19,7 @@ class Context(object):
|
|
19
19
|
This is a wrapper around `contextvars.ContextVar` that provides a typed
|
20
20
|
interface for the SkyPilot specific context variables that can be accessed
|
21
21
|
at any layer of the call stack. ContextVar is coroutine local, an empty
|
22
|
-
Context will be
|
22
|
+
Context will be initialized for each coroutine when it is created.
|
23
23
|
|
24
24
|
Adding a new context variable for a new feature is as simple as:
|
25
25
|
1. Add a new instance variable to the Context class.
|
sky/utils/infra_utils.py
CHANGED
@@ -180,7 +180,7 @@ class InfraInfo:
|
|
180
180
|
# Node Pools.
|
181
181
|
# TODO(romilb): This is a workaround while we use the global
|
182
182
|
# kubeconfig to store the ssh contexts.
|
183
|
-
region_or_zone = self.region
|
183
|
+
region_or_zone = common_utils.removeprefix(self.region, 'ssh-')
|
184
184
|
|
185
185
|
if region_or_zone is not None and truncate:
|
186
186
|
region_or_zone = common_utils.truncate_long_string(
|
@@ -316,7 +316,7 @@ EOF
|
|
316
316
|
echo "---
|
317
317
|
Done!
|
318
318
|
|
319
|
-
Kubeconfig using service
|
319
|
+
Kubeconfig using service account '${SKYPILOT_SA}' in namespace '${NAMESPACE}' written at $(pwd)/kubeconfig
|
320
320
|
|
321
321
|
Copy the generated kubeconfig file to your ~/.kube/ directory to use it with
|
322
322
|
kubectl and skypilot:
|