skypilot-nightly 1.0.0.dev20250916-py3-none-any.whl → 1.0.0.dev20250918-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Potentially problematic release: this version of skypilot-nightly might be problematic.
- sky/__init__.py +4 -2
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +68 -4
- sky/authentication.py +25 -0
- sky/backends/__init__.py +3 -2
- sky/backends/backend_utils.py +16 -12
- sky/backends/cloud_vm_ray_backend.py +57 -0
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/primeintellect.py +314 -0
- sky/core.py +10 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/3015-ba5be550eb80fd8c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6856-e0754534b3015377.js → 6856-9a2538f38c004652.js} +1 -1
- sky/dashboard/out/_next/static/chunks/8969-a3e3f0683e19d340.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +6 -0
- sky/dashboard/out/_next/static/chunks/{webpack-05f82d90d6fd7f82.js → webpack-487697b47d8c5e50.js} +1 -1
- sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → k1mo5xWZrV9djgjd0moOT}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +42 -34
- sky/jobs/server/server.py +14 -1
- sky/jobs/state.py +26 -1
- sky/provision/__init__.py +1 -0
- sky/provision/docker_utils.py +6 -2
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/resources.py +9 -1
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_utils.py +29 -12
- sky/serve/server/core.py +37 -19
- sky/serve/server/impl.py +221 -129
- sky/server/requests/executor.py +3 -0
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +5 -3
- sky/skylet/services.py +98 -0
- sky/skylet/skylet.py +3 -1
- sky/templates/kubernetes-ray.yml.j2 +22 -12
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/METADATA +37 -36
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/RECORD +64 -52
- sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-0487dfbf149d9e53.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +0 -6
- /sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → k1mo5xWZrV9djgjd0moOT}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/top_level.txt +0 -0
sky/serve/server/impl.py
CHANGED
@@ -5,6 +5,7 @@ import shlex
 import signal
 import tempfile
 import threading
+import typing
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 import uuid
 
@@ -17,10 +18,12 @@ from sky import execution
 from sky import sky_logging
 from sky import skypilot_config
 from sky import task as task_lib
+from sky.adaptors import common as adaptors_common
 from sky.backends import backend_utils
 from sky.catalog import common as service_catalog_common
 from sky.data import storage as storage_lib
 from sky.serve import constants as serve_constants
+from sky.serve import serve_rpc_utils
 from sky.serve import serve_state
 from sky.serve import serve_utils
 from sky.skylet import constants
@@ -36,6 +39,11 @@ from sky.utils import subprocess_utils
 from sky.utils import ux_utils
 from sky.utils import yaml_utils
 
+if typing.TYPE_CHECKING:
+    import grpc
+else:
+    grpc = adaptors_common.LazyImport('grpc')
+
 logger = sky_logging.init_logger(__name__)
 
 
@@ -78,24 +86,35 @@ def _get_service_record(
     """Get the service record."""
     noun = 'pool' if pool else 'service'
 
-
-
-    returncode, serve_status_payload, stderr = backend.run_on_head(
-        handle,
-        code,
-        require_outputs=True,
-        stream_logs=False,
-        separate_stderr=True)
-    try:
-        subprocess_utils.handle_returncode(returncode,
-                                           code,
-                                           f'Failed to get {noun} status',
-                                           stderr,
-                                           stream_logs=True)
-    except exceptions.CommandError as e:
-        raise RuntimeError(e.error_msg) from e
+    assert isinstance(handle, backends.CloudVmRayResourceHandle)
+    use_legacy = not handle.is_grpc_enabled_with_flag
 
-
+    if handle.is_grpc_enabled_with_flag:
+        try:
+            service_statuses = serve_rpc_utils.RpcRunner.get_service_status(
+                handle, [service_name], pool)
+        except exceptions.SkyletMethodNotImplementedError:
+            use_legacy = True
+
+    if use_legacy:
+        code = serve_utils.ServeCodeGen.get_service_status([service_name],
+                                                           pool=pool)
+        returncode, serve_status_payload, stderr = backend.run_on_head(
+            handle,
+            code,
+            require_outputs=True,
+            stream_logs=False,
+            separate_stderr=True)
+        try:
+            subprocess_utils.handle_returncode(returncode,
+                                               code,
+                                               f'Failed to get {noun} status',
+                                               stderr,
+                                               stream_logs=True)
+        except exceptions.CommandError as e:
+            raise RuntimeError(e.error_msg) from e
+
+        service_statuses = serve_utils.load_service_status(serve_status_payload)
 
     assert len(service_statuses) <= 1, service_statuses
     if not service_statuses:
@@ -287,30 +306,44 @@ def up(
     fore = colorama.Fore
 
     assert controller_job_id is not None and controller_handle is not None
+    assert isinstance(controller_handle, backends.CloudVmRayResourceHandle)
+    backend = backend_utils.get_backend_from_handle(controller_handle)
+    assert isinstance(backend, backends.CloudVmRayBackend)
     # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
     # change after the first time, so there is no consistency issue.
-    with rich_utils.safe_status(
-            ux_utils.spinner_message(
-                f'Waiting for the {noun} to register')):
-        # This function will check the controller job id in the database
-        # and return the endpoint if the job id matches. Otherwise it will
-        # return None.
-        code = serve_utils.ServeCodeGen.wait_service_registration(
-            service_name, controller_job_id, pool)
-        backend = backend_utils.get_backend_from_handle(controller_handle)
-        assert isinstance(backend, backends.CloudVmRayBackend)
-        assert isinstance(controller_handle,
-                          backends.CloudVmRayResourceHandle)
-        returncode, lb_port_payload, _ = backend.run_on_head(
-            controller_handle,
-            code,
-            require_outputs=True,
-            stream_logs=False)
     try:
-
-
-
-
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(
+                    f'Waiting for the {noun} to register')):
+            # This function will check the controller job id in the database
+            # and return the endpoint if the job id matches. Otherwise it
+            # will return None.
+            use_legacy = not controller_handle.is_grpc_enabled_with_flag
+
+            if controller_handle.is_grpc_enabled_with_flag:
+                try:
+                    lb_port = serve_rpc_utils.RpcRunner.wait_service_registration(  # pylint: disable=line-too-long
+                        controller_handle, service_name, controller_job_id,
+                        pool)
+                except exceptions.SkyletMethodNotImplementedError:
+                    use_legacy = True
+
+            if use_legacy:
+                code = serve_utils.ServeCodeGen.wait_service_registration(
+                    service_name, controller_job_id, pool)
+                returncode, lb_port_payload, _ = backend.run_on_head(
+                    controller_handle,
+                    code,
+                    require_outputs=True,
+                    stream_logs=False)
+                subprocess_utils.handle_returncode(
+                    returncode, code,
+                    f'Failed to wait for {noun} initialization',
+                    lb_port_payload)
+                lb_port = serve_utils.load_service_initialization_result(
+                    lb_port_payload)
+    except (exceptions.CommandError, grpc.FutureTimeoutError,
+            grpc.RpcError):
         if serve_utils.is_consolidation_mode(pool):
             with ux_utils.print_exception_no_traceback():
                 raise RuntimeError(
@@ -344,8 +377,6 @@ def up(
                     'Failed to spin up the service. Please '
                     'check the logs above for more details.') from None
         else:
-            lb_port = serve_utils.load_service_initialization_result(
-                lb_port_payload)
             if not serve_utils.is_consolidation_mode(pool) and not pool:
                 socket_endpoint = backend_utils.get_endpoints(
                     controller_handle.cluster_name,
@@ -461,6 +492,7 @@ def update(
         f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
     )
 
+    assert isinstance(handle, backends.CloudVmRayResourceHandle)
     backend = backend_utils.get_backend_from_handle(handle)
     assert isinstance(backend, backends.CloudVmRayBackend)
 
@@ -503,29 +535,39 @@ def update(
     controller_utils.maybe_translate_local_file_mounts_and_sync_up(
         task, task_type='serve')
 
-
-    returncode, version_string_payload, stderr = backend.run_on_head(
-        handle,
-        code,
-        require_outputs=True,
-        stream_logs=False,
-        separate_stderr=True)
-    try:
-        subprocess_utils.handle_returncode(returncode,
-                                           code,
-                                           'Failed to add version',
-                                           stderr,
-                                           stream_logs=True)
-    except exceptions.CommandError as e:
-        raise RuntimeError(e.error_msg) from e
+    use_legacy = not handle.is_grpc_enabled_with_flag
 
-
-
-
-
-
-
-
+    if handle.is_grpc_enabled_with_flag:
+        try:
+            current_version = serve_rpc_utils.RpcRunner.add_version(
+                handle, service_name)
+        except exceptions.SkyletMethodNotImplementedError:
+            use_legacy = True
+
+    if use_legacy:
+        code = serve_utils.ServeCodeGen.add_version(service_name)
+        returncode, version_string_payload, stderr = backend.run_on_head(
+            handle,
+            code,
+            require_outputs=True,
+            stream_logs=False,
+            separate_stderr=True)
+        try:
+            subprocess_utils.handle_returncode(returncode,
+                                               code,
+                                               'Failed to add version',
+                                               stderr,
+                                               stream_logs=True)
+        except exceptions.CommandError as e:
+            raise RuntimeError(e.error_msg) from e
+
+        version_string = serve_utils.load_version_string(version_string_payload)
+        try:
+            current_version = int(version_string)
+        except ValueError as e:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(f'Failed to parse version: {version_string}; '
+                                 f'Returncode: {returncode}') from e
 
     with tempfile.NamedTemporaryFile(
             prefix=f'{service_name}-v{current_version}',
@@ -540,23 +582,33 @@ def update(
             {remote_task_yaml_path: service_file.name},
             storage_mounts=None)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        use_legacy = not handle.is_grpc_enabled_with_flag
+
+        if handle.is_grpc_enabled_with_flag:
+            try:
+                serve_rpc_utils.RpcRunner.update_service(
+                    handle, service_name, current_version, mode, pool)
+            except exceptions.SkyletMethodNotImplementedError:
+                use_legacy = True
+
+        if use_legacy:
+            code = serve_utils.ServeCodeGen.update_service(service_name,
+                                                           current_version,
+                                                           mode=mode.value,
+                                                           pool=pool)
+            returncode, _, stderr = backend.run_on_head(handle,
+                                                        code,
+                                                        require_outputs=True,
+                                                        stream_logs=False,
+                                                        separate_stderr=True)
+            try:
+                subprocess_utils.handle_returncode(returncode,
+                                                   code,
+                                                   f'Failed to update {noun}s',
+                                                   stderr,
+                                                   stream_logs=True)
+            except exceptions.CommandError as e:
+                raise RuntimeError(e.error_msg) from e
 
     cmd = 'sky jobs pool status' if pool else 'sky serve status'
     logger.info(
@@ -619,29 +671,44 @@ def down(
         raise ValueError(f'Can only specify one of {noun}_names or all. '
                          f'Provided {argument_str!r}.')
 
-    backend = backend_utils.get_backend_from_handle(handle)
-    assert isinstance(backend, backends.CloudVmRayBackend)
     service_names = None if all else service_names
-    code = serve_utils.ServeCodeGen.terminate_services(service_names, purge,
-                                                       pool)
 
     try:
-
-
-
-
+        assert isinstance(handle, backends.CloudVmRayResourceHandle)
+        use_legacy = not handle.is_grpc_enabled_with_flag
+
+        if handle.is_grpc_enabled_with_flag:
+            try:
+                stdout = serve_rpc_utils.RpcRunner.terminate_services(
+                    handle, service_names, purge, pool)
+            except exceptions.SkyletMethodNotImplementedError:
+                use_legacy = True
+
+        if use_legacy:
+            backend = backend_utils.get_backend_from_handle(handle)
+            assert isinstance(backend, backends.CloudVmRayBackend)
+            code = serve_utils.ServeCodeGen.terminate_services(
+                service_names, purge, pool)
+
+            returncode, stdout, _ = backend.run_on_head(handle,
+                                                        code,
+                                                        require_outputs=True,
+                                                        stream_logs=False)
+
+            subprocess_utils.handle_returncode(returncode, code,
+                                               f'Failed to terminate {noun}',
+                                               stdout)
     except exceptions.FetchClusterInfoError as e:
         raise RuntimeError(
             'Failed to fetch controller IP. Please refresh controller status '
-            f'by `sky status -r {controller_type.value.cluster_name}` '
-            'and try again.') from e
-
-    try:
-        subprocess_utils.handle_returncode(returncode, code,
-                                           f'Failed to terminate {noun}',
-                                           stdout)
+            f'by `sky status -r {controller_type.value.cluster_name}` and try '
+            'again.') from e
     except exceptions.CommandError as e:
         raise RuntimeError(e.error_msg) from e
+    except grpc.RpcError as e:
+        raise RuntimeError(f'{e.details()} ({e.code()})') from e
+    except grpc.FutureTimeoutError as e:
+        raise RuntimeError('gRPC timed out') from e
 
     logger.info(stdout)
 
@@ -669,27 +736,40 @@ def status(
         stopped_message=controller_type.value.default_hint_if_non_existent.
         replace('service', noun))
 
-
-
+    assert isinstance(handle, backends.CloudVmRayResourceHandle)
+    use_legacy = not handle.is_grpc_enabled_with_flag
 
-
-
-
-
-
-
-
+    if handle.is_grpc_enabled_with_flag:
+        try:
+            service_records = serve_rpc_utils.RpcRunner.get_service_status(
+                handle, service_names, pool)
+        except exceptions.SkyletMethodNotImplementedError:
+            use_legacy = True
+
+    if use_legacy:
+        backend = backend_utils.get_backend_from_handle(handle)
+        assert isinstance(backend, backends.CloudVmRayBackend)
+
+        code = serve_utils.ServeCodeGen.get_service_status(service_names,
+                                                           pool=pool)
+        returncode, serve_status_payload, stderr = backend.run_on_head(
+            handle,
+            code,
+            require_outputs=True,
+            stream_logs=False,
+            separate_stderr=True)
 
-
-
-
-
-
-
-
-
+        try:
+            subprocess_utils.handle_returncode(returncode,
+                                               code,
+                                               f'Failed to fetch {noun}s',
+                                               stderr,
+                                               stream_logs=True)
+        except exceptions.CommandError as e:
+            raise RuntimeError(e.error_msg) from e
+
+        service_records = serve_utils.load_service_status(serve_status_payload)
 
-    service_records = serve_utils.load_service_status(serve_status_payload)
     # Get the endpoint for each service
     for service_record in service_records:
         service_record['endpoint'] = None
@@ -792,25 +872,37 @@ def _get_all_replica_targets(
         handle: backends.CloudVmRayResourceHandle,
         pool: bool) -> Set[serve_utils.ServiceComponentTarget]:
     """Helper function to get targets for all live replicas."""
-
-
-    returncode, serve_status_payload, stderr = backend.run_on_head(
-        handle,
-        code,
-        require_outputs=True,
-        stream_logs=False,
-        separate_stderr=True)
+    assert isinstance(handle, backends.CloudVmRayResourceHandle)
+    use_legacy = not handle.is_grpc_enabled_with_flag
 
-
-
-
-
-
-
-
-
+    if handle.is_grpc_enabled_with_flag:
+        try:
+            service_records = serve_rpc_utils.RpcRunner.get_service_status(
+                handle, [service_name], pool)
+        except exceptions.SkyletMethodNotImplementedError:
+            use_legacy = True
+
+    if use_legacy:
+        code = serve_utils.ServeCodeGen.get_service_status([service_name],
+                                                           pool=pool)
+        returncode, serve_status_payload, stderr = backend.run_on_head(
+            handle,
+            code,
+            require_outputs=True,
+            stream_logs=False,
+            separate_stderr=True)
+
+        try:
+            subprocess_utils.handle_returncode(returncode,
+                                               code,
+                                               'Failed to fetch services',
+                                               stderr,
+                                               stream_logs=True)
+        except exceptions.CommandError as e:
+            raise RuntimeError(e.error_msg) from e
+
+        service_records = serve_utils.load_service_status(serve_status_payload)
 
-    service_records = serve_utils.load_service_status(serve_status_payload)
     if not service_records:
        raise ValueError(f'Service {service_name!r} not found.')
     assert len(service_records) == 1
sky/server/requests/executor.py
CHANGED
@@ -465,6 +465,9 @@ def _request_execution_wrapper(request_id: str,
         # Capture the peak RSS before GC.
         peak_rss = max(proc.memory_info().rss,
                        metrics_lib.peak_rss_bytes)
+        # Clear request level cache to release all memory used by
+        # the request.
+        annotations.clear_request_level_cache()
         with metrics_lib.time_it(name='release_memory',
                                  group='internal'):
             common_utils.release_memory()
sky/setup_files/dependencies.py
CHANGED
@@ -189,6 +189,7 @@ extras_require: Dict[str, List[str]] = {
     'fluidstack': [],  # No dependencies needed for fluidstack
     'cudo': ['cudo-compute>=0.1.10'],
     'paperspace': [],  # No dependencies needed for paperspace
+    'primeintellect': [],  # No dependencies needed for primeintellect
     'do': ['pydo>=0.3.0', 'azure-core>=1.24.0', 'azure-common'],
     'vast': ['vastai-sdk>=0.1.12'],
     'vsphere': [
sky/skylet/constants.py
CHANGED
@@ -62,7 +62,8 @@ SKY_UV_INSTALL_CMD = (f'{SKY_UV_CMD} -V >/dev/null 2>&1 || '
                       'curl -LsSf https://astral.sh/uv/install.sh '
                       f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh')
 SKY_UV_PIP_CMD: str = (f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip')
-SKY_UV_RUN_CMD: str = (
+SKY_UV_RUN_CMD: str = (
+    f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} run --active')
 # Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH and unsetting relevant
 # VIRTUAL_ENV envvars to deactivate the environment. `deactivate` command does
 # not work when conda is used.
@@ -153,7 +154,7 @@ CONDA_INSTALLATION_COMMANDS = (
     # because for some images, conda is already installed, but not initialized.
     # In this case, we need to initialize conda and set auto_activate_base to
     # true.
-    '{ bash Miniconda3-Linux.sh -b; '
+    '{ bash Miniconda3-Linux.sh -b || true; '
     'eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && '
     # Caller should replace {conda_auto_activate} with either true or false.
     'conda config --set auto_activate_base {conda_auto_activate} && '
@@ -456,7 +457,8 @@ CATALOG_SCHEMA_VERSION = 'v8'
 CATALOG_DIR = '~/.sky/catalogs'
 ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
               'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
-              'paperspace', 'do', 'nebius', 'ssh', 'hyperbolic', 'seeweb')
+              'paperspace', 'primeintellect', 'do', 'nebius', 'ssh',
+              'hyperbolic', 'seeweb')
 # END constants used for service catalog.
 
 # The user ID of the SkyPilot system.
sky/skylet/services.py
CHANGED
@@ -10,7 +10,11 @@ from sky.schemas.generated import autostopv1_pb2
 from sky.schemas.generated import autostopv1_pb2_grpc
 from sky.schemas.generated import jobsv1_pb2
 from sky.schemas.generated import jobsv1_pb2_grpc
+from sky.schemas.generated import servev1_pb2
+from sky.schemas.generated import servev1_pb2_grpc
+from sky.serve import serve_rpc_utils
 from sky.serve import serve_state
+from sky.serve import serve_utils
 from sky.skylet import autostop_lib
 from sky.skylet import constants
 from sky.skylet import job_lib
@@ -52,6 +56,100 @@ class AutostopServiceImpl(autostopv1_pb2_grpc.AutostopServiceServicer):
             context.abort(grpc.StatusCode.INTERNAL, str(e))
 
 
+class ServeServiceImpl(servev1_pb2_grpc.ServeServiceServicer):
+    """Implementation of the ServeService gRPC service."""
+
+    # NOTE (kyuds): this grpc service will run cluster-side,
+    # thus guaranteeing that SERVE_VERSION is above 5.
+    # Therefore, we removed some SERVE_VERSION checks
+    # present in the original codegen.
+
+    def GetServiceStatus(  # type: ignore[return]
+            self, request: servev1_pb2.GetServiceStatusRequest,
+            context: grpc.ServicerContext
+    ) -> servev1_pb2.GetServiceStatusResponse:
+        """Gets serve status."""
+        try:
+            service_names, pool = (
+                serve_rpc_utils.GetServiceStatusRequestConverter.from_proto(request))  # pylint: disable=line-too-long
+            statuses = serve_utils.get_service_status_pickled(
+                service_names, pool)
+            return serve_rpc_utils.GetServiceStatusResponseConverter.to_proto(
+                statuses)
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+    def AddVersion(  # type: ignore[return]
+            self, request: servev1_pb2.AddVersionRequest,
+            context: grpc.ServicerContext) -> servev1_pb2.AddVersionResponse:
+        """Adds serve version"""
+        try:
+            service_name = request.service_name
+            version = serve_state.add_version(service_name)
+            return servev1_pb2.AddVersionResponse(version=version)
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+    def TerminateServices(  # type: ignore[return]
+            self, request: servev1_pb2.TerminateServicesRequest,
+            context: grpc.ServicerContext
+    ) -> servev1_pb2.TerminateServicesResponse:
+        """Terminates serve"""
+        try:
+            service_names, purge, pool = (
+                serve_rpc_utils.TerminateServicesRequestConverter.from_proto(request))  # pylint: disable=line-too-long
+            message = serve_utils.terminate_services(service_names, purge, pool)
+            return servev1_pb2.TerminateServicesResponse(message=message)
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+    def TerminateReplica(  # type: ignore[return]
+            self, request: servev1_pb2.TerminateReplicaRequest,
+            context: grpc.ServicerContext
+    ) -> servev1_pb2.TerminateReplicaResponse:
+        """Terminate replica"""
+        try:
+            service_name = request.service_name
+            replica_id = request.replica_id
+            purge = request.purge
+            message = serve_utils.terminate_replica(service_name, replica_id,
+                                                    purge)
+            return servev1_pb2.TerminateReplicaResponse(message=message)
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+    def WaitServiceRegistration(  # type: ignore[return]
+            self, request: servev1_pb2.WaitServiceRegistrationRequest,
+            context: grpc.ServicerContext
+    ) -> servev1_pb2.WaitServiceRegistrationResponse:
+        """Wait for service to be registered"""
+        try:
+            service_name = request.service_name
+            job_id = request.job_id
+            pool = request.pool
+            encoded = serve_utils.wait_service_registration(
+                service_name, job_id, pool)
+            lb_port = serve_utils.load_service_initialization_result(encoded)
+            return servev1_pb2.WaitServiceRegistrationResponse(lb_port=lb_port)
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+    def UpdateService(  # type: ignore[return]
+            self, request: servev1_pb2.UpdateServiceRequest,
+            context: grpc.ServicerContext) -> servev1_pb2.UpdateServiceResponse:
+        """Update service"""
+        try:
+            service_name = request.service_name
+            version = request.version
+            mode = request.mode
+            pool = request.pool
+            serve_utils.update_service_encoded(service_name, version, mode,
+                                               pool)
+            return servev1_pb2.UpdateServiceResponse()
+        except Exception as e:  # pylint: disable=broad-except
+            context.abort(grpc.StatusCode.INTERNAL, str(e))
+
+
 class JobsServiceImpl(jobsv1_pb2_grpc.JobsServiceServicer):
     """Implementation of the JobsService gRPC service."""
 
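Each method of ServeServiceImpl follows one convention: unpack the request (directly or through a serve_rpc_utils converter), delegate to the existing serve_utils/serve_state function, and turn any exception into an INTERNAL abort so callers get a structured gRPC status instead of a remote stack trace. Distilled into a standalone helper for illustration (a sketch; the servicer above inlines this pattern rather than calling such a helper):

# Hypothetical distillation of the per-method pattern above: delegate to the
# underlying function and surface any failure as a gRPC INTERNAL status.
import grpc


def _abort_on_error(context: grpc.ServicerContext, fn, *args):
    try:
        return fn(*args)
    except Exception as e:  # pylint: disable=broad-except
        # context.abort() raises, so the caller never returns normally
        # on failure.
        context.abort(grpc.StatusCode.INTERNAL, str(e))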
sky/skylet/skylet.py
CHANGED
@@ -10,6 +10,7 @@ import sky
 from sky import sky_logging
 from sky.schemas.generated import autostopv1_pb2_grpc
 from sky.schemas.generated import jobsv1_pb2_grpc
+from sky.schemas.generated import servev1_pb2_grpc
 from sky.skylet import constants
 from sky.skylet import events
 from sky.skylet import services
@@ -50,9 +51,10 @@ def start_grpc_server(port: int = constants.SKYLET_GRPC_PORT) -> grpc.Server:
 
     autostopv1_pb2_grpc.add_AutostopServiceServicer_to_server(
         services.AutostopServiceImpl(), server)
-
     jobsv1_pb2_grpc.add_JobsServiceServicer_to_server(
         services.JobsServiceImpl(), server)
+    servev1_pb2_grpc.add_ServeServiceServicer_to_server(
+        services.ServeServiceImpl(), server)
 
     listen_addr = f'127.0.0.1:{port}'
     server.add_insecure_port(listen_addr)