skypilot-nightly 1.0.0.dev20250916__py3-none-any.whl → 1.0.0.dev20250918__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (67)
  1. sky/__init__.py +4 -2
  2. sky/adaptors/primeintellect.py +1 -0
  3. sky/adaptors/seeweb.py +68 -4
  4. sky/authentication.py +25 -0
  5. sky/backends/__init__.py +3 -2
  6. sky/backends/backend_utils.py +16 -12
  7. sky/backends/cloud_vm_ray_backend.py +57 -0
  8. sky/catalog/primeintellect_catalog.py +95 -0
  9. sky/clouds/__init__.py +2 -0
  10. sky/clouds/primeintellect.py +314 -0
  11. sky/core.py +10 -3
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/chunks/3015-ba5be550eb80fd8c.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/{6856-e0754534b3015377.js → 6856-9a2538f38c004652.js} +1 -1
  15. sky/dashboard/out/_next/static/chunks/8969-a3e3f0683e19d340.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/{webpack-05f82d90d6fd7f82.js → webpack-487697b47d8c5e50.js} +1 -1
  18. sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → k1mo5xWZrV9djgjd0moOT}/_buildManifest.js +1 -1
  19. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  20. sky/dashboard/out/clusters/[cluster].html +1 -1
  21. sky/dashboard/out/clusters.html +1 -1
  22. sky/dashboard/out/config.html +1 -1
  23. sky/dashboard/out/index.html +1 -1
  24. sky/dashboard/out/infra/[context].html +1 -1
  25. sky/dashboard/out/infra.html +1 -1
  26. sky/dashboard/out/jobs/[job].html +1 -1
  27. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  28. sky/dashboard/out/jobs.html +1 -1
  29. sky/dashboard/out/users.html +1 -1
  30. sky/dashboard/out/volumes.html +1 -1
  31. sky/dashboard/out/workspace/new.html +1 -1
  32. sky/dashboard/out/workspaces/[name].html +1 -1
  33. sky/dashboard/out/workspaces.html +1 -1
  34. sky/global_user_state.py +42 -34
  35. sky/jobs/server/server.py +14 -1
  36. sky/jobs/state.py +26 -1
  37. sky/provision/__init__.py +1 -0
  38. sky/provision/docker_utils.py +6 -2
  39. sky/provision/primeintellect/__init__.py +10 -0
  40. sky/provision/primeintellect/config.py +11 -0
  41. sky/provision/primeintellect/instance.py +454 -0
  42. sky/provision/primeintellect/utils.py +398 -0
  43. sky/resources.py +9 -1
  44. sky/schemas/generated/servev1_pb2.py +58 -0
  45. sky/schemas/generated/servev1_pb2.pyi +115 -0
  46. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  47. sky/serve/serve_rpc_utils.py +179 -0
  48. sky/serve/serve_utils.py +29 -12
  49. sky/serve/server/core.py +37 -19
  50. sky/serve/server/impl.py +221 -129
  51. sky/server/requests/executor.py +3 -0
  52. sky/setup_files/dependencies.py +1 -0
  53. sky/skylet/constants.py +5 -3
  54. sky/skylet/services.py +98 -0
  55. sky/skylet/skylet.py +3 -1
  56. sky/templates/kubernetes-ray.yml.j2 +22 -12
  57. sky/templates/primeintellect-ray.yml.j2 +71 -0
  58. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/METADATA +37 -36
  59. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/RECORD +64 -52
  60. sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +0 -1
  61. sky/dashboard/out/_next/static/chunks/8969-0487dfbf149d9e53.js +0 -1
  62. sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +0 -6
  63. sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → k1mo5xWZrV9djgjd0moOT}/_ssgManifest.js +0 -0
  64. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/WHEEL +0 -0
  65. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/entry_points.txt +0 -0
  66. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/licenses/LICENSE +0 -0
  67. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250918.dist-info}/top_level.txt +0 -0
sky/serve/server/impl.py CHANGED
@@ -5,6 +5,7 @@ import shlex
5
5
  import signal
6
6
  import tempfile
7
7
  import threading
8
+ import typing
8
9
  from typing import Any, Dict, List, Optional, Set, Tuple, Union
9
10
  import uuid
10
11
 
@@ -17,10 +18,12 @@ from sky import execution
17
18
  from sky import sky_logging
18
19
  from sky import skypilot_config
19
20
  from sky import task as task_lib
21
+ from sky.adaptors import common as adaptors_common
20
22
  from sky.backends import backend_utils
21
23
  from sky.catalog import common as service_catalog_common
22
24
  from sky.data import storage as storage_lib
23
25
  from sky.serve import constants as serve_constants
26
+ from sky.serve import serve_rpc_utils
24
27
  from sky.serve import serve_state
25
28
  from sky.serve import serve_utils
26
29
  from sky.skylet import constants
@@ -36,6 +39,11 @@ from sky.utils import subprocess_utils
36
39
  from sky.utils import ux_utils
37
40
  from sky.utils import yaml_utils
38
41
 
42
+ if typing.TYPE_CHECKING:
43
+ import grpc
44
+ else:
45
+ grpc = adaptors_common.LazyImport('grpc')
46
+
39
47
  logger = sky_logging.init_logger(__name__)
40
48
 
41
49
 
@@ -78,24 +86,35 @@ def _get_service_record(
78
86
  """Get the service record."""
79
87
  noun = 'pool' if pool else 'service'
80
88
 
81
- code = serve_utils.ServeCodeGen.get_service_status([service_name],
82
- pool=pool)
83
- returncode, serve_status_payload, stderr = backend.run_on_head(
84
- handle,
85
- code,
86
- require_outputs=True,
87
- stream_logs=False,
88
- separate_stderr=True)
89
- try:
90
- subprocess_utils.handle_returncode(returncode,
91
- code,
92
- f'Failed to get {noun} status',
93
- stderr,
94
- stream_logs=True)
95
- except exceptions.CommandError as e:
96
- raise RuntimeError(e.error_msg) from e
89
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
90
+ use_legacy = not handle.is_grpc_enabled_with_flag
97
91
 
98
- service_statuses = serve_utils.load_service_status(serve_status_payload)
92
+ if handle.is_grpc_enabled_with_flag:
93
+ try:
94
+ service_statuses = serve_rpc_utils.RpcRunner.get_service_status(
95
+ handle, [service_name], pool)
96
+ except exceptions.SkyletMethodNotImplementedError:
97
+ use_legacy = True
98
+
99
+ if use_legacy:
100
+ code = serve_utils.ServeCodeGen.get_service_status([service_name],
101
+ pool=pool)
102
+ returncode, serve_status_payload, stderr = backend.run_on_head(
103
+ handle,
104
+ code,
105
+ require_outputs=True,
106
+ stream_logs=False,
107
+ separate_stderr=True)
108
+ try:
109
+ subprocess_utils.handle_returncode(returncode,
110
+ code,
111
+ f'Failed to get {noun} status',
112
+ stderr,
113
+ stream_logs=True)
114
+ except exceptions.CommandError as e:
115
+ raise RuntimeError(e.error_msg) from e
116
+
117
+ service_statuses = serve_utils.load_service_status(serve_status_payload)
99
118
 
100
119
  assert len(service_statuses) <= 1, service_statuses
101
120
  if not service_statuses:
@@ -287,30 +306,44 @@ def up(
287
306
  fore = colorama.Fore
288
307
 
289
308
  assert controller_job_id is not None and controller_handle is not None
309
+ assert isinstance(controller_handle, backends.CloudVmRayResourceHandle)
310
+ backend = backend_utils.get_backend_from_handle(controller_handle)
311
+ assert isinstance(backend, backends.CloudVmRayBackend)
290
312
  # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
291
313
  # change after the first time, so there is no consistency issue.
292
- with rich_utils.safe_status(
293
- ux_utils.spinner_message(
294
- f'Waiting for the {noun} to register')):
295
- # This function will check the controller job id in the database
296
- # and return the endpoint if the job id matches. Otherwise it will
297
- # return None.
298
- code = serve_utils.ServeCodeGen.wait_service_registration(
299
- service_name, controller_job_id, pool)
300
- backend = backend_utils.get_backend_from_handle(controller_handle)
301
- assert isinstance(backend, backends.CloudVmRayBackend)
302
- assert isinstance(controller_handle,
303
- backends.CloudVmRayResourceHandle)
304
- returncode, lb_port_payload, _ = backend.run_on_head(
305
- controller_handle,
306
- code,
307
- require_outputs=True,
308
- stream_logs=False)
309
314
  try:
310
- subprocess_utils.handle_returncode(
311
- returncode, code, f'Failed to wait for {noun} initialization',
312
- lb_port_payload)
313
- except exceptions.CommandError:
315
+ with rich_utils.safe_status(
316
+ ux_utils.spinner_message(
317
+ f'Waiting for the {noun} to register')):
318
+ # This function will check the controller job id in the database
319
+ # and return the endpoint if the job id matches. Otherwise it
320
+ # will return None.
321
+ use_legacy = not controller_handle.is_grpc_enabled_with_flag
322
+
323
+ if controller_handle.is_grpc_enabled_with_flag:
324
+ try:
325
+ lb_port = serve_rpc_utils.RpcRunner.wait_service_registration( # pylint: disable=line-too-long
326
+ controller_handle, service_name, controller_job_id,
327
+ pool)
328
+ except exceptions.SkyletMethodNotImplementedError:
329
+ use_legacy = True
330
+
331
+ if use_legacy:
332
+ code = serve_utils.ServeCodeGen.wait_service_registration(
333
+ service_name, controller_job_id, pool)
334
+ returncode, lb_port_payload, _ = backend.run_on_head(
335
+ controller_handle,
336
+ code,
337
+ require_outputs=True,
338
+ stream_logs=False)
339
+ subprocess_utils.handle_returncode(
340
+ returncode, code,
341
+ f'Failed to wait for {noun} initialization',
342
+ lb_port_payload)
343
+ lb_port = serve_utils.load_service_initialization_result(
344
+ lb_port_payload)
345
+ except (exceptions.CommandError, grpc.FutureTimeoutError,
346
+ grpc.RpcError):
314
347
  if serve_utils.is_consolidation_mode(pool):
315
348
  with ux_utils.print_exception_no_traceback():
316
349
  raise RuntimeError(
@@ -344,8 +377,6 @@ def up(
344
377
  'Failed to spin up the service. Please '
345
378
  'check the logs above for more details.') from None
346
379
  else:
347
- lb_port = serve_utils.load_service_initialization_result(
348
- lb_port_payload)
349
380
  if not serve_utils.is_consolidation_mode(pool) and not pool:
350
381
  socket_endpoint = backend_utils.get_endpoints(
351
382
  controller_handle.cluster_name,
@@ -461,6 +492,7 @@ def update(
461
492
  f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
462
493
  )
463
494
 
495
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
464
496
  backend = backend_utils.get_backend_from_handle(handle)
465
497
  assert isinstance(backend, backends.CloudVmRayBackend)
466
498
 
@@ -503,29 +535,39 @@ def update(
503
535
  controller_utils.maybe_translate_local_file_mounts_and_sync_up(
504
536
  task, task_type='serve')
505
537
 
506
- code = serve_utils.ServeCodeGen.add_version(service_name)
507
- returncode, version_string_payload, stderr = backend.run_on_head(
508
- handle,
509
- code,
510
- require_outputs=True,
511
- stream_logs=False,
512
- separate_stderr=True)
513
- try:
514
- subprocess_utils.handle_returncode(returncode,
515
- code,
516
- 'Failed to add version',
517
- stderr,
518
- stream_logs=True)
519
- except exceptions.CommandError as e:
520
- raise RuntimeError(e.error_msg) from e
538
+ use_legacy = not handle.is_grpc_enabled_with_flag
521
539
 
522
- version_string = serve_utils.load_version_string(version_string_payload)
523
- try:
524
- current_version = int(version_string)
525
- except ValueError as e:
526
- with ux_utils.print_exception_no_traceback():
527
- raise ValueError(f'Failed to parse version: {version_string}; '
528
- f'Returncode: {returncode}') from e
540
+ if handle.is_grpc_enabled_with_flag:
541
+ try:
542
+ current_version = serve_rpc_utils.RpcRunner.add_version(
543
+ handle, service_name)
544
+ except exceptions.SkyletMethodNotImplementedError:
545
+ use_legacy = True
546
+
547
+ if use_legacy:
548
+ code = serve_utils.ServeCodeGen.add_version(service_name)
549
+ returncode, version_string_payload, stderr = backend.run_on_head(
550
+ handle,
551
+ code,
552
+ require_outputs=True,
553
+ stream_logs=False,
554
+ separate_stderr=True)
555
+ try:
556
+ subprocess_utils.handle_returncode(returncode,
557
+ code,
558
+ 'Failed to add version',
559
+ stderr,
560
+ stream_logs=True)
561
+ except exceptions.CommandError as e:
562
+ raise RuntimeError(e.error_msg) from e
563
+
564
+ version_string = serve_utils.load_version_string(version_string_payload)
565
+ try:
566
+ current_version = int(version_string)
567
+ except ValueError as e:
568
+ with ux_utils.print_exception_no_traceback():
569
+ raise ValueError(f'Failed to parse version: {version_string}; '
570
+ f'Returncode: {returncode}') from e
529
571
 
530
572
  with tempfile.NamedTemporaryFile(
531
573
  prefix=f'{service_name}-v{current_version}',
@@ -540,23 +582,33 @@ def update(
540
582
  {remote_task_yaml_path: service_file.name},
541
583
  storage_mounts=None)
542
584
 
543
- code = serve_utils.ServeCodeGen.update_service(service_name,
544
- current_version,
545
- mode=mode.value,
546
- pool=pool)
547
- returncode, _, stderr = backend.run_on_head(handle,
548
- code,
549
- require_outputs=True,
550
- stream_logs=False,
551
- separate_stderr=True)
552
- try:
553
- subprocess_utils.handle_returncode(returncode,
554
- code,
555
- f'Failed to update {noun}s',
556
- stderr,
557
- stream_logs=True)
558
- except exceptions.CommandError as e:
559
- raise RuntimeError(e.error_msg) from e
585
+ use_legacy = not handle.is_grpc_enabled_with_flag
586
+
587
+ if handle.is_grpc_enabled_with_flag:
588
+ try:
589
+ serve_rpc_utils.RpcRunner.update_service(
590
+ handle, service_name, current_version, mode, pool)
591
+ except exceptions.SkyletMethodNotImplementedError:
592
+ use_legacy = True
593
+
594
+ if use_legacy:
595
+ code = serve_utils.ServeCodeGen.update_service(service_name,
596
+ current_version,
597
+ mode=mode.value,
598
+ pool=pool)
599
+ returncode, _, stderr = backend.run_on_head(handle,
600
+ code,
601
+ require_outputs=True,
602
+ stream_logs=False,
603
+ separate_stderr=True)
604
+ try:
605
+ subprocess_utils.handle_returncode(returncode,
606
+ code,
607
+ f'Failed to update {noun}s',
608
+ stderr,
609
+ stream_logs=True)
610
+ except exceptions.CommandError as e:
611
+ raise RuntimeError(e.error_msg) from e
560
612
 
561
613
  cmd = 'sky jobs pool status' if pool else 'sky serve status'
562
614
  logger.info(
@@ -619,29 +671,44 @@ def down(
619
671
  raise ValueError(f'Can only specify one of {noun}_names or all. '
620
672
  f'Provided {argument_str!r}.')
621
673
 
622
- backend = backend_utils.get_backend_from_handle(handle)
623
- assert isinstance(backend, backends.CloudVmRayBackend)
624
674
  service_names = None if all else service_names
625
- code = serve_utils.ServeCodeGen.terminate_services(service_names, purge,
626
- pool)
627
675
 
628
676
  try:
629
- returncode, stdout, _ = backend.run_on_head(handle,
630
- code,
631
- require_outputs=True,
632
- stream_logs=False)
677
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
678
+ use_legacy = not handle.is_grpc_enabled_with_flag
679
+
680
+ if handle.is_grpc_enabled_with_flag:
681
+ try:
682
+ stdout = serve_rpc_utils.RpcRunner.terminate_services(
683
+ handle, service_names, purge, pool)
684
+ except exceptions.SkyletMethodNotImplementedError:
685
+ use_legacy = True
686
+
687
+ if use_legacy:
688
+ backend = backend_utils.get_backend_from_handle(handle)
689
+ assert isinstance(backend, backends.CloudVmRayBackend)
690
+ code = serve_utils.ServeCodeGen.terminate_services(
691
+ service_names, purge, pool)
692
+
693
+ returncode, stdout, _ = backend.run_on_head(handle,
694
+ code,
695
+ require_outputs=True,
696
+ stream_logs=False)
697
+
698
+ subprocess_utils.handle_returncode(returncode, code,
699
+ f'Failed to terminate {noun}',
700
+ stdout)
633
701
  except exceptions.FetchClusterInfoError as e:
634
702
  raise RuntimeError(
635
703
  'Failed to fetch controller IP. Please refresh controller status '
636
- f'by `sky status -r {controller_type.value.cluster_name}` '
637
- 'and try again.') from e
638
-
639
- try:
640
- subprocess_utils.handle_returncode(returncode, code,
641
- f'Failed to terminate {noun}',
642
- stdout)
704
+ f'by `sky status -r {controller_type.value.cluster_name}` and try '
705
+ 'again.') from e
643
706
  except exceptions.CommandError as e:
644
707
  raise RuntimeError(e.error_msg) from e
708
+ except grpc.RpcError as e:
709
+ raise RuntimeError(f'{e.details()} ({e.code()})') from e
710
+ except grpc.FutureTimeoutError as e:
711
+ raise RuntimeError('gRPC timed out') from e
645
712
 
646
713
  logger.info(stdout)
647
714
 
@@ -669,27 +736,40 @@ def status(
669
736
  stopped_message=controller_type.value.default_hint_if_non_existent.
670
737
  replace('service', noun))
671
738
 
672
- backend = backend_utils.get_backend_from_handle(handle)
673
- assert isinstance(backend, backends.CloudVmRayBackend)
739
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
740
+ use_legacy = not handle.is_grpc_enabled_with_flag
674
741
 
675
- code = serve_utils.ServeCodeGen.get_service_status(service_names, pool=pool)
676
- returncode, serve_status_payload, stderr = backend.run_on_head(
677
- handle,
678
- code,
679
- require_outputs=True,
680
- stream_logs=False,
681
- separate_stderr=True)
742
+ if handle.is_grpc_enabled_with_flag:
743
+ try:
744
+ service_records = serve_rpc_utils.RpcRunner.get_service_status(
745
+ handle, service_names, pool)
746
+ except exceptions.SkyletMethodNotImplementedError:
747
+ use_legacy = True
748
+
749
+ if use_legacy:
750
+ backend = backend_utils.get_backend_from_handle(handle)
751
+ assert isinstance(backend, backends.CloudVmRayBackend)
752
+
753
+ code = serve_utils.ServeCodeGen.get_service_status(service_names,
754
+ pool=pool)
755
+ returncode, serve_status_payload, stderr = backend.run_on_head(
756
+ handle,
757
+ code,
758
+ require_outputs=True,
759
+ stream_logs=False,
760
+ separate_stderr=True)
682
761
 
683
- try:
684
- subprocess_utils.handle_returncode(returncode,
685
- code,
686
- f'Failed to fetch {noun}s',
687
- stderr,
688
- stream_logs=True)
689
- except exceptions.CommandError as e:
690
- raise RuntimeError(e.error_msg) from e
762
+ try:
763
+ subprocess_utils.handle_returncode(returncode,
764
+ code,
765
+ f'Failed to fetch {noun}s',
766
+ stderr,
767
+ stream_logs=True)
768
+ except exceptions.CommandError as e:
769
+ raise RuntimeError(e.error_msg) from e
770
+
771
+ service_records = serve_utils.load_service_status(serve_status_payload)
691
772
 
692
- service_records = serve_utils.load_service_status(serve_status_payload)
693
773
  # Get the endpoint for each service
694
774
  for service_record in service_records:
695
775
  service_record['endpoint'] = None
@@ -792,25 +872,37 @@ def _get_all_replica_targets(
792
872
  handle: backends.CloudVmRayResourceHandle,
793
873
  pool: bool) -> Set[serve_utils.ServiceComponentTarget]:
794
874
  """Helper function to get targets for all live replicas."""
795
- code = serve_utils.ServeCodeGen.get_service_status([service_name],
796
- pool=pool)
797
- returncode, serve_status_payload, stderr = backend.run_on_head(
798
- handle,
799
- code,
800
- require_outputs=True,
801
- stream_logs=False,
802
- separate_stderr=True)
875
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
876
+ use_legacy = not handle.is_grpc_enabled_with_flag
803
877
 
804
- try:
805
- subprocess_utils.handle_returncode(returncode,
806
- code,
807
- 'Failed to fetch services',
808
- stderr,
809
- stream_logs=True)
810
- except exceptions.CommandError as e:
811
- raise RuntimeError(e.error_msg) from e
878
+ if handle.is_grpc_enabled_with_flag:
879
+ try:
880
+ service_records = serve_rpc_utils.RpcRunner.get_service_status(
881
+ handle, [service_name], pool)
882
+ except exceptions.SkyletMethodNotImplementedError:
883
+ use_legacy = True
884
+
885
+ if use_legacy:
886
+ code = serve_utils.ServeCodeGen.get_service_status([service_name],
887
+ pool=pool)
888
+ returncode, serve_status_payload, stderr = backend.run_on_head(
889
+ handle,
890
+ code,
891
+ require_outputs=True,
892
+ stream_logs=False,
893
+ separate_stderr=True)
894
+
895
+ try:
896
+ subprocess_utils.handle_returncode(returncode,
897
+ code,
898
+ 'Failed to fetch services',
899
+ stderr,
900
+ stream_logs=True)
901
+ except exceptions.CommandError as e:
902
+ raise RuntimeError(e.error_msg) from e
903
+
904
+ service_records = serve_utils.load_service_status(serve_status_payload)
812
905
 
813
- service_records = serve_utils.load_service_status(serve_status_payload)
814
906
  if not service_records:
815
907
  raise ValueError(f'Service {service_name!r} not found.')
816
908
  assert len(service_records) == 1
sky/server/requests/executor.py CHANGED
@@ -465,6 +465,9 @@ def _request_execution_wrapper(request_id: str,
465
465
  # Capture the peak RSS before GC.
466
466
  peak_rss = max(proc.memory_info().rss,
467
467
  metrics_lib.peak_rss_bytes)
468
+ # Clear request level cache to release all memory used by
469
+ # the request.
470
+ annotations.clear_request_level_cache()
468
471
  with metrics_lib.time_it(name='release_memory',
469
472
  group='internal'):
470
473
  common_utils.release_memory()
sky/setup_files/dependencies.py CHANGED
@@ -189,6 +189,7 @@ extras_require: Dict[str, List[str]] = {
189
189
  'fluidstack': [], # No dependencies needed for fluidstack
190
190
  'cudo': ['cudo-compute>=0.1.10'],
191
191
  'paperspace': [], # No dependencies needed for paperspace
192
+ 'primeintellect': [], # No dependencies needed for primeintellect
192
193
  'do': ['pydo>=0.3.0', 'azure-core>=1.24.0', 'azure-common'],
193
194
  'vast': ['vastai-sdk>=0.1.12'],
194
195
  'vsphere': [
sky/skylet/constants.py CHANGED
@@ -62,7 +62,8 @@ SKY_UV_INSTALL_CMD = (f'{SKY_UV_CMD} -V >/dev/null 2>&1 || '
62
62
  'curl -LsSf https://astral.sh/uv/install.sh '
63
63
  f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh')
64
64
  SKY_UV_PIP_CMD: str = (f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip')
65
- SKY_UV_RUN_CMD: str = (f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} run')
65
+ SKY_UV_RUN_CMD: str = (
66
+ f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} run --active')
66
67
  # Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH and unsetting relevant
67
68
  # VIRTUAL_ENV envvars to deactivate the environment. `deactivate` command does
68
69
  # not work when conda is used.
@@ -153,7 +154,7 @@ CONDA_INSTALLATION_COMMANDS = (
153
154
  # because for some images, conda is already installed, but not initialized.
154
155
  # In this case, we need to initialize conda and set auto_activate_base to
155
156
  # true.
156
- '{ bash Miniconda3-Linux.sh -b; '
157
+ '{ bash Miniconda3-Linux.sh -b || true; '
157
158
  'eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && '
158
159
  # Caller should replace {conda_auto_activate} with either true or false.
159
160
  'conda config --set auto_activate_base {conda_auto_activate} && '
@@ -456,7 +457,8 @@ CATALOG_SCHEMA_VERSION = 'v8'
456
457
  CATALOG_DIR = '~/.sky/catalogs'
457
458
  ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
458
459
  'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
459
- 'paperspace', 'do', 'nebius', 'ssh', 'hyperbolic', 'seeweb')
460
+ 'paperspace', 'primeintellect', 'do', 'nebius', 'ssh',
461
+ 'hyperbolic', 'seeweb')
460
462
  # END constants used for service catalog.
461
463
 
462
464
  # The user ID of the SkyPilot system.
sky/skylet/services.py CHANGED
@@ -10,7 +10,11 @@ from sky.schemas.generated import autostopv1_pb2
10
10
  from sky.schemas.generated import autostopv1_pb2_grpc
11
11
  from sky.schemas.generated import jobsv1_pb2
12
12
  from sky.schemas.generated import jobsv1_pb2_grpc
13
+ from sky.schemas.generated import servev1_pb2
14
+ from sky.schemas.generated import servev1_pb2_grpc
15
+ from sky.serve import serve_rpc_utils
13
16
  from sky.serve import serve_state
17
+ from sky.serve import serve_utils
14
18
  from sky.skylet import autostop_lib
15
19
  from sky.skylet import constants
16
20
  from sky.skylet import job_lib
@@ -52,6 +56,100 @@ class AutostopServiceImpl(autostopv1_pb2_grpc.AutostopServiceServicer):
52
56
  context.abort(grpc.StatusCode.INTERNAL, str(e))
53
57
 
54
58
 
59
+ class ServeServiceImpl(servev1_pb2_grpc.ServeServiceServicer):
60
+ """Implementation of the ServeService gRPC service."""
61
+
62
+ # NOTE (kyuds): this grpc service will run cluster-side,
63
+ # thus guaranteeing that SERVE_VERSION is above 5.
64
+ # Therefore, we removed some SERVE_VERSION checks
65
+ # present in the original codegen.
66
+
67
+ def GetServiceStatus( # type: ignore[return]
68
+ self, request: servev1_pb2.GetServiceStatusRequest,
69
+ context: grpc.ServicerContext
70
+ ) -> servev1_pb2.GetServiceStatusResponse:
71
+ """Gets serve status."""
72
+ try:
73
+ service_names, pool = (
74
+ serve_rpc_utils.GetServiceStatusRequestConverter.from_proto(request)) # pylint: disable=line-too-long
75
+ statuses = serve_utils.get_service_status_pickled(
76
+ service_names, pool)
77
+ return serve_rpc_utils.GetServiceStatusResponseConverter.to_proto(
78
+ statuses)
79
+ except Exception as e: # pylint: disable=broad-except
80
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
81
+
82
+ def AddVersion( # type: ignore[return]
83
+ self, request: servev1_pb2.AddVersionRequest,
84
+ context: grpc.ServicerContext) -> servev1_pb2.AddVersionResponse:
85
+ """Adds serve version"""
86
+ try:
87
+ service_name = request.service_name
88
+ version = serve_state.add_version(service_name)
89
+ return servev1_pb2.AddVersionResponse(version=version)
90
+ except Exception as e: # pylint: disable=broad-except
91
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
92
+
93
+ def TerminateServices( # type: ignore[return]
94
+ self, request: servev1_pb2.TerminateServicesRequest,
95
+ context: grpc.ServicerContext
96
+ ) -> servev1_pb2.TerminateServicesResponse:
97
+ """Terminates serve"""
98
+ try:
99
+ service_names, purge, pool = (
100
+ serve_rpc_utils.TerminateServicesRequestConverter.from_proto(request)) # pylint: disable=line-too-long
101
+ message = serve_utils.terminate_services(service_names, purge, pool)
102
+ return servev1_pb2.TerminateServicesResponse(message=message)
103
+ except Exception as e: # pylint: disable=broad-except
104
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
105
+
106
+ def TerminateReplica( # type: ignore[return]
107
+ self, request: servev1_pb2.TerminateReplicaRequest,
108
+ context: grpc.ServicerContext
109
+ ) -> servev1_pb2.TerminateReplicaResponse:
110
+ """Terminate replica"""
111
+ try:
112
+ service_name = request.service_name
113
+ replica_id = request.replica_id
114
+ purge = request.purge
115
+ message = serve_utils.terminate_replica(service_name, replica_id,
116
+ purge)
117
+ return servev1_pb2.TerminateReplicaResponse(message=message)
118
+ except Exception as e: # pylint: disable=broad-except
119
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
120
+
121
+ def WaitServiceRegistration( # type: ignore[return]
122
+ self, request: servev1_pb2.WaitServiceRegistrationRequest,
123
+ context: grpc.ServicerContext
124
+ ) -> servev1_pb2.WaitServiceRegistrationResponse:
125
+ """Wait for service to be registered"""
126
+ try:
127
+ service_name = request.service_name
128
+ job_id = request.job_id
129
+ pool = request.pool
130
+ encoded = serve_utils.wait_service_registration(
131
+ service_name, job_id, pool)
132
+ lb_port = serve_utils.load_service_initialization_result(encoded)
133
+ return servev1_pb2.WaitServiceRegistrationResponse(lb_port=lb_port)
134
+ except Exception as e: # pylint: disable=broad-except
135
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
136
+
137
+ def UpdateService( # type: ignore[return]
138
+ self, request: servev1_pb2.UpdateServiceRequest,
139
+ context: grpc.ServicerContext) -> servev1_pb2.UpdateServiceResponse:
140
+ """Update service"""
141
+ try:
142
+ service_name = request.service_name
143
+ version = request.version
144
+ mode = request.mode
145
+ pool = request.pool
146
+ serve_utils.update_service_encoded(service_name, version, mode,
147
+ pool)
148
+ return servev1_pb2.UpdateServiceResponse()
149
+ except Exception as e: # pylint: disable=broad-except
150
+ context.abort(grpc.StatusCode.INTERNAL, str(e))
151
+
152
+
55
153
  class JobsServiceImpl(jobsv1_pb2_grpc.JobsServiceServicer):
56
154
  """Implementation of the JobsService gRPC service."""
57
155
 
sky/skylet/skylet.py CHANGED
@@ -10,6 +10,7 @@ import sky
10
10
  from sky import sky_logging
11
11
  from sky.schemas.generated import autostopv1_pb2_grpc
12
12
  from sky.schemas.generated import jobsv1_pb2_grpc
13
+ from sky.schemas.generated import servev1_pb2_grpc
13
14
  from sky.skylet import constants
14
15
  from sky.skylet import events
15
16
  from sky.skylet import services
@@ -50,9 +51,10 @@ def start_grpc_server(port: int = constants.SKYLET_GRPC_PORT) -> grpc.Server:
50
51
 
51
52
  autostopv1_pb2_grpc.add_AutostopServiceServicer_to_server(
52
53
  services.AutostopServiceImpl(), server)
53
-
54
54
  jobsv1_pb2_grpc.add_JobsServiceServicer_to_server(
55
55
  services.JobsServiceImpl(), server)
56
+ servev1_pb2_grpc.add_ServeServiceServicer_to_server(
57
+ services.ServeServiceImpl(), server)
56
58
 
57
59
  listen_addr = f'127.0.0.1:{port}'
58
60
  server.add_insecure_port(listen_addr)