skypilot-nightly 1.0.0.dev20250225__py3-none-any.whl → 1.0.0.dev20250226__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '546c0864e0f1e32e3d1080d9b7a5fdf293bc1ad9'
8
+ _SKYPILOT_COMMIT_SHA = '107180a5a0ec2ffd9bc6f619fc707c49103f3942'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250225'
38
+ __version__ = '1.0.0.dev20250226'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/utils/common_utils.py CHANGED
@@ -774,13 +774,10 @@ def is_port_available(port: int, reuse_addr: bool = True) -> bool:
774
774
  return False
775
775
 
776
776
 
777
- # TODO(aylei): should be aware of cgroups
778
777
  def get_cpu_count() -> int:
779
- """Get the number of CPUs.
780
-
781
- If the API server is deployed as a pod in k8s cluster, we assume the
782
- number of CPUs is provided by the downward API.
783
- """
778
+ """Get the number of CPUs, with cgroup awareness."""
779
+ # This env-var is kept since it is still useful for limiting the resource
780
+ # of SkyPilot in non-containerized environments.
784
781
  cpu_count = os.getenv('SKYPILOT_POD_CPU_CORE_LIMIT')
785
782
  if cpu_count is not None:
786
783
  try:
@@ -790,16 +787,11 @@ def get_cpu_count() -> int:
790
787
  raise ValueError(
791
788
  f'Failed to parse the number of CPUs from {cpu_count}'
792
789
  ) from e
793
- return psutil.cpu_count()
790
+ return _cpu_count()
794
791
 
795
792
 
796
- # TODO(aylei): should be aware of cgroups
797
793
  def get_mem_size_gb() -> float:
798
- """Get the memory size in GB.
799
-
800
- If the API server is deployed as a pod in k8s cluster, we assume the
801
- memory size is provided by the downward API.
802
- """
794
+ """Get the memory size in GB, with cgroup awareness."""
803
795
  mem_size = os.getenv('SKYPILOT_POD_MEMORY_GB_LIMIT')
804
796
  if mem_size is not None:
805
797
  try:
@@ -808,4 +800,92 @@ def get_mem_size_gb() -> float:
808
800
  with ux_utils.print_exception_no_traceback():
809
801
  raise ValueError(
810
802
  f'Failed to parse the memory size from {mem_size}') from e
811
- return psutil.virtual_memory().total / (1024**3)
803
+ return _mem_size_gb()
804
+
805
+
806
def _cpu_count() -> int:
    """Return the number of CPUs usable by this process.

    The result is the smallest of:
      - the host's logical CPU count reported by psutil,
      - the size of the scheduler affinity set, on platforms where
        ``os.sched_getaffinity`` exists,
      - the cgroup CPU limit, truncated to whole cores.

    Returns:
        The usable CPU count, always at least 1.
    """
    # Host cpu cores (logical). psutil returns None when it cannot determine
    # the count, so None must not be fed to min() below.
    cpu = psutil.cpu_count()
    # CPU affinity is only exposed on some platforms (e.g. Linux).
    if hasattr(os, 'sched_getaffinity'):
        # To be safe: the affinity set should never be larger than the number
        # of logical cores, but take the minimum anyway.
        affinity = len(os.sched_getaffinity(0))
        cpu = affinity if cpu is None else min(cpu, affinity)
    cgroup_cpu = _get_cgroup_cpu_limit()
    if cgroup_cpu is not None:
        # The cgroup limit may be fractional; truncate to whole cores.
        cgroup_cores = int(cgroup_cpu)
        cpu = cgroup_cores if cpu is None else min(cpu, cgroup_cores)
    # A fractional limit below one core truncates to 0, and the count may
    # still be unknown; report at least one CPU in both cases.
    if not cpu:
        return 1
    return cpu
817
+
818
+
819
def _mem_size_gb() -> float:
    """Return the usable memory size in GB.

    Uses the host's total memory reported by psutil, capped by the cgroup
    memory limit when one is set.

    Returns:
        The memory size in GB, computed as bytes / 1024**3.
    """
    host_bytes = psutil.virtual_memory().total
    limit_bytes = _get_cgroup_memory_limit()
    # No cgroup limit means the host total is the effective size.
    if limit_bytes is None:
        effective_bytes = host_bytes
    else:
        effective_bytes = min(host_bytes, limit_bytes)
    return effective_bytes / (1024**3)
826
+
827
+
828
+ # Refer to:
829
+ # - https://docs.kernel.org/admin-guide/cgroup-v1/index.html
830
+ # - https://docs.kernel.org/admin-guide/cgroup-v2.html
831
+ # for the standards of handler files in cgroupv1 and v2.
832
+ # Since all those paths are well-known standards that are unlikely to change,
833
+ # we use string literals instead of defining extra constants.
834
def _get_cgroup_cpu_limit() -> Optional[float]:
    """Return cpu limit from cgroups in cores.

    Reads ``/sys/fs/cgroup/cpu.max`` on cgroup v2, or
    ``/sys/fs/cgroup/cpu/cpu.cfs_quota_us`` and
    ``/sys/fs/cgroup/cpu/cpu.cfs_period_us`` on cgroup v1, and divides the
    quota by the period.

    Returns:
        The cpu limit in cores as a float (can be fractional), or None if there
        is no limit in cgroups or the limit cannot be read or parsed.
    """
    try:
        if _is_cgroup_v2():
            with open('/sys/fs/cgroup/cpu.max', 'r', encoding='utf-8') as f:
                # A wrong number of fields raises ValueError, handled below.
                quota_str, period_str = f.read().strip().split()
            if quota_str == 'max':
                return None
            quota = float(quota_str)
            period = float(period_str)
        else:
            # cgroup v1
            with open('/sys/fs/cgroup/cpu/cpu.cfs_quota_us',
                      'r',
                      encoding='utf-8') as f:
                quota = float(f.read().strip())
            with open('/sys/fs/cgroup/cpu/cpu.cfs_period_us',
                      'r',
                      encoding='utf-8') as f:
                period = float(f.read().strip())
    except (OSError, ValueError):
        return None
    # Return unlimited if cpu quota is not set.
    # Note that we do not use cpu.shares since it is a relative weight
    # instead of a hard limit. It is okay to get CPU throttling under
    # high contention. And unlimited enables the server to use as much
    # CPU as available if there is no contention.
    # Both cgroup versions share this guard so that a non-positive period
    # can never cause a ZeroDivisionError.
    if quota > 0 and period > 0:
        return quota / period
    return None
868
+
869
+
870
def _get_cgroup_memory_limit() -> Optional[int]:
    """Return memory limit from cgroups in bytes.

    Reads ``memory.max`` on cgroup v2 and ``memory.limit_in_bytes`` on
    cgroup v1.

    Returns:
        The memory limit in bytes, or None if there is no limit in cgroups
        (the file is empty, holds 'max' or a non-positive number) or the
        file cannot be read or parsed.
    """
    try:
        if _is_cgroup_v2():
            limit_file = '/sys/fs/cgroup/memory.max'
        else:
            limit_file = '/sys/fs/cgroup/memory/memory.limit_in_bytes'
        with open(limit_file, 'r', encoding='utf-8') as handle:
            raw = handle.read().strip()
        # An empty file or the literal 'max' both mean "no limit".
        if raw in ('', 'max'):
            return None
        parsed = int(raw)
    except (OSError, ValueError):
        return None
    if parsed > 0:
        return parsed
    return None
887
+
888
+
889
def _is_cgroup_v2() -> bool:
    """Return True if the environment is running cgroup v2.

    The check is whether '/sys/fs/cgroup/cgroup.controllers' exists as a
    regular file.
    """
    controllers_file = '/sys/fs/cgroup/cgroup.controllers'
    return os.path.isfile(controllers_file)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250225
3
+ Version: 1.0.0.dev20250226
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -192,6 +192,7 @@ Dynamic: summary
192
192
 
193
193
  ----
194
194
  :fire: *News* :fire:
195
+ - [Feb 2025] Prepare and serve **Retrieval Augmented Generation (RAG) with DeepSeek-R1**: [**blog post**](https://blog.skypilot.co/deepseek-rag), [**example**](./llm/rag/)
195
196
  - [Feb 2025] Run and serve **DeepSeek-R1 671B** using SkyPilot and SGLang with high throughput: [**example**](./llm/deepseek-r1/)
196
197
  - [Feb 2025] Prepare and serve large-scale image search with **vector databases**: [**blog post**](https://blog.skypilot.co/large-scale-vector-database/), [**example**](./examples/vector_database/)
197
198
  - [Jan 2025] Launch and serve distilled models from **[DeepSeek-R1](https://github.com/deepseek-ai/DeepSeek-R1)** and **[Janus](https://github.com/deepseek-ai/DeepSeek-Janus)** on Kubernetes or any cloud: [**R1 example**](./llm/deepseek-r1-distilled/) and [**Janus example**](./llm/deepseek-janus/)
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=KSl1VCjCC3uU-BihR5aIRst73gWU1IOO1xhIx-4pmso,6428
1
+ sky/__init__.py,sha256=yBzHHts3hZ_4Ijs5pLgwKb4bQ250dNPA-xL6EE_jbIU,6428
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=hCEqi77nprQEg3ktfRL51xiiw16zwZOmFEDB_Z7fWVU,22384
4
4
  sky/check.py,sha256=NDKx_Zm7YRxPjMv82wz3ESLnGIPljaACyqVdVNM0PzY,11258
@@ -310,7 +310,7 @@ sky/utils/cluster_utils.py,sha256=s6DFRXktv6_gF_DnwDEXJ7CniifHp8CAPeGciRCbXgI,14
310
310
  sky/utils/command_runner.py,sha256=-7vxLvwZnTvYMQ_nScmuQWY6ZvQYv69yvvIp2uOaOqU,39063
311
311
  sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
312
312
  sky/utils/common.py,sha256=P4oVXFATUYgkruHX92cN12SJBtfb8DiOOYZtbN1kvP0,1927
313
- sky/utils/common_utils.py,sha256=SChX042UH_Beoq0SJiyw0kRpCA0faycPRaqXEF2GNZ4,28091
313
+ sky/utils/common_utils.py,sha256=1KfqF_hgH9l1ieyV-_3fJd6ma41-tOstj-ihAQcEDIM,31162
314
314
  sky/utils/config_utils.py,sha256=VQ2E3DQ2XysD-kul-diSrxn_pXWsDMfKAev91OiJQ1Q,9041
315
315
  sky/utils/control_master_utils.py,sha256=iD4M0onjYOdZ2RuxjwMBl4KhafHXJzuHjvqlBUnu-VE,1450
316
316
  sky/utils/controller_utils.py,sha256=Wth_esy2NX9nco-MK01bgQMIChAYky0Uq4T35jQkXxY,48472
@@ -344,9 +344,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
344
344
  sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=iAjfyPclOs8qlALACcfxLpRAO9CZ-h16leFqXZ6tNaY,10096
345
345
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
346
346
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
347
- skypilot_nightly-1.0.0.dev20250225.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
348
- skypilot_nightly-1.0.0.dev20250225.dist-info/METADATA,sha256=n0NjxuYEKzBIVHBricBiNx4ILxluNGwheNot34KQd2M,19055
349
- skypilot_nightly-1.0.0.dev20250225.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
350
- skypilot_nightly-1.0.0.dev20250225.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
351
- skypilot_nightly-1.0.0.dev20250225.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
352
- skypilot_nightly-1.0.0.dev20250225.dist-info/RECORD,,
347
+ skypilot_nightly-1.0.0.dev20250226.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
348
+ skypilot_nightly-1.0.0.dev20250226.dist-info/METADATA,sha256=zi9_kttCT3c1fDXvbH5gpMqHwoT_jYd_oli7tbLYw2k,19227
349
+ skypilot_nightly-1.0.0.dev20250226.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
350
+ skypilot_nightly-1.0.0.dev20250226.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
351
+ skypilot_nightly-1.0.0.dev20250226.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
352
+ skypilot_nightly-1.0.0.dev20250226.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5