skypilot-nightly 1.0.0.dev20250216__py3-none-any.whl → 1.0.0.dev20250218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. sky/__init__.py +48 -22
  2. sky/adaptors/aws.py +2 -1
  3. sky/adaptors/azure.py +4 -4
  4. sky/adaptors/cloudflare.py +4 -4
  5. sky/adaptors/kubernetes.py +8 -8
  6. sky/authentication.py +42 -45
  7. sky/backends/backend.py +2 -2
  8. sky/backends/backend_utils.py +108 -221
  9. sky/backends/cloud_vm_ray_backend.py +283 -282
  10. sky/benchmark/benchmark_utils.py +6 -2
  11. sky/check.py +40 -28
  12. sky/cli.py +1213 -1116
  13. sky/client/__init__.py +1 -0
  14. sky/client/cli.py +5644 -0
  15. sky/client/common.py +345 -0
  16. sky/client/sdk.py +1757 -0
  17. sky/cloud_stores.py +12 -6
  18. sky/clouds/__init__.py +0 -2
  19. sky/clouds/aws.py +20 -13
  20. sky/clouds/azure.py +5 -3
  21. sky/clouds/cloud.py +1 -1
  22. sky/clouds/cudo.py +2 -1
  23. sky/clouds/do.py +7 -3
  24. sky/clouds/fluidstack.py +3 -2
  25. sky/clouds/gcp.py +10 -8
  26. sky/clouds/ibm.py +8 -7
  27. sky/clouds/kubernetes.py +7 -6
  28. sky/clouds/lambda_cloud.py +8 -7
  29. sky/clouds/oci.py +4 -3
  30. sky/clouds/paperspace.py +2 -1
  31. sky/clouds/runpod.py +2 -1
  32. sky/clouds/scp.py +8 -7
  33. sky/clouds/service_catalog/__init__.py +3 -3
  34. sky/clouds/service_catalog/aws_catalog.py +7 -1
  35. sky/clouds/service_catalog/common.py +4 -2
  36. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +2 -2
  37. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +1 -0
  38. sky/clouds/utils/oci_utils.py +1 -1
  39. sky/clouds/vast.py +2 -1
  40. sky/clouds/vsphere.py +2 -1
  41. sky/core.py +263 -99
  42. sky/dag.py +4 -0
  43. sky/data/mounting_utils.py +2 -1
  44. sky/data/storage.py +97 -35
  45. sky/data/storage_utils.py +69 -9
  46. sky/exceptions.py +138 -5
  47. sky/execution.py +47 -50
  48. sky/global_user_state.py +105 -22
  49. sky/jobs/__init__.py +12 -14
  50. sky/jobs/client/__init__.py +0 -0
  51. sky/jobs/client/sdk.py +296 -0
  52. sky/jobs/constants.py +30 -1
  53. sky/jobs/controller.py +12 -6
  54. sky/jobs/dashboard/dashboard.py +2 -6
  55. sky/jobs/recovery_strategy.py +22 -29
  56. sky/jobs/server/__init__.py +1 -0
  57. sky/jobs/{core.py → server/core.py} +101 -34
  58. sky/jobs/server/dashboard_utils.py +64 -0
  59. sky/jobs/server/server.py +182 -0
  60. sky/jobs/utils.py +32 -23
  61. sky/models.py +27 -0
  62. sky/optimizer.py +22 -22
  63. sky/provision/__init__.py +6 -3
  64. sky/provision/aws/config.py +2 -2
  65. sky/provision/aws/instance.py +1 -1
  66. sky/provision/azure/instance.py +1 -1
  67. sky/provision/cudo/instance.py +1 -1
  68. sky/provision/do/instance.py +1 -1
  69. sky/provision/do/utils.py +0 -5
  70. sky/provision/fluidstack/fluidstack_utils.py +4 -3
  71. sky/provision/fluidstack/instance.py +4 -2
  72. sky/provision/gcp/instance.py +1 -1
  73. sky/provision/instance_setup.py +2 -2
  74. sky/provision/kubernetes/constants.py +8 -0
  75. sky/provision/kubernetes/instance.py +1 -1
  76. sky/provision/kubernetes/utils.py +67 -76
  77. sky/provision/lambda_cloud/instance.py +3 -15
  78. sky/provision/logging.py +1 -1
  79. sky/provision/oci/instance.py +7 -4
  80. sky/provision/paperspace/instance.py +1 -1
  81. sky/provision/provisioner.py +3 -2
  82. sky/provision/runpod/instance.py +1 -1
  83. sky/provision/vast/instance.py +1 -1
  84. sky/provision/vsphere/instance.py +2 -11
  85. sky/resources.py +63 -47
  86. sky/serve/__init__.py +6 -10
  87. sky/serve/client/__init__.py +0 -0
  88. sky/serve/client/sdk.py +366 -0
  89. sky/serve/constants.py +3 -0
  90. sky/serve/replica_managers.py +10 -10
  91. sky/serve/serve_utils.py +56 -36
  92. sky/serve/server/__init__.py +0 -0
  93. sky/serve/{core.py → server/core.py} +37 -17
  94. sky/serve/server/server.py +117 -0
  95. sky/serve/service.py +8 -1
  96. sky/server/__init__.py +1 -0
  97. sky/server/common.py +442 -0
  98. sky/server/constants.py +21 -0
  99. sky/server/html/log.html +174 -0
  100. sky/server/requests/__init__.py +0 -0
  101. sky/server/requests/executor.py +462 -0
  102. sky/server/requests/payloads.py +481 -0
  103. sky/server/requests/queues/__init__.py +0 -0
  104. sky/server/requests/queues/mp_queue.py +76 -0
  105. sky/server/requests/requests.py +567 -0
  106. sky/server/requests/serializers/__init__.py +0 -0
  107. sky/server/requests/serializers/decoders.py +192 -0
  108. sky/server/requests/serializers/encoders.py +166 -0
  109. sky/server/server.py +1095 -0
  110. sky/server/stream_utils.py +144 -0
  111. sky/setup_files/MANIFEST.in +1 -0
  112. sky/setup_files/dependencies.py +12 -4
  113. sky/setup_files/setup.py +1 -1
  114. sky/sky_logging.py +9 -13
  115. sky/skylet/autostop_lib.py +2 -2
  116. sky/skylet/constants.py +46 -12
  117. sky/skylet/events.py +5 -6
  118. sky/skylet/job_lib.py +78 -66
  119. sky/skylet/log_lib.py +17 -11
  120. sky/skypilot_config.py +79 -94
  121. sky/task.py +119 -73
  122. sky/templates/aws-ray.yml.j2 +4 -4
  123. sky/templates/azure-ray.yml.j2 +3 -2
  124. sky/templates/cudo-ray.yml.j2 +3 -2
  125. sky/templates/fluidstack-ray.yml.j2 +3 -2
  126. sky/templates/gcp-ray.yml.j2 +3 -2
  127. sky/templates/ibm-ray.yml.j2 +3 -2
  128. sky/templates/jobs-controller.yaml.j2 +1 -12
  129. sky/templates/kubernetes-ray.yml.j2 +3 -2
  130. sky/templates/lambda-ray.yml.j2 +3 -2
  131. sky/templates/oci-ray.yml.j2 +3 -2
  132. sky/templates/paperspace-ray.yml.j2 +3 -2
  133. sky/templates/runpod-ray.yml.j2 +3 -2
  134. sky/templates/scp-ray.yml.j2 +3 -2
  135. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  136. sky/templates/vsphere-ray.yml.j2 +4 -2
  137. sky/templates/websocket_proxy.py +64 -0
  138. sky/usage/constants.py +8 -0
  139. sky/usage/usage_lib.py +45 -11
  140. sky/utils/accelerator_registry.py +33 -53
  141. sky/utils/admin_policy_utils.py +2 -1
  142. sky/utils/annotations.py +51 -0
  143. sky/utils/cli_utils/status_utils.py +33 -3
  144. sky/utils/cluster_utils.py +356 -0
  145. sky/utils/command_runner.py +69 -14
  146. sky/utils/common.py +74 -0
  147. sky/utils/common_utils.py +133 -93
  148. sky/utils/config_utils.py +204 -0
  149. sky/utils/control_master_utils.py +2 -3
  150. sky/utils/controller_utils.py +133 -147
  151. sky/utils/dag_utils.py +72 -24
  152. sky/utils/kubernetes/deploy_remote_cluster.sh +2 -2
  153. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  154. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  155. sky/utils/log_utils.py +83 -23
  156. sky/utils/message_utils.py +81 -0
  157. sky/utils/registry.py +127 -0
  158. sky/utils/resources_utils.py +2 -2
  159. sky/utils/rich_utils.py +213 -34
  160. sky/utils/schemas.py +19 -2
  161. sky/{status_lib.py → utils/status_lib.py} +12 -7
  162. sky/utils/subprocess_utils.py +51 -35
  163. sky/utils/timeline.py +7 -2
  164. sky/utils/ux_utils.py +95 -25
  165. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/METADATA +8 -3
  166. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/RECORD +170 -132
  167. sky/clouds/cloud_registry.py +0 -76
  168. sky/utils/cluster_yaml_utils.py +0 -24
  169. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/LICENSE +0 -0
  170. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/WHEEL +0 -0
  171. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/entry_points.txt +0 -0
  172. {skypilot_nightly-1.0.0.dev20250216.dist-info → skypilot_nightly-1.0.0.dev20250218.dist-info}/top_level.txt +0 -0
sky/cloud_stores.py CHANGED
@@ -54,7 +54,8 @@ class S3CloudStorage(CloudStorage):
54
54
 
55
55
  # List of commands to install AWS CLI
56
56
  _GET_AWSCLI = [
57
- 'aws --version >/dev/null 2>&1 || pip3 install awscli',
57
+ 'aws --version >/dev/null 2>&1 || '
58
+ f'{constants.SKY_UV_PIP_CMD} install awscli',
58
59
  ]
59
60
 
60
61
  def is_directory(self, url: str) -> bool:
@@ -84,7 +85,8 @@ class S3CloudStorage(CloudStorage):
84
85
  # AWS Sync by default uses 10 threads to upload files to the bucket.
85
86
  # To increase parallelism, modify max_concurrent_requests in your
86
87
  # aws config file (Default path: ~/.aws/config).
87
- download_via_awscli = ('aws s3 sync --no-follow-symlinks '
88
+ download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
89
+ 'sync --no-follow-symlinks '
88
90
  f'{source} {destination}')
89
91
 
90
92
  all_commands = list(self._GET_AWSCLI)
@@ -93,7 +95,8 @@ class S3CloudStorage(CloudStorage):
93
95
 
94
96
  def make_sync_file_command(self, source: str, destination: str) -> str:
95
97
  """Downloads a file using AWS CLI."""
96
- download_via_awscli = f'aws s3 cp {source} {destination}'
98
+ download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
99
+ f'cp {source} {destination}')
97
100
 
98
101
  all_commands = list(self._GET_AWSCLI)
99
102
  all_commands.append(download_via_awscli)
@@ -354,7 +357,8 @@ class R2CloudStorage(CloudStorage):
354
357
 
355
358
  # List of commands to install AWS CLI
356
359
  _GET_AWSCLI = [
357
- 'aws --version >/dev/null 2>&1 || pip3 install awscli',
360
+ 'aws --version >/dev/null 2>&1 || '
361
+ f'{constants.SKY_UV_PIP_CMD} install awscli',
358
362
  ]
359
363
 
360
364
  def is_directory(self, url: str) -> bool:
@@ -389,7 +393,8 @@ class R2CloudStorage(CloudStorage):
389
393
  source = source.replace('r2://', 's3://')
390
394
  download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
391
395
  f'{cloudflare.R2_CREDENTIALS_PATH} '
392
- 'aws s3 sync --no-follow-symlinks '
396
+ f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
397
+ 'sync --no-follow-symlinks '
393
398
  f'{source} {destination} '
394
399
  f'--endpoint {endpoint_url} '
395
400
  f'--profile={cloudflare.R2_PROFILE_NAME}')
@@ -405,7 +410,8 @@ class R2CloudStorage(CloudStorage):
405
410
  source = source.replace('r2://', 's3://')
406
411
  download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
407
412
  f'{cloudflare.R2_CREDENTIALS_PATH} '
408
- f'aws s3 cp {source} {destination} '
413
+ f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
414
+ f'cp {source} {destination} '
409
415
  f'--endpoint {endpoint_url} '
410
416
  f'--profile={cloudflare.R2_PROFILE_NAME}')
411
417
 
sky/clouds/__init__.py CHANGED
@@ -8,7 +8,6 @@ from sky.clouds.cloud import ProvisionerVersion
8
8
  from sky.clouds.cloud import Region
9
9
  from sky.clouds.cloud import StatusVersion
10
10
  from sky.clouds.cloud import Zone
11
- from sky.clouds.cloud_registry import CLOUD_REGISTRY
12
11
 
13
12
  # NOTE: import the above first to avoid circular imports.
14
13
  # isort: split
@@ -47,7 +46,6 @@ __all__ = [
47
46
  'CloudImplementationFeatures',
48
47
  'Region',
49
48
  'Zone',
50
- 'CLOUD_REGISTRY',
51
49
  'ProvisionerVersion',
52
50
  'StatusVersion',
53
51
  'Fluidstack',
sky/clouds/aws.py CHANGED
@@ -1,7 +1,6 @@
1
1
  """Amazon Web Services."""
2
2
  import enum
3
3
  import fnmatch
4
- import functools
5
4
  import hashlib
6
5
  import json
7
6
  import os
@@ -21,7 +20,9 @@ from sky.clouds import service_catalog
21
20
  from sky.clouds.service_catalog import common as catalog_common
22
21
  from sky.clouds.utils import aws_utils
23
22
  from sky.skylet import constants
23
+ from sky.utils import annotations
24
24
  from sky.utils import common_utils
25
+ from sky.utils import registry
25
26
  from sky.utils import resources_utils
26
27
  from sky.utils import rich_utils
27
28
  from sky.utils import subprocess_utils
@@ -30,7 +31,7 @@ from sky.utils import ux_utils
30
31
  if typing.TYPE_CHECKING:
31
32
  # renaming to avoid shadowing variables
32
33
  from sky import resources as resources_lib
33
- from sky import status_lib
34
+ from sky.utils import status_lib
34
35
 
35
36
  logger = sky_logging.init_logger(__name__)
36
37
 
@@ -126,7 +127,7 @@ class AWSIdentityType(enum.Enum):
126
127
  return self in expirable_types
127
128
 
128
129
 
129
- @clouds.CLOUD_REGISTRY.register
130
+ @registry.CLOUD_REGISTRY.register
130
131
  class AWS(clouds.Cloud):
131
132
  """Amazon Web Services."""
132
133
 
@@ -558,7 +559,8 @@ class AWS(clouds.Cloud):
558
559
  fuzzy_candidate_list, None)
559
560
 
560
561
  @classmethod
561
- @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
562
+ @annotations.lru_cache(scope='global',
563
+ maxsize=1) # Cache since getting identity is slow.
562
564
  def check_credentials(cls) -> Tuple[bool, Optional[str]]:
563
565
  """Checks if the user has access credentials to this cloud."""
564
566
 
@@ -696,7 +698,7 @@ class AWS(clouds.Cloud):
696
698
  return AWSIdentityType.SHARED_CREDENTIALS_FILE
697
699
 
698
700
  @classmethod
699
- @functools.lru_cache(maxsize=1)
701
+ @annotations.lru_cache(scope='global', maxsize=1)
700
702
  def _aws_configure_list(cls) -> Optional[bytes]:
701
703
  proc = subprocess.run('aws configure list',
702
704
  shell=True,
@@ -708,7 +710,8 @@ class AWS(clouds.Cloud):
708
710
  return proc.stdout
709
711
 
710
712
  @classmethod
711
- @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
713
+ @annotations.lru_cache(scope='global',
714
+ maxsize=1) # Cache since getting identity is slow.
712
715
  def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
713
716
  try:
714
717
  sts = aws.client('sts', check_credentials=False)
@@ -789,7 +792,8 @@ class AWS(clouds.Cloud):
789
792
  return [user_ids]
790
793
 
791
794
  @classmethod
792
- @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
795
+ @annotations.lru_cache(scope='global',
796
+ maxsize=1) # Cache since getting identity is slow.
793
797
  def get_user_identities(cls) -> Optional[List[List[str]]]:
794
798
  """Returns a [UserId, Account] list that uniquely identifies the user.
795
799
 
@@ -893,11 +897,11 @@ class AWS(clouds.Cloud):
893
897
  if os.path.exists(os.path.expanduser(f'~/.aws/{filename}'))
894
898
  }
895
899
 
896
- @functools.lru_cache(maxsize=1)
900
+ @annotations.lru_cache(scope='global', maxsize=1)
897
901
  def can_credential_expire(self) -> bool:
898
902
  identity_type = self._current_identity_type()
899
- return identity_type is not None and identity_type.can_credential_expire(
900
- )
903
+ return (identity_type is not None and
904
+ identity_type.can_credential_expire())
901
905
 
902
906
  def instance_type_exists(self, instance_type):
903
907
  return service_catalog.instance_type_exists(instance_type, clouds='aws')
@@ -945,7 +949,8 @@ class AWS(clouds.Cloud):
945
949
  Returns:
946
950
  False if the quota is found to be zero, and True otherwise.
947
951
  Raises:
948
- ImportError: if the dependencies for AWS are not able to be installed.
952
+ ImportError: if the dependencies for AWS are not able to be
953
+ installed.
949
954
  botocore.exceptions.ClientError: error in Boto3 client request.
950
955
  """
951
956
 
@@ -959,7 +964,8 @@ class AWS(clouds.Cloud):
959
964
  quota_code = aws_catalog.get_quota_code(instance_type, use_spot)
960
965
 
961
966
  if quota_code is None:
962
- # Quota code not found in the catalog for the chosen instance_type, try provisioning anyway
967
+ # Quota code not found in the catalog for the chosen instance_type,
968
+ # try provisioning anyway.
963
969
  return True
964
970
 
965
971
  if aws_utils.use_reservations():
@@ -973,7 +979,8 @@ class AWS(clouds.Cloud):
973
979
  response = client.get_service_quota(ServiceCode='ec2',
974
980
  QuotaCode=quota_code)
975
981
  except aws.botocore_exceptions().ClientError:
976
- # Botocore client connection not established, try provisioning anyways
982
+ # Botocore client connection not established, try provisioning
983
+ # anyways
977
984
  return True
978
985
 
979
986
  if response['Quota']['Value'] == 0:
sky/clouds/azure.py CHANGED
@@ -1,5 +1,4 @@
1
1
  """Azure."""
2
- import functools
3
2
  import os
4
3
  import re
5
4
  import subprocess
@@ -17,7 +16,9 @@ from sky import skypilot_config
17
16
  from sky.adaptors import azure
18
17
  from sky.clouds import service_catalog
19
18
  from sky.clouds.utils import azure_utils
19
+ from sky.utils import annotations
20
20
  from sky.utils import common_utils
21
+ from sky.utils import registry
21
22
  from sky.utils import resources_utils
22
23
  from sky.utils import ux_utils
23
24
 
@@ -60,7 +61,7 @@ def _run_output(cmd):
60
61
  return proc.stdout.decode('ascii')
61
62
 
62
63
 
63
- @clouds.CLOUD_REGISTRY.register
64
+ @registry.CLOUD_REGISTRY.register
64
65
  class Azure(clouds.Cloud):
65
66
  """Azure."""
66
67
 
@@ -574,7 +575,8 @@ class Azure(clouds.Cloud):
574
575
  clouds='azure')
575
576
 
576
577
  @classmethod
577
- @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
578
+ @annotations.lru_cache(scope='global',
579
+ maxsize=1) # Cache since getting identity is slow.
578
580
  def get_user_identities(cls) -> Optional[List[List[str]]]:
579
581
  """Returns the cloud user identity."""
580
582
  # This returns the user's email address + [subscription_id].
sky/clouds/cloud.py CHANGED
@@ -23,7 +23,7 @@ from sky.utils import ux_utils
23
23
 
24
24
  if typing.TYPE_CHECKING:
25
25
  from sky import resources as resources_lib
26
- from sky import status_lib
26
+ from sky.utils import status_lib
27
27
 
28
28
 
29
29
  class CloudImplementationFeatures(enum.Enum):
sky/clouds/cudo.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
6
6
  from sky import clouds
7
7
  from sky.clouds import service_catalog
8
8
  from sky.utils import common_utils
9
+ from sky.utils import registry
9
10
  from sky.utils import resources_utils
10
11
 
11
12
  if typing.TYPE_CHECKING:
@@ -27,7 +28,7 @@ def _run_output(cmd):
27
28
  return proc.stdout.decode('ascii')
28
29
 
29
30
 
30
- @clouds.CLOUD_REGISTRY.register
31
+ @registry.CLOUD_REGISTRY.register
31
32
  class Cudo(clouds.Cloud):
32
33
  """Cudo Compute"""
33
34
  _REPR = 'Cudo'
sky/clouds/do.py CHANGED
@@ -8,6 +8,7 @@ from sky import clouds
8
8
  from sky.adaptors import do
9
9
  from sky.clouds import service_catalog
10
10
  from sky.provision.do import utils as do_utils
11
+ from sky.utils import registry
11
12
  from sky.utils import resources_utils
12
13
 
13
14
  if typing.TYPE_CHECKING:
@@ -16,7 +17,7 @@ if typing.TYPE_CHECKING:
16
17
  _CREDENTIAL_FILE = 'config.yaml'
17
18
 
18
19
 
19
- @clouds.CLOUD_REGISTRY.register(aliases=['digitalocean'])
20
+ @registry.CLOUD_REGISTRY.register(aliases=['digitalocean'])
20
21
  class DO(clouds.Cloud):
21
22
  """Digital Ocean Cloud"""
22
23
 
@@ -232,8 +233,11 @@ class DO(clouds.Cloud):
232
233
  cpus=resources.cpus,
233
234
  memory=resources.memory,
234
235
  disk_tier=resources.disk_tier)
235
- return resources_utils.FeasibleResources(
236
- _make([default_instance_type]), [], None)
236
+ if default_instance_type is None:
237
+ return resources_utils.FeasibleResources([], [], None)
238
+ else:
239
+ return resources_utils.FeasibleResources(
240
+ _make([default_instance_type]), [], None)
237
241
 
238
242
  assert len(accelerators) == 1, resources
239
243
  acc, acc_count = list(accelerators.items())[0]
sky/clouds/fluidstack.py CHANGED
@@ -6,10 +6,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
6
6
  import requests
7
7
 
8
8
  from sky import clouds
9
- from sky import status_lib
10
9
  from sky.clouds import service_catalog
11
10
  from sky.provision.fluidstack import fluidstack_utils
11
+ from sky.utils import registry
12
12
  from sky.utils import resources_utils
13
+ from sky.utils import status_lib
13
14
  from sky.utils.resources_utils import DiskTier
14
15
 
15
16
  _CREDENTIAL_FILES = [
@@ -21,7 +22,7 @@ if typing.TYPE_CHECKING:
21
22
  from sky import resources as resources_lib
22
23
 
23
24
 
24
- @clouds.CLOUD_REGISTRY.register
25
+ @registry.CLOUD_REGISTRY.register
25
26
  class Fluidstack(clouds.Cloud):
26
27
  """FluidStack GPU Cloud."""
27
28
 
sky/clouds/gcp.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Google Cloud Platform."""
2
2
  import enum
3
- import functools
4
3
  import json
5
4
  import os
6
5
  import re
@@ -18,14 +17,16 @@ from sky import skypilot_config
18
17
  from sky.adaptors import gcp
19
18
  from sky.clouds import service_catalog
20
19
  from sky.clouds.utils import gcp_utils
20
+ from sky.utils import annotations
21
21
  from sky.utils import common_utils
22
+ from sky.utils import registry
22
23
  from sky.utils import resources_utils
23
24
  from sky.utils import subprocess_utils
24
25
  from sky.utils import ux_utils
25
26
 
26
27
  if typing.TYPE_CHECKING:
27
28
  from sky import resources
28
- from sky import status_lib
29
+ from sky.utils import status_lib
29
30
 
30
31
  logger = sky_logging.init_logger(__name__)
31
32
 
@@ -136,7 +137,7 @@ class GCPIdentityType(enum.Enum):
136
137
  return self == GCPIdentityType.SHARED_CREDENTIALS_FILE
137
138
 
138
139
 
139
- @clouds.CLOUD_REGISTRY.register
140
+ @registry.CLOUD_REGISTRY.register
140
141
  class GCP(clouds.Cloud):
141
142
  """Google Cloud Platform."""
142
143
 
@@ -348,7 +349,7 @@ class GCP(clouds.Cloud):
348
349
  return find_machine is not None
349
350
 
350
351
  @classmethod
351
- @functools.lru_cache(maxsize=1)
352
+ @annotations.lru_cache(scope='global', maxsize=1)
352
353
  def _get_image_size(cls, image_id: str) -> float:
353
354
  if image_id.startswith('skypilot:'):
354
355
  return DEFAULT_GCP_IMAGE_GB
@@ -866,11 +867,11 @@ class GCP(clouds.Cloud):
866
867
  pass
867
868
  return credentials
868
869
 
869
- @functools.lru_cache(maxsize=1)
870
+ @annotations.lru_cache(scope='global', maxsize=1)
870
871
  def can_credential_expire(self) -> bool:
871
872
  identity_type = self._get_identity_type()
872
- return identity_type is not None and identity_type.can_credential_expire(
873
- )
873
+ return (identity_type is not None and
874
+ identity_type.can_credential_expire())
874
875
 
875
876
  @classmethod
876
877
  def _get_identity_type(cls) -> Optional[GCPIdentityType]:
@@ -886,7 +887,8 @@ class GCP(clouds.Cloud):
886
887
  return GCPIdentityType.SHARED_CREDENTIALS_FILE
887
888
 
888
889
  @classmethod
889
- @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
890
+ @annotations.lru_cache(scope='request',
891
+ maxsize=1) # Cache since getting identity is slow.
890
892
  def get_user_identities(cls) -> List[List[str]]:
891
893
  """Returns the email address + project id of the active user."""
892
894
  try:
sky/clouds/ibm.py CHANGED
@@ -7,11 +7,12 @@ import colorama
7
7
 
8
8
  from sky import clouds
9
9
  from sky import sky_logging
10
- from sky import status_lib
11
10
  from sky.adaptors import ibm
12
11
  from sky.adaptors.ibm import CREDENTIAL_FILE
13
12
  from sky.clouds import service_catalog
13
+ from sky.utils import registry
14
14
  from sky.utils import resources_utils
15
+ from sky.utils import status_lib
15
16
  from sky.utils import ux_utils
16
17
 
17
18
  if typing.TYPE_CHECKING:
@@ -21,7 +22,7 @@ if typing.TYPE_CHECKING:
21
22
  logger = sky_logging.init_logger(__name__)
22
23
 
23
24
 
24
- @clouds.CLOUD_REGISTRY.register
25
+ @registry.CLOUD_REGISTRY.register
25
26
  class IBM(clouds.Cloud):
26
27
  """IBM Web Services."""
27
28
 
@@ -167,7 +168,7 @@ class IBM(clouds.Cloud):
167
168
  def make_deploy_resources_variables(
168
169
  self,
169
170
  resources: 'resources_lib.Resources',
170
- cluster_name: resources_utils.ClusterName,
171
+ cluster_name: 'resources_utils.ClusterName',
171
172
  region: 'clouds.Region',
172
173
  zones: Optional[List['clouds.Zone']],
173
174
  num_nodes: int,
@@ -252,10 +253,10 @@ class IBM(clouds.Cloud):
252
253
 
253
254
  @classmethod
254
255
  def get_default_instance_type(
255
- cls,
256
- cpus: Optional[str] = None,
257
- memory: Optional[str] = None,
258
- disk_tier: Optional[resources_utils.DiskTier] = None
256
+ cls,
257
+ cpus: Optional[str] = None,
258
+ memory: Optional[str] = None,
259
+ disk_tier: Optional['resources_utils.DiskTier'] = None
259
260
  ) -> Optional[str]:
260
261
  return service_catalog.get_default_instance_type(cpus=cpus,
261
262
  memory=memory,
sky/clouds/kubernetes.py CHANGED
@@ -1,5 +1,4 @@
1
1
  """Kubernetes."""
2
- import functools
3
2
  import os
4
3
  import re
5
4
  import typing
@@ -14,7 +13,9 @@ from sky.provision import instance_setup
14
13
  from sky.provision.kubernetes import network_utils
15
14
  from sky.provision.kubernetes import utils as kubernetes_utils
16
15
  from sky.skylet import constants
16
+ from sky.utils import annotations
17
17
  from sky.utils import common_utils
18
+ from sky.utils import registry
18
19
  from sky.utils import resources_utils
19
20
  from sky.utils import schemas
20
21
 
@@ -34,7 +35,7 @@ CREDENTIAL_PATH = os.environ.get('KUBECONFIG', DEFAULT_KUBECONFIG_PATH)
34
35
  _SKYPILOT_SYSTEM_NAMESPACE = 'skypilot-system'
35
36
 
36
37
 
37
- @clouds.CLOUD_REGISTRY.register(aliases=['k8s'])
38
+ @registry.CLOUD_REGISTRY.register(aliases=['k8s'])
38
39
  class Kubernetes(clouds.Cloud):
39
40
  """Kubernetes."""
40
41
 
@@ -82,7 +83,7 @@ class Kubernetes(clouds.Cloud):
82
83
  # Use a fresh user hash to avoid conflicts in the secret object naming.
83
84
  # This can happen when the controller is reusing the same user hash
84
85
  # through USER_ID_ENV_VAR but has a different SSH key.
85
- fresh_user_hash = common_utils.get_user_hash(force_fresh_hash=True)
86
+ fresh_user_hash = common_utils.generate_user_hash()
86
87
  return f'ssh-publickey-{fresh_user_hash}'
87
88
 
88
89
  @classmethod
@@ -116,7 +117,7 @@ class Kubernetes(clouds.Cloud):
116
117
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
117
118
 
118
119
  @classmethod
119
- @functools.lru_cache(maxsize=1)
120
+ @annotations.lru_cache(scope='global', maxsize=1)
120
121
  def _log_skipped_contexts_once(cls, skipped_contexts: Tuple[str,
121
122
  ...]) -> None:
122
123
  """Log skipped contexts for only once.
@@ -240,7 +241,7 @@ class Kubernetes(clouds.Cloud):
240
241
  cls,
241
242
  cpus: Optional[str] = None,
242
243
  memory: Optional[str] = None,
243
- disk_tier: Optional[resources_utils.DiskTier] = None) -> str:
244
+ disk_tier: Optional['resources_utils.DiskTier'] = None) -> str:
244
245
  # TODO(romilb): In the future, we may want to move the instance type
245
246
  # selection + availability checking to a kubernetes_catalog module.
246
247
  del disk_tier # Unused.
@@ -330,7 +331,7 @@ class Kubernetes(clouds.Cloud):
330
331
  def make_deploy_resources_variables(
331
332
  self,
332
333
  resources: 'resources_lib.Resources',
333
- cluster_name: resources_utils.ClusterName,
334
+ cluster_name: 'resources_utils.ClusterName',
334
335
  region: Optional['clouds.Region'],
335
336
  zones: Optional[List['clouds.Zone']],
336
337
  num_nodes: int,
sky/clouds/lambda_cloud.py CHANGED
@@ -5,10 +5,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
5
5
  import requests
6
6
 
7
7
  from sky import clouds
8
- from sky import status_lib
9
8
  from sky.clouds import service_catalog
10
9
  from sky.provision.lambda_cloud import lambda_utils
10
+ from sky.utils import registry
11
11
  from sky.utils import resources_utils
12
+ from sky.utils import status_lib
12
13
 
13
14
  if typing.TYPE_CHECKING:
14
15
  # Renaming to avoid shadowing variables.
@@ -20,7 +21,7 @@ _CREDENTIAL_FILES = [
20
21
  ]
21
22
 
22
23
 
23
- @clouds.CLOUD_REGISTRY.register
24
+ @registry.CLOUD_REGISTRY.register
24
25
  class Lambda(clouds.Cloud):
25
26
  """Lambda Labs GPU Cloud."""
26
27
 
@@ -121,10 +122,10 @@ class Lambda(clouds.Cloud):
121
122
 
122
123
  @classmethod
123
124
  def get_default_instance_type(
124
- cls,
125
- cpus: Optional[str] = None,
126
- memory: Optional[str] = None,
127
- disk_tier: Optional[resources_utils.DiskTier] = None
125
+ cls,
126
+ cpus: Optional[str] = None,
127
+ memory: Optional[str] = None,
128
+ disk_tier: Optional['resources_utils.DiskTier'] = None
128
129
  ) -> Optional[str]:
129
130
  return service_catalog.get_default_instance_type(cpus=cpus,
130
131
  memory=memory,
@@ -154,7 +155,7 @@ class Lambda(clouds.Cloud):
154
155
  def make_deploy_resources_variables(
155
156
  self,
156
157
  resources: 'resources_lib.Resources',
157
- cluster_name: resources_utils.ClusterName,
158
+ cluster_name: 'resources_utils.ClusterName',
158
159
  region: 'clouds.Region',
159
160
  zones: Optional[List['clouds.Zone']],
160
161
  num_nodes: int,
sky/clouds/oci.py CHANGED
@@ -9,7 +9,7 @@ History:
9
9
  file path resolution (by os.path.expanduser) when construct the file
10
10
  mounts. This bug will cause the created workder nodes located in different
11
11
  compartment and VCN than the header node if user specifies compartment_id
12
- in the sky config file, because the ~/.sky/config is not sync-ed to the
12
+ in the sky config file, because the ~/.sky/config.yaml is not sync-ed to the
13
13
  remote machine.
14
14
  The workaround is set the sky config file path using ENV before running
15
15
  the sky launch: export SKYPILOT_CONFIG=/home/ubuntu/.sky/config.yaml
@@ -27,13 +27,14 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
27
27
 
28
28
  from sky import clouds
29
29
  from sky import exceptions
30
- from sky import status_lib
31
30
  from sky.adaptors import oci as oci_adaptor
32
31
  from sky.clouds import service_catalog
33
32
  from sky.clouds.utils import oci_utils
34
33
  from sky.provision.oci.query_utils import query_helper
35
34
  from sky.utils import common_utils
35
+ from sky.utils import registry
36
36
  from sky.utils import resources_utils
37
+ from sky.utils import status_lib
37
38
  from sky.utils import ux_utils
38
39
 
39
40
  if typing.TYPE_CHECKING:
@@ -45,7 +46,7 @@ logger = logging.getLogger(__name__)
45
46
  _tenancy_prefix: Optional[str] = None
46
47
 
47
48
 
48
- @clouds.CLOUD_REGISTRY.register
49
+ @registry.CLOUD_REGISTRY.register
49
50
  class OCI(clouds.Cloud):
50
51
  """OCI: Oracle Cloud Infrastructure """
51
52
 
sky/clouds/paperspace.py CHANGED
@@ -8,6 +8,7 @@ import requests
8
8
  from sky import clouds
9
9
  from sky.clouds import service_catalog
10
10
  from sky.provision.paperspace import utils
11
+ from sky.utils import registry
11
12
  from sky.utils import resources_utils
12
13
 
13
14
  if typing.TYPE_CHECKING:
@@ -19,7 +20,7 @@ _CREDENTIAL_FILES = [
19
20
  ]
20
21
 
21
22
 
22
- @clouds.CLOUD_REGISTRY.register
23
+ @registry.CLOUD_REGISTRY.register
23
24
  class Paperspace(clouds.Cloud):
24
25
  """Paperspace GPU Cloud"""
25
26
 
sky/clouds/runpod.py CHANGED
@@ -5,6 +5,7 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
5
5
 
6
6
  from sky import clouds
7
7
  from sky.clouds import service_catalog
8
+ from sky.utils import registry
8
9
  from sky.utils import resources_utils
9
10
 
10
11
  if typing.TYPE_CHECKING:
@@ -15,7 +16,7 @@ _CREDENTIAL_FILES = [
15
16
  ]
16
17
 
17
18
 
18
- @clouds.CLOUD_REGISTRY.register
19
+ @registry.CLOUD_REGISTRY.register
19
20
  class RunPod(clouds.Cloud):
20
21
  """ RunPod GPU Cloud
21
22
 
sky/clouds/scp.py CHANGED
@@ -10,10 +10,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
10
10
  from sky import clouds
11
11
  from sky import exceptions
12
12
  from sky import sky_logging
13
- from sky import status_lib
14
13
  from sky.clouds import service_catalog
15
14
  from sky.clouds.utils import scp_utils
15
+ from sky.utils import registry
16
16
  from sky.utils import resources_utils
17
+ from sky.utils import status_lib
17
18
 
18
19
  if typing.TYPE_CHECKING:
19
20
  # Renaming to avoid shadowing variables.
@@ -29,7 +30,7 @@ _SCP_MIN_DISK_SIZE_GB = 100
29
30
  _SCP_MAX_DISK_SIZE_GB = 300
30
31
 
31
32
 
32
- @clouds.CLOUD_REGISTRY.register
33
+ @registry.CLOUD_REGISTRY.register
33
34
  class SCP(clouds.Cloud):
34
35
  """SCP Cloud."""
35
36
 
@@ -145,10 +146,10 @@ class SCP(clouds.Cloud):
145
146
 
146
147
  @classmethod
147
148
  def get_default_instance_type(
148
- cls,
149
- cpus: Optional[str] = None,
150
- memory: Optional[str] = None,
151
- disk_tier: Optional[resources_utils.DiskTier] = None
149
+ cls,
150
+ cpus: Optional[str] = None,
151
+ memory: Optional[str] = None,
152
+ disk_tier: Optional['resources_utils.DiskTier'] = None
152
153
  ) -> Optional[str]:
153
154
  return service_catalog.get_default_instance_type(cpus=cpus,
154
155
  memory=memory,
@@ -178,7 +179,7 @@ class SCP(clouds.Cloud):
178
179
  def make_deploy_resources_variables(
179
180
  self,
180
181
  resources: 'resources_lib.Resources',
181
- cluster_name: resources_utils.ClusterName,
182
+ cluster_name: 'resources_utils.ClusterName',
182
183
  region: 'clouds.Region',
183
184
  zones: Optional[List['clouds.Zone']],
184
185
  num_nodes: int,
sky/clouds/service_catalog/__init__.py CHANGED
@@ -67,7 +67,7 @@ def list_accelerators(
67
67
  all_regions: bool = False,
68
68
  require_price: bool = True,
69
69
  ) -> 'Dict[str, List[common.InstanceTypeInfo]]':
70
- """List the names of all accelerators offered by Sky.
70
+ """Lists the names of all accelerators offered by Sky.
71
71
 
72
72
  This will include all accelerators offered by Sky, including those
73
73
  that may not be available in the user's account.
@@ -95,7 +95,7 @@ def list_accelerator_counts(
95
95
  quantity_filter: Optional[int] = None,
96
96
  clouds: CloudFilter = None,
97
97
  ) -> Dict[str, List[int]]:
98
- """List all accelerators offered by Sky and available counts.
98
+ """Lists all accelerators offered by Sky and available counts.
99
99
 
100
100
  Returns: A dictionary of canonical accelerator names mapped to a list
101
101
  of available counts. See usage in cli.py.
@@ -129,7 +129,7 @@ def list_accelerator_realtime(
129
129
  clouds: CloudFilter = None,
130
130
  case_sensitive: bool = True,
131
131
  ) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]:
132
- """List all accelerators offered by Sky with their realtime availability.
132
+ """Lists all accelerators offered by Sky with their realtime availability.
133
133
 
134
134
  Realtime availability is the total number of accelerators in the cluster
135
135
  and number of accelerators available at the time of the call.