skypilot-nightly 1.0.0.dev20241228__py3-none-any.whl → 1.0.0.dev20241230__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '138679859b9844a8737f8dff1bf5a739e77e96c4'
8
+ _SKYPILOT_COMMIT_SHA = '7e40bcdce7437f601bb07e6eaf9fd954efdd12c6'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241228'
38
+ __version__ = '1.0.0.dev20241230'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/oci.py CHANGED
@@ -1,9 +1,11 @@
1
1
  """Oracle OCI cloud adaptor"""
2
2
 
3
+ import functools
3
4
  import logging
4
5
  import os
5
6
 
6
7
  from sky.adaptors import common
8
+ from sky.clouds.utils import oci_utils
7
9
 
8
10
  # Suppress OCI circuit breaker logging before lazy import, because
9
11
  # oci modules prints additional message during imports, i.e., the
@@ -30,10 +32,16 @@ def get_config_file() -> str:
30
32
 
31
33
def get_oci_config(region=None, profile='DEFAULT'):
    """Load the OCI SDK configuration for a profile.

    Args:
        region: Optional region; when not None it replaces the 'region'
            entry of the loaded configuration.
        profile: Profile name to load. An empty value or the literal
            'DEFAULT' defers to ``oci_utils.oci_config.get_profile()``;
            any other value is used as given.

    Returns:
        The configuration object produced by ``oci.config.from_file``.
    """
    conf_file_path = get_config_file()

    use_configured_profile = not profile or profile == 'DEFAULT'
    config_profile = (oci_utils.oci_config.get_profile()
                      if use_configured_profile else profile)

    oci_config = oci.config.from_file(file_location=conf_file_path,
                                      profile_name=config_profile)
    if region is None:
        return oci_config

    # Caller-supplied region wins over whatever the config file declares.
    oci_config['region'] = region
    return oci_config
38
46
 
39
47
 
@@ -54,6 +62,29 @@ def get_identity_client(region=None, profile='DEFAULT'):
54
62
  return oci.identity.IdentityClient(get_oci_config(region, profile))
55
63
 
56
64
 
65
def get_object_storage_client(region=None, profile='DEFAULT'):
    """Create an OCI Object Storage client.

    The client is configured from ``get_oci_config(region, profile)``.
    """
    client_config = get_oci_config(region, profile)
    return oci.object_storage.ObjectStorageClient(client_config)
68
+
69
+
57
70
def service_exception():
    """Return the OCI SDK ``ServiceError`` exception class.

    The attribute is looked up on each call rather than at import time.
    """
    service_error_cls = oci.exceptions.ServiceError
    return service_error_cls
73
+
74
+
75
def with_oci_env(f):
    """Decorate a command builder so its command runs in the OCI CLI env.

    ``f`` is expected to return a shell command string. The decorated
    function returns one ``&&``-chained command that creates (if missing)
    and activates the ``sky-oci-cli-env`` conda environment, installs
    ``oci-cli``, runs the command produced by ``f`` and finally deactivates
    the environment.
    """
    # pylint: disable=line-too-long
    setup_steps = (
        'conda info --envs | grep "sky-oci-cli-env" || conda create -n sky-oci-cli-env python=3.10 -y',
        '. $(conda info --base 2> /dev/null)/etc/profile.d/conda.sh > /dev/null 2>&1 || true',
        'conda activate sky-oci-cli-env',
        'pip install oci-cli',
        'export OCI_CLI_SUPPRESS_FILE_PERMISSIONS_WARNING=True',
    )
    teardown_steps = ('conda deactivate',)

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        operation = f(*args, **kwargs)
        return ' && '.join([*setup_steps, operation, *teardown_steps])

    return wrapper
sky/cloud_stores.py CHANGED
@@ -7,6 +7,7 @@ TODO:
7
7
  * Better interface.
8
8
  * Better implementation (e.g., fsspec, smart_open, using each cloud's SDK).
9
9
  """
10
+ import os
10
11
  import shlex
11
12
  import subprocess
12
13
  import time
@@ -18,6 +19,7 @@ from sky.adaptors import aws
18
19
  from sky.adaptors import azure
19
20
  from sky.adaptors import cloudflare
20
21
  from sky.adaptors import ibm
22
+ from sky.adaptors import oci
21
23
  from sky.clouds import gcp
22
24
  from sky.data import data_utils
23
25
  from sky.data.data_utils import Rclone
@@ -470,6 +472,64 @@ class IBMCosCloudStorage(CloudStorage):
470
472
  return self.make_sync_dir_command(source, destination)
471
473
 
472
474
 
475
class OciCloudStorage(CloudStorage):
    """CloudStorage implementation for OCI Object Storage URLs."""

    def is_directory(self, url: str) -> bool:
        """Report whether the OCI ``url`` should be treated as a directory.

        Object stores have no real directories; here a "directory" is a
        prefix shared by other objects. The bucket is listed with the URL's
        path as prefix and the number of matches decides the answer.
        """
        bucket_name, path = data_utils.split_oci_path(url)

        client = oci.get_object_storage_client()
        tenancy = oci.get_oci_config()['tenancy']
        namespace = client.get_namespace(compartment_id=tenancy).data

        listing = client.list_objects(namespace_name=namespace,
                                      bucket_name=bucket_name,
                                      prefix=path)
        objects = listing.data.objects

        # No match at all, or several matches: treat the path as a directory.
        if len(objects) != 1:
            return True

        # Exactly one match: it is a plain object when the requested path
        # ends with that object's name, otherwise a directory holding it.
        return not path.endswith(objects[0].name)

    @oci.with_oci_env
    def make_sync_dir_command(self, source: str, destination: str) -> str:
        """Build the OCI CLI command that syncs a prefix to a local dir."""
        bucket_name, path = data_utils.split_oci_path(source)
        cli_parts = [
            'oci os object sync --no-follow-symlinks',
            f'--bucket-name {bucket_name}',
            f'--prefix "{path}"',
            f'--dest-dir "{destination}"',
        ]
        return ' '.join(cli_parts)

    @oci.with_oci_env
    def make_sync_file_command(self, source: str, destination: str) -> str:
        """Build the OCI CLI command that downloads a single object.

        The object is saved under ``destination`` using the object's
        base name.
        """
        bucket_name, path = data_utils.split_oci_path(source)
        target_file = os.path.join(destination, os.path.basename(path))
        cli_parts = [
            'oci os object get',
            f'--bucket-name {bucket_name}',
            f'--name "{path}"',
            f'--file "{target_file}"',
        ]
        return ' '.join(cli_parts)
531
+
532
+
473
533
  def get_storage_from_path(url: str) -> CloudStorage:
474
534
  """Returns a CloudStorage by identifying the scheme:// in a URL."""
475
535
  result = urllib.parse.urlsplit(url)
@@ -485,6 +545,7 @@ _REGISTRY = {
485
545
  's3': S3CloudStorage(),
486
546
  'r2': R2CloudStorage(),
487
547
  'cos': IBMCosCloudStorage(),
548
+ 'oci': OciCloudStorage(),
488
549
  # TODO: This is a hack, as Azure URL starts with https://, we should
489
550
  # refactor the registry to be able to take regex, so that Azure blob can
490
551
  # be identified with `https://(.*?)\.blob\.core\.windows\.net`
sky/data/data_transfer.py CHANGED
@@ -200,3 +200,40 @@ def _add_bucket_iam_member(bucket_name: str, role: str, member: str) -> None:
200
200
  bucket.set_iam_policy(policy)
201
201
 
202
202
  logger.debug(f'Added {member} with role {role} to {bucket_name}.')
203
+
204
+
205
def s3_to_oci(s3_bucket_name: str, oci_bucket_name: str) -> None:
    """Placeholder for a one-time Amazon S3 to OCI Object Storage transfer.

    Args:
      s3_bucket_name: str; Name of the Amazon S3 Bucket
      oci_bucket_name: str; Name of the OCI Bucket

    Raises:
      NotImplementedError: always; direct transfer is not supported.
    """
    # TODO(HysunHe): Implement sync with other clouds (s3, gs)
    unsupported_msg = ('Moving data directly from S3 to OCI bucket '
                       'is currently not supported. Please specify '
                       'a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)
215
+
216
+
217
def gcs_to_oci(gs_bucket_name: str, oci_bucket_name: str) -> None:
    """Placeholder for a one-time Google Cloud Storage to OCI transfer.

    Args:
      gs_bucket_name: str; Name of the Google Cloud Storage Bucket
      oci_bucket_name: str; Name of the OCI Bucket

    Raises:
      NotImplementedError: always; direct transfer is not supported.
    """
    # TODO(HysunHe): Implement sync with other clouds (s3, gs)
    unsupported_msg = ('Moving data directly from GCS to OCI bucket '
                       'is currently not supported. Please specify '
                       'a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)
228
+
229
+
230
def r2_to_oci(r2_bucket_name: str, oci_bucket_name: str) -> None:
    """Placeholder for a one-time Cloudflare R2 to OCI Bucket transfer.

    Args:
      r2_bucket_name: str; Name of the Cloudflare R2 Bucket
      oci_bucket_name: str; Name of the OCI Bucket

    Raises:
      NotImplementedError: always; direct transfer is not supported.
    """
    unsupported_msg = ('Moving data directly from Cloudflare R2 to OCI '
                       'bucket is currently not supported. Please specify '
                       'a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)
sky/data/data_utils.py CHANGED
@@ -730,3 +730,14 @@ class Rclone():
730
730
  lines_to_keep.append(line)
731
731
 
732
732
  return lines_to_keep
733
+
734
+
735
def split_oci_path(oci_path: str) -> Tuple[str, str]:
    """Splits OCI Path into Bucket name and Relative Path to Bucket

    Only a leading 'oci://' scheme is removed; an 'oci://' substring that
    appears later, inside the object key, is kept as part of the key.

    Args:
      oci_path: str; OCI Path, e.g. oci://imagenet/train/

    Returns:
      A (bucket, key) tuple. The key is everything after the first '/'
      following the bucket name, or '' when the path names only a bucket.
    """
    scheme = 'oci://'
    if oci_path.startswith(scheme):
        oci_path = oci_path[len(scheme):]
    # The first '/' separates the bucket from the key; later slashes belong
    # to the key and must be preserved as-is.
    bucket, _, key = oci_path.partition('/')
    return bucket, key
@@ -19,6 +19,7 @@ BLOBFUSE2_VERSION = '2.2.0'
19
19
  _BLOBFUSE_CACHE_ROOT_DIR = '~/.sky/blobfuse2_cache'
20
20
  _BLOBFUSE_CACHE_DIR = ('~/.sky/blobfuse2_cache/'
21
21
  '{storage_account_name}_{container_name}')
22
+ RCLONE_VERSION = 'v1.68.2'
22
23
 
23
24
 
24
25
  def get_s3_mount_install_cmd() -> str:
@@ -30,12 +31,19 @@ def get_s3_mount_install_cmd() -> str:
30
31
  return install_cmd
31
32
 
32
33
 
33
# pylint: disable=invalid-name
def get_s3_mount_cmd(bucket_name: str,
                     mount_path: str,
                     _bucket_sub_path: Optional[str] = None) -> str:
    """Build the goofys command that mounts an S3 bucket at ``mount_path``.

    When ``_bucket_sub_path`` is not None it is appended to the bucket name
    as ``<bucket>:<sub_path>``; otherwise the bucket name is used alone.
    """
    sub_path_suffix = ('' if _bucket_sub_path is None else
                       f':{_bucket_sub_path}')
    goofys_args = [
        'goofys -o allow_other',
        f'--stat-cache-ttl {_STAT_CACHE_TTL}',
        f'--type-cache-ttl {_TYPE_CACHE_TTL}',
        f'{bucket_name}{sub_path_suffix} {mount_path}',
    ]
    return ' '.join(goofys_args)
40
48
 
41
49
 
@@ -49,15 +57,20 @@ def get_gcs_mount_install_cmd() -> str:
49
57
  return install_cmd
50
58
 
51
59
 
52
# pylint: disable=invalid-name
def get_gcs_mount_cmd(bucket_name: str,
                      mount_path: str,
                      _bucket_sub_path: Optional[str] = None) -> str:
    """Build the gcsfuse command that mounts a GCS bucket at ``mount_path``.

    A truthy ``_bucket_sub_path`` adds ``--only-dir <sub_path>`` right
    before the bucket name; None or an empty string adds nothing.
    """
    gcsfuse_args = [
        'gcsfuse -o allow_other',
        '--implicit-dirs',
        f'--stat-cache-capacity {_STAT_CACHE_CAPACITY}',
        f'--stat-cache-ttl {_STAT_CACHE_TTL}',
        f'--type-cache-ttl {_TYPE_CACHE_TTL}',
        f'--rename-dir-limit {_RENAME_DIR_LIMIT}',
    ]
    if _bucket_sub_path:
        gcsfuse_args.append(f'--only-dir {_bucket_sub_path}')
    gcsfuse_args.append(f'{bucket_name} {mount_path}')
    return ' '.join(gcsfuse_args)
63
76
 
@@ -78,10 +91,12 @@ def get_az_mount_install_cmd() -> str:
78
91
  return install_cmd
79
92
 
80
93
 
94
+ # pylint: disable=invalid-name
81
95
  def get_az_mount_cmd(container_name: str,
82
96
  storage_account_name: str,
83
97
  mount_path: str,
84
- storage_account_key: Optional[str] = None) -> str:
98
+ storage_account_key: Optional[str] = None,
99
+ _bucket_sub_path: Optional[str] = None) -> str:
85
100
  """Returns a command to mount an AZ Container using blobfuse2.
86
101
 
87
102
  Args:
@@ -90,6 +105,7 @@ def get_az_mount_cmd(container_name: str,
90
105
  belongs to.
91
106
  mount_path: Path where the container will be mounting.
92
107
  storage_account_key: Access key for the given storage account.
108
+ _bucket_sub_path: Sub path of the mounting container.
93
109
 
94
110
  Returns:
95
111
  str: Command used to mount AZ container with blobfuse2.
@@ -106,25 +122,38 @@ def get_az_mount_cmd(container_name: str,
106
122
  cache_path = _BLOBFUSE_CACHE_DIR.format(
107
123
  storage_account_name=storage_account_name,
108
124
  container_name=container_name)
125
+ if _bucket_sub_path is None:
126
+ bucket_sub_path_arg = ''
127
+ else:
128
+ bucket_sub_path_arg = f'--subdirectory={_bucket_sub_path}/ '
109
129
  mount_cmd = (f'AZURE_STORAGE_ACCOUNT={storage_account_name} '
110
130
  f'{key_env_var} '
111
131
  f'blobfuse2 {mount_path} --allow-other --no-symlinks '
112
132
  '-o umask=022 -o default_permissions '
113
133
  f'--tmp-path {cache_path} '
134
+ f'{bucket_sub_path_arg}'
114
135
  f'--container-name {container_name}')
115
136
  return mount_cmd
116
137
 
117
138
 
118
# pylint: disable=invalid-name
def get_r2_mount_cmd(r2_credentials_path: str,
                     r2_profile_name: str,
                     endpoint_url: str,
                     bucket_name: str,
                     mount_path: str,
                     _bucket_sub_path: Optional[str] = None) -> str:
    """Build the goofys command that mounts an R2 bucket at ``mount_path``.

    The credentials file and profile are passed to goofys through the
    AWS_SHARED_CREDENTIALS_FILE and AWS_PROFILE environment variables, and
    the R2 endpoint through ``--endpoint``. When ``_bucket_sub_path`` is not
    None it is appended to the bucket name as ``<bucket>:<sub_path>``.
    """
    sub_path_suffix = ('' if _bucket_sub_path is None else
                       f':{_bucket_sub_path}')
    goofys_args = [
        f'AWS_SHARED_CREDENTIALS_FILE={r2_credentials_path}',
        f'AWS_PROFILE={r2_profile_name} goofys -o allow_other',
        f'--stat-cache-ttl {_STAT_CACHE_TTL}',
        f'--type-cache-ttl {_TYPE_CACHE_TTL}',
        f'--endpoint {endpoint_url}',
        f'{bucket_name}{sub_path_suffix} {mount_path}',
    ]
    return ' '.join(goofys_args)
129
158
 
130
159
 
@@ -136,9 +165,12 @@ def get_cos_mount_install_cmd() -> str:
136
165
  return install_cmd
137
166
 
138
167
 
139
- def get_cos_mount_cmd(rclone_config_data: str, rclone_config_path: str,
140
- bucket_rclone_profile: str, bucket_name: str,
141
- mount_path: str) -> str:
168
+ def get_cos_mount_cmd(rclone_config_data: str,
169
+ rclone_config_path: str,
170
+ bucket_rclone_profile: str,
171
+ bucket_name: str,
172
+ mount_path: str,
173
+ _bucket_sub_path: Optional[str] = None) -> str:
142
174
  """Returns a command to mount an IBM COS bucket using rclone."""
143
175
  # creates a fusermount soft link on older (<22) Ubuntu systems for
144
176
  # rclone's mount utility.
@@ -150,14 +182,60 @@ def get_cos_mount_cmd(rclone_config_data: str, rclone_config_path: str,
150
182
  'mkdir -p ~/.config/rclone/ && '
151
183
  f'echo "{rclone_config_data}" >> '
152
184
  f'{rclone_config_path}')
185
+ if _bucket_sub_path is None:
186
+ sub_path_arg = f'{bucket_name}/{_bucket_sub_path}'
187
+ else:
188
+ sub_path_arg = f'/{bucket_name}'
153
189
  # --daemon will keep the mounting process running in the background.
154
190
  mount_cmd = (f'{configure_rclone_profile} && '
155
191
  'rclone mount '
156
- f'{bucket_rclone_profile}:{bucket_name} {mount_path} '
192
+ f'{bucket_rclone_profile}:{sub_path_arg} {mount_path} '
157
193
  '--daemon')
158
194
  return mount_cmd
159
195
 
160
196
 
197
def get_rclone_install_cmd() -> str:
    """Build the shell command that installs rclone at RCLONE_VERSION.

    The command first tries the dpkg route (when ``which dpkg`` succeeds)
    and otherwise falls back to the yum/rpm route. In both routes the
    download and install are skipped when ``which rclone`` already
    succeeds.
    """
    # pylint: disable=line-too-long
    package_stem = f'rclone-{RCLONE_VERSION}-linux-amd64'
    download_root = f'https://downloads.rclone.org/{RCLONE_VERSION}'
    deb_package = f'{package_stem}.deb'
    rpm_package = f'{package_stem}.rpm'

    # Each route downloads the package into the home directory, installs
    # it and then removes the downloaded file.
    deb_install = ' && '.join([
        'cd ~ > /dev/null',
        f'curl -O {download_root}/{deb_package}',
        f'sudo dpkg -i {deb_package}',
        f'rm -f {deb_package}',
    ])
    rpm_install = ' && '.join([
        'cd ~ > /dev/null',
        f'curl -O {download_root}/{rpm_package}',
        f'sudo yum --nogpgcheck install {rpm_package} -y',
        f'rm -f {rpm_package}',
    ])

    dpkg_route = f'(which dpkg > /dev/null 2>&1 && (which rclone > /dev/null || ({deb_install})))'
    rpm_route = f'(which rclone > /dev/null || ({rpm_install}))'
    return f'{dpkg_route} || {rpm_route}'
212
+
213
+
214
def get_oci_mount_cmd(mount_path: str, store_name: str, region: str,
                      namespace: str, compartment: str, config_file: str,
                      config_profile: str) -> str:
    """Build the rclone-based shell command that mounts an OCI bucket.

    The returned command chains these steps with ``&&``:
      1. chown ``mount_path`` to the current user;
      2. create the rclone remote ``oos_<store_name>`` for Oracle Object
         Storage with the given namespace, compartment, region, config
         file and config profile;
      3. rename the ``oci-config-file`` / ``oci-config-profile`` keys in
         ``~/.config/rclone/rclone.conf`` to ``config_file`` /
         ``config_profile``;
      4. link ``/bin/fusermount3`` to ``/bin/fusermount`` when missing;
      5. mount the bucket unless ``mount_path`` already appears in
         ``/proc/mounts``.
    """
    # pylint: disable=line-too-long
    remote_name = f'oos_{store_name}'
    steps = [
        f'sudo chown -R `whoami` {mount_path}',
        (f'rclone config create {remote_name} oracleobjectstorage'
         f' provider user_principal_auth namespace {namespace}'
         f' compartment {compartment} region {region}'
         f' oci-config-file {config_file}'
         f' oci-config-profile {config_profile}'),
        ('sed -i "s/oci-config-file/config_file/g;'
         ' s/oci-config-profile/config_profile/g" ~/.config/rclone/rclone.conf'),
        '([ ! -f /bin/fusermount3 ] && sudo ln -s /bin/fusermount /bin/fusermount3 || true)',
        f'(grep -q {mount_path} /proc/mounts || rclone mount {remote_name}:{store_name} {mount_path} --daemon --allow-non-empty)',
    ]
    return ' && '.join(steps)
232
+
233
+
234
def get_rclone_version_check_cmd() -> str:
    """Build the shell command that checks the installed rclone version.

    The command greps the output of ``rclone --version`` for
    RCLONE_VERSION, so its exit status tells whether that version string
    is present.
    """
    version_check_template = 'rclone --version | grep -q {}'
    return version_check_template.format(RCLONE_VERSION)
237
+
238
+
161
239
  def _get_mount_binary(mount_cmd: str) -> str:
162
240
  """Returns mounting binary in string given as the mount command.
163
241
 
@@ -209,7 +287,7 @@ def get_mounting_script(
209
287
  script = textwrap.dedent(f"""
210
288
  #!/usr/bin/env bash
211
289
  set -e
212
-
290
+
213
291
  {command_runner.ALIAS_SUDO_TO_EMPTY_FOR_ROOT_CMD}
214
292
 
215
293
  MOUNT_PATH={mount_path}