konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,994 @@
+ # Proprietary Changes made for Trainy under the Trainy Software License
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Google Cloud Platform Storage."""
+
+ import enum
+ import os
+ import re
+ import shlex
+ import subprocess
+ import time
+ import typing
+ from typing import List, Optional, Tuple
+
+ import colorama
+
+ if typing.TYPE_CHECKING:
+     from google.cloud import storage as gcs_storage
+
+ from konduktor import logging
+ from konduktor.adaptors import gcp
+ from konduktor.backends import constants as backend_constants
+ from konduktor.data import constants, data_utils, storage_utils
+ from konduktor.data.gcp import utils
+ from konduktor.utils import (
+     base64_utils,
+     common_utils,
+     exceptions,
+     kubernetes_utils,
+     rich_utils,
+     ux_utils,
+ )
+
+ logger = logging.get_logger(__name__)
+
+ # Maximum number of concurrent rsync upload processes
+ _MAX_CONCURRENT_UPLOADS = 32
+
+ # Env var pointing to any service account key. If it exists, this path takes
+ # priority over the DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH below, and will be
+ # used instead for Konduktor-launched instances. This is the same behavior as
+ # gcloud:
+ # https://cloud.google.com/docs/authentication/provide-credentials-adc#local-key
+ _GCP_APPLICATION_CREDENTIAL_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
+ # NOTE: do not expanduser() on this path. It's used as a destination path on the
+ # remote cluster.
+ DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH: str = (
+     '~/.config/gcloud/application_default_credentials.json'
+ )
+ DEFAULT_GCP_CREDENTIALS_DIR = '~/.config/gcloud'
+
+ # TODO(wei-lin): config_default may not be the config in use.
+ # See: https://github.com/skypilot-org/skypilot/pull/1539
+ # NOTE: do not expanduser() on this path. It's used as a destination path on the
+ # remote cluster.
+ GCP_CONFIG_PATH = '~/.config/gcloud/configurations/config_default'
+
+ # Minimum set of files under ~/.config/gcloud that grant GCP access.
+ _CREDENTIAL_FILES = [
+     'credentials.db',
+     'access_tokens.db',
+     'configurations',
+     'legacy_credentials',
+     'active_config',
+     'application_default_credentials.json',
+ ]
+
+ # k8s secret name for gcp credentials
+ GCP_SECRET_NAME = 'gcpcredentials'
+ GCP_CREDENTIALS_KEY = 'gcpcredentials'
+
+ # NOTE: do not expanduser() on this path. It's used as a destination path on the
+ # remote cluster.
+ _GCLOUD_INSTALLATION_LOG = '~/.konduktor/logs/gcloud_installation.log'
+ _GCLOUD_VERSION = '424.0.0'
+ # Needs to be run with /bin/bash.
+ # We factor out the installation logic to keep it aligned in both the spot
+ # controller and cloud stores.
+ GOOGLE_SDK_INSTALLATION_COMMAND: str = f'pushd /tmp &>/dev/null && \
+ {{ gcloud --help > /dev/null 2>&1 || \
+ {{ mkdir -p {os.path.dirname(_GCLOUD_INSTALLATION_LOG)} && \
+ wget --quiet https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-{_GCLOUD_VERSION}-linux-x86_64.tar.gz > {_GCLOUD_INSTALLATION_LOG} && \
+ tar xzf google-cloud-sdk-{_GCLOUD_VERSION}-linux-x86_64.tar.gz >> {_GCLOUD_INSTALLATION_LOG} && \
+ rm -rf ~/google-cloud-sdk >> {_GCLOUD_INSTALLATION_LOG} && \
+ mv google-cloud-sdk ~/ && \
+ ~/google-cloud-sdk/install.sh -q >> {_GCLOUD_INSTALLATION_LOG} 2>&1 && \
+ echo "source ~/google-cloud-sdk/path.bash.inc > /dev/null 2>&1" >> ~/.bashrc && \
+ source ~/google-cloud-sdk/path.bash.inc >> {_GCLOUD_INSTALLATION_LOG} 2>&1; }}; }} && \
+ popd &>/dev/null'  # noqa: E501
+
+
+ class GCPIdentityType(enum.Enum):
+     """GCP identity type.
+
+     The account type is determined by the current user identity, based on
+     the identity email.
+     """
+
+     # Example of a service account email:
+     # skypilot-v1@xxxx.iam.gserviceaccount.com
+     SERVICE_ACCOUNT = 'iam.gserviceaccount.com'
+
+     SHARED_CREDENTIALS_FILE = ''
+
+     def can_credential_expire(self) -> bool:
+         return self == GCPIdentityType.SHARED_CREDENTIALS_FILE
+
+
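For reference, the identity classification reduces to a substring check on the account email; a hedged sketch with made-up addresses, mirroring the `_get_identity_type` classmethod further down:

# Hedged sketch; the emails below are hypothetical.
def classify(account_email: str) -> GCPIdentityType:
    if GCPIdentityType.SERVICE_ACCOUNT.value in account_email:
        return GCPIdentityType.SERVICE_ACCOUNT
    return GCPIdentityType.SHARED_CREDENTIALS_FILE

assert classify('bot@proj.iam.gserviceaccount.com') == GCPIdentityType.SERVICE_ACCOUNT
# User credentials come from the shared credentials file and can expire:
assert classify('alice@example.com').can_credential_expire()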
+ def _run_output(cmd):
+     proc = subprocess.run(
+         cmd, shell=True, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE
+     )
+     return proc.stdout.decode('ascii')
+
+
+ def is_api_disabled(endpoint: str, project_id: str) -> bool:
+     proc = subprocess.run(
+         (
+             f'gcloud services list --project {project_id} '
+             f' | grep {endpoint}.googleapis.com'
+         ),
+         check=False,
+         shell=True,
+         stderr=subprocess.PIPE,
+         stdout=subprocess.PIPE,
+     )
+     return proc.returncode != 0
+
+
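Note that `is_api_disabled` treats a failing `grep` (non-zero exit) as "disabled", so a network or auth failure also reads as disabled. A hedged usage sketch with a placeholder project id:

# Hedged sketch; 'my-example-project' is a hypothetical project id.
if is_api_disabled('storage', project_id='my-example-project'):
    print('storage.googleapis.com is not enabled; enable it with:\n'
          '  gcloud services enable storage.googleapis.com '
          '--project my-example-project')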
+ class GcsStore(storage_utils.AbstractStore):
+     """GcsStore inherits from AbstractStore and represents the backend
+     for GCS buckets.
+     """
+
+     # k8s secret name for gcp credentials
+     _GCP_SECRET_NAME = f'{GCP_SECRET_NAME}-{common_utils.user_and_hostname_hash()}'
+     _GCP_CREDENTIALS_KEY = GCP_CREDENTIALS_KEY
+
+     _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
+
+     _INDENT_PREFIX = '    '
+     _DEPENDENCY_HINT = (
+         'GCP tools are not installed. Run the following commands:\n'
+         # Install the Google Cloud SDK:
+         f'{_INDENT_PREFIX} $ pip install google-api-python-client\n'
+         f'{_INDENT_PREFIX} $ conda install -c conda-forge '
+         'google-cloud-sdk -y'
+     )
+
+     _CREDENTIAL_HINT = (
+         'Run the following commands:\n'
+         # This authenticates the CLI to make `gsutil` work:
+         f'{_INDENT_PREFIX} $ gcloud init\n'
+         # This will generate
+         # ~/.config/gcloud/application_default_credentials.json.
+         f'{_INDENT_PREFIX} $ gcloud auth application-default login\n'
+     )
+     _APPLICATION_CREDENTIAL_HINT = (
+         'Run the following commands:\n'
+         f'{_INDENT_PREFIX} $ gcloud auth application-default login\n'
+         f'{_INDENT_PREFIX}Or set the environment variable '
+         'GOOGLE_APPLICATION_CREDENTIALS '
+         'to the path of your service account key file.\n'
+     )
+
+     _REPR = 'GcsStore'
+
+     def __init__(
+         self,
+         name: str,
+         source: str,
+         region: Optional[str] = 'us-central1',
+         is_sky_managed: Optional[bool] = False,
+         sync_on_reconstruction: Optional[bool] = True,
+         _bucket_sub_path: Optional[str] = None,
+     ):
+         self.client: 'gcs_storage.Client'
+         self.bucket: 'constants.StorageHandle'
+         super().__init__(
+             name,
+             source,
+             region,
+             is_sky_managed,
+             sync_on_reconstruction,
+             _bucket_sub_path,
+         )
+
+     def __repr__(self):
+         return self._REPR
+
+     def _validate(self):
+         if self.source is not None and isinstance(self.source, str):
+             # if self.source.startswith('s3://'):
+             #     assert self.name == data_utils.split_s3_path(self.source)[0], (
+             #         'S3 Bucket is specified as path, the name should be the'
+             #         ' same as S3 bucket.')
+             #     assert data_utils.verify_s3_bucket(self.name), (
+             #         f'Source specified as {self.source}, an S3 bucket. ',
+             #         'S3 Bucket should exist.')
+             if self.source.startswith('gs://'):
+                 assert self.name == data_utils.split_gcs_path(self.source)[0], (
+                     'GCS Bucket is specified as path, the name should be '
+                     'the same as GCS bucket.'
+                 )
+             # elif data_utils.is_az_container_endpoint(self.source):
+             #     storage_account_name, container_name, _ = (
+             #         data_utils.split_az_path(self.source))
+             #     assert self.name == container_name, (
+             #         'Azure bucket is specified as path, the name should be '
+             #         'the same as Azure bucket.')
+             #     assert data_utils.verify_az_bucket(
+             #         storage_account_name, self.name), (
+             #         f'Source specified as {self.source}, an Azure bucket. '
+             #         'Azure bucket should exist.')
+             # elif self.source.startswith('r2://'):
+             #     assert self.name == data_utils.split_r2_path(self.source)[0], (
+             #         'R2 Bucket is specified as path, the name should be '
+             #         'the same as R2 bucket.')
+             #     assert data_utils.verify_r2_bucket(self.name), (
+             #         f'Source specified as {self.source}, a R2 bucket. ',
+             #         'R2 Bucket should exist.')
+             # elif self.source.startswith('cos://'):
+             #     assert self.name == data_utils.split_cos_path(self.source)[0], (
+             #         'COS Bucket is specified as path, the name should be '
+             #         'the same as COS bucket.')
+             #     assert data_utils.verify_ibm_cos_bucket(self.name), (
+             #         f'Source specified as {self.source}, a COS bucket. ',
+             #         'COS Bucket should exist.')
+         # Validate name
+         self.name = self.validate_name(self.name)
+
+     @classmethod
+     def validate_name(cls, name: str) -> str:
+         """Validates the name of the GCS store.
+
+         Source for rules: https://cloud.google.com/storage/docs/buckets#naming
+         """
+
+         def _raise_no_traceback_name_error(err_str):
+             with ux_utils.print_exception_no_traceback():
+                 raise exceptions.StorageNameError(err_str)
+
+         if name is not None and isinstance(name, str):
+             # Check for overall length
+             if not 3 <= len(name) <= 222:
+                 _raise_no_traceback_name_error(
+                     f'Invalid store name: name {name} must contain 3-222 characters.'
+                 )
+
+             # Check for valid characters and start/end with a number or letter
+             pattern = r'^[a-z0-9][-a-z0-9._]*[a-z0-9]$'
+             if not re.match(pattern, name):
+                 _raise_no_traceback_name_error(
+                     f'Invalid store name: name {name} can only contain '
+                     'lowercase letters, numeric characters, dashes (-), '
+                     'underscores (_), and dots (.). Spaces are not allowed. '
+                     'Names must start and end with a number or letter.'
+                 )
+
+             # Check for 'goog' prefix and 'google' in the name
+             if name.startswith('goog') or any(
+                 s in name for s in ['google', 'g00gle', 'go0gle', 'g0ogle']
+             ):
+                 _raise_no_traceback_name_error(
+                     f'Invalid store name: name {name} cannot begin with the '
+                     '"goog" prefix or contain "google" in various forms.'
+                 )
+
+             # Check for dot-separated components length
+             components = name.split('.')
+             if any(len(component) > 63 for component in components):
+                 _raise_no_traceback_name_error(
+                     'Invalid store name: Dot-separated components in name '
+                     f'{name} can be no longer than 63 characters.'
+                 )
+
+             if '..' in name or '.-' in name or '-.' in name:
+                 _raise_no_traceback_name_error(
+                     f'Invalid store name: name {name} must not contain two '
+                     'adjacent periods or a dot next to a hyphen.'
+                 )
+
+             # Check for IP address format
+             ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
+             if re.match(ip_pattern, name):
+                 _raise_no_traceback_name_error(
+                     f'Invalid store name: name {name} cannot be represented as '
+                     'an IP address in dotted-decimal notation '
+                     '(for example, 192.168.5.4).'
+                 )
+         else:
+             _raise_no_traceback_name_error('Store name must be specified.')
+         return name
+
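A few quick cases against the naming rules above (hypothetical bucket names):

# Hedged sketch of names that pass or fail validate_name.
GcsStore.validate_name('my-training-data')    # ok
GcsStore.validate_name('logs.2024.shard-01')  # ok: each dot component <= 63 chars
# Each of the following raises StorageNameError:
#   'ab'            shorter than 3 characters
#   'Goog-data'     uppercase letters are rejected by the character pattern
#   '192.168.5.4'   dotted-decimal IP form
#   'data..set'     adjacent periods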
+     def initialize(self):
+         """Initializes the GCS store object on the cloud.
+
+         Initialization involves fetching the bucket if it exists, or creating
+         it if it does not.
+
+         Raises:
+             StorageBucketCreateError: If bucket creation fails
+             StorageBucketGetError: If fetching existing bucket fails
+             StorageInitError: If general initialization fails.
+         """
+         self.client = gcp.storage_client()
+         self.bucket, is_new_bucket = self._get_bucket()
+         if self.is_sky_managed is None:
+             # If is_sky_managed is not specified, then this is a new storage
+             # object (i.e., did not exist in global_user_state) and we should
+             # set the is_sky_managed property.
+             # If is_sky_managed is specified, then we take no action.
+             self.is_sky_managed = is_new_bucket
+
+     def upload(self):
+         """Uploads source to store bucket.
+
+         Upload must be called by the Storage handler - it is not called on
+         Store initialization.
+
+         Raises:
+             StorageUploadError: if upload fails.
+         """
+         try:
+             if isinstance(self.source, list):
+                 self.batch_gsutil_rsync(self.source, create_dirs=True)
+             elif self.source is not None:
+                 if self.source.startswith('gs://'):
+                     pass
+                 elif self.source.startswith('s3://'):
+                     self._transfer_to_gcs()
+                 elif self.source.startswith('r2://'):
+                     self._transfer_to_gcs()
+                 else:
+                     # If a single directory is specified in source, upload
+                     # contents to root of bucket by suffixing /*.
+                     self.batch_gsutil_rsync([self.source])
+         except exceptions.StorageUploadError:
+             raise
+         except Exception as e:
+             raise exceptions.StorageUploadError(
+                 f'Upload failed for store {self.name}'
+             ) from e
+
+     def delete(self) -> None:
+         deleted_by_skypilot = self._delete_gcs_bucket(self.name)
+         if deleted_by_skypilot:
+             msg_str = f'Deleted GCS bucket {self.name}.'
+         else:
+             msg_str = (
+                 f'GCS bucket {self.name} may have been deleted '
+                 f'externally. Removing from local state.'
+             )
+         logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
+
+     def get_handle(self) -> 'constants.StorageHandle':
+         return self.client.get_bucket(self.name)
+
+     def batch_gsutil_cp(
+         self, source_path_list: List['constants.Path'], create_dirs: bool = False
+     ) -> None:
+         """Invokes gsutil cp -n to batch upload a list of local paths.
+
+         The -n flag to gsutil cp checks the existence of an object before
+         uploading, making it similar to gsutil rsync. Since it allows
+         specification of a list of files, it is faster than calling gsutil
+         rsync on each file. However, unlike rsync, files are compared based
+         on just their filename, and any updates to a file would not be copied
+         to the bucket.
+         """
+         # Generate message for upload
+         if len(source_path_list) > 1:
+             source_message = f'{len(source_path_list)} paths'
+         else:
+             source_message = source_path_list[0]
+
+         # If the source_path list contains a directory, then gsutil cp -n
+         # copies the dir as is to the root of the bucket. To copy the
+         # contents of directory to the root, add /* to the directory path
+         # e.g., /mydir/*
+         source_path_list = [
+             str(path) + '/*' if (os.path.isdir(path) and not create_dirs) else str(path)
+             for path in source_path_list
+         ]
+         copy_list = '\n'.join(
+             os.path.abspath(os.path.expanduser(p)) for p in source_path_list
+         )
+         gsutil_alias, alias_gen = data_utils.get_gsutil_command()
+         sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
+         sync_command = (
+             f'{alias_gen}; echo "{copy_list}" | {gsutil_alias} '
+             f'cp -e -n -r -I gs://{self.name}{sub_path}'
+         )
+
+         log_path = logging.generate_tmp_logging_file_path(
+             constants._STORAGE_LOG_FILE_NAME
+         )
+
+         with rich_utils.safe_status(
+             ux_utils.spinner_message(
+                 f'Syncing {source_message} -> gs://{self.name}{sub_path}'
+             )
+         ):
+             data_utils.run_upload_cli(
+                 sync_command,
+                 self._ACCESS_DENIED_MESSAGE,
+                 bucket_name=self.name,
+                 log_path=log_path,
+             )
+
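For reference, the pipeline assembled above has roughly this shape (bucket and paths are made up): `-I` reads the newline-separated path list from stdin, `-n` skips objects that already exist, and the `/*` suffix uploads a directory's contents rather than the directory itself.

# Hedged sketch of the generated command shape (not emitted verbatim):
#   <alias_gen>; echo "/home/me/data/train/*
#   /home/me/config.yaml" | <gsutil_alias> cp -e -n -r -I gs://my-bucket/subdir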
+     def batch_gsutil_rsync(
+         self, source_path_list: List['constants.Path'], create_dirs: bool = False
+     ) -> None:
+         """Invokes gsutil rsync to batch upload a list of local paths.
+
+         Since gsutil rsync does not support include commands, we use a
+         negative look-ahead regex to exclude everything other than the
+         path(s) we want to upload.
+
+         Since gsutil rsync does not support batch operations, we construct
+         multiple commands to be run in parallel.
+
+         Args:
+             source_path_list: List of paths to local files or directories
+             create_dirs: If the local_path is a directory and this is set to
+                 False, the contents of the directory are directly uploaded to
+                 root of the bucket. If the local_path is a directory and this
+                 is set to True, the directory is created in the bucket root
+                 and contents are uploaded to it.
+         """
+
+         def get_file_sync_command(base_dir_path, file_names):
+             sync_format = '|'.join(file_names)
+             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
+             base_dir_path = shlex.quote(base_dir_path)
+             sync_command = (
+                 f'{alias_gen}; {gsutil_alias} '
+                 f"rsync -e -x '^(?!{sync_format}$).*' "
+                 f'{base_dir_path} gs://{self.name}{sub_path}'
+             )
+             return sync_command
+
+         def get_dir_sync_command(src_dir_path, dest_dir_name):
+             excluded_list = storage_utils.get_excluded_files(src_dir_path)
+             # we exclude .git directory from the sync
+             excluded_list.append(r'^\.git/.*$')
+             excludes = '|'.join(excluded_list)
+             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
+             src_dir_path = shlex.quote(src_dir_path)
+             sync_command = (
+                 f'{alias_gen}; {gsutil_alias} '
+                 f"rsync -e -r -x '({excludes})' {src_dir_path} "
+                 f'gs://{self.name}{sub_path}/{dest_dir_name}'
+             )
+             return sync_command
+
+         sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
+         # Generate message for upload
+         if len(source_path_list) > 1:
+             source_message = f'{len(source_path_list)} paths'
+         else:
+             source_message = source_path_list[0]
+
+         log_path = logging.generate_tmp_logging_file_path(
+             constants._STORAGE_LOG_FILE_NAME
+         )
+         sync_path = f'{source_message} -> gs://{self.name}{sub_path}/'
+         with rich_utils.safe_status(
+             ux_utils.spinner_message(f'Syncing {sync_path}', log_path=log_path)
+         ):
+             data_utils.parallel_upload(
+                 source_path_list,
+                 get_file_sync_command,
+                 get_dir_sync_command,
+                 log_path,
+                 self.name,
+                 self._ACCESS_DENIED_MESSAGE,
+                 create_dirs=create_dirs,
+                 max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS,
+             )
+         logger.info(
+             ux_utils.finishing_message(f'Storage synced: {sync_path}', log_path)
+         )
+
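The file-level exclusion trick above can be checked in isolation; a minimal sketch with made-up file names showing how the negative look-ahead keeps only the listed files:

import re

# For files ['a.txt', 'b.txt'], get_file_sync_command builds this -x pattern,
# which matches (and therefore excludes) every path that is not one of them.
pattern = re.compile(r'^(?!a.txt|b.txt$).*')
assert pattern.match('notes.md')       # excluded from the sync
assert not pattern.match('a.txt')      # kept: the look-ahead rejects the match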
+     def _get_bucket(self) -> Tuple['constants.StorageHandle', bool]:
+         """Obtains the GCS bucket.
+
+         If the bucket exists, this method will connect to the bucket.
+
+         If the bucket does not exist, there are three cases:
+           1) Raise an error if the bucket source starts with gs://
+           2) Return None if bucket has been externally deleted and
+              sync_on_reconstruction is False
+           3) Create and return a new bucket otherwise
+
+         Raises:
+             StorageSpecError: If externally created bucket is attempted to be
+                 mounted without specifying storage source.
+             StorageBucketCreateError: If creating the bucket fails
+             StorageBucketGetError: If fetching a bucket fails
+             StorageExternalDeletionError: If externally deleted storage is
+                 attempted to be fetched while reconstructing the storage for
+                 'sky storage delete' or 'sky start'
+         """
+         try:
+             bucket = self.client.get_bucket(self.name)
+             self._validate_existing_bucket()
+             return bucket, False
+         except gcp.not_found_exception() as e:
+             if isinstance(self.source, str) and self.source.startswith('gs://'):
+                 with ux_utils.print_exception_no_traceback():
+                     raise exceptions.StorageBucketGetError(
+                         'Attempted to use a non-existent bucket as a source: '
+                         f'{self.source}'
+                     ) from e
+             else:
+                 # If bucket cannot be found (i.e., does not exist), it is to be
+                 # created by Sky. However, creation is skipped if Store object
+                 # is being reconstructed for deletion or re-mount with
+                 # sky start, and error is raised instead.
+                 if self.sync_on_reconstruction:
+                     bucket = self._create_gcs_bucket(self.name, self.region)
+                     return bucket, True
+                 else:
+                     # This is raised when Storage object is reconstructed for
+                     # sky storage delete or to re-mount Storages with sky start
+                     # but the storage is already removed externally.
+                     raise exceptions.StorageExternalDeletionError(
+                         f'Attempted to fetch a non-existent bucket: {self.name}'
+                     ) from e
+         except gcp.forbidden_exception():
+             # Try public bucket to see if bucket exists
+             logger.info('External Bucket detected; Connecting to external bucket...')
+             try:
+                 a_client = gcp.anonymous_storage_client()
+                 bucket = a_client.bucket(self.name)
+                 # Check if bucket can be listed/read from
+                 next(bucket.list_blobs())
+                 return bucket, False
+             except (gcp.not_found_exception(), ValueError) as e:
+                 command = f'gsutil ls gs://{self.name}'
+                 with ux_utils.print_exception_no_traceback():
+                     raise exceptions.StorageBucketGetError(
+                         f'Bucket {self.name} does not exist.'
+                         f' To debug, consider running `{command}`.'
+                     ) from e
+
+     def _download_file(self, remote_path: str, local_path: str) -> None:
+         """Downloads a file from the GCS bucket to a local path.
+
+         Args:
+             remote_path: str; Remote path on GCS bucket
+             local_path: str; Local path on user's device
+         """
+         blob = self.bucket.blob(remote_path)
+         blob.download_to_filename(local_path, timeout=None)
+
+     def _create_gcs_bucket(
+         self, bucket_name: str, region='us-central1'
+     ) -> 'constants.StorageHandle':
+         """Creates a GCS bucket with a specific name in a specific region.
+
+         Args:
+             bucket_name: str; Name of bucket
+             region: str; Region name, e.g. us-central1, us-west1
+         """
+         try:
+             bucket = self.client.bucket(bucket_name)
+             bucket.storage_class = 'STANDARD'
+             new_bucket = self.client.create_bucket(bucket, location=region)
+         except gcp.conflict_exception():
+             # The bucket already exists, which is fine; return the existing
+             # handle instead of falling through to the (unbound) new_bucket.
+             return self.client.get_bucket(bucket_name)
+         except Exception as e:  # pylint: disable=broad-except
+             with ux_utils.print_exception_no_traceback():
+                 raise exceptions.StorageBucketCreateError(
+                     f'Attempted to create a bucket {self.name} but failed.'
+                 ) from e
+         logger.info(
+             f'  {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
+             f'{new_bucket.location} with storage class '
+             f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}'
+         )
+         return new_bucket
+
+     def _delete_gcs_bucket(self, bucket_name: str) -> bool:
+         """Deletes a GCS bucket, including all objects in the bucket.
+
+         Args:
+             bucket_name: str; Name of bucket
+
+         Returns:
+             bool; True if bucket was deleted, False if it was deleted externally.
+         """
+
+         with rich_utils.safe_status(
+             ux_utils.spinner_message(f'Deleting GCS bucket [green]{bucket_name}')
+         ):
+             try:
+                 self.client.get_bucket(bucket_name)
+             except gcp.forbidden_exception() as e:
+                 # Try public bucket to see if bucket exists
+                 with ux_utils.print_exception_no_traceback():
+                     raise PermissionError(
+                         'External Bucket detected. User not allowed to delete '
+                         'external bucket.'
+                     ) from e
+             except gcp.not_found_exception():
+                 # If bucket does not exist, it may have been deleted externally.
+                 # Do a no-op in that case.
+                 logger.debug(f'Bucket {bucket_name} does not exist.')
+                 return False
+             try:
+                 gsutil_alias, alias_gen = data_utils.get_gsutil_command()
+                 remove_obj_command = (
+                     f'{alias_gen};{gsutil_alias} rm -r gs://{bucket_name}'
+                 )
+                 subprocess.check_output(
+                     remove_obj_command,
+                     stderr=subprocess.STDOUT,
+                     shell=True,
+                     executable='/bin/bash',
+                 )
+                 return True
+             except subprocess.CalledProcessError as e:
+                 with ux_utils.print_exception_no_traceback():
+                     raise exceptions.StorageBucketDeleteError(
+                         f'Failed to delete GCS bucket {bucket_name}. '
+                         f'Detailed error: {e.output}'
+                     )
+
+     @classmethod
+     def _find_application_key_path(cls) -> str:
+         # Check the application default credentials in the environment variable.
+         # If the file does not exist, fallback to the default path.
+         application_key_path = os.environ.get(_GCP_APPLICATION_CREDENTIAL_ENV, None)
+         if application_key_path is not None:
+             if not os.path.isfile(os.path.expanduser(application_key_path)):
+                 raise FileNotFoundError(
+                     f'{_GCP_APPLICATION_CREDENTIAL_ENV}={application_key_path},'
+                     ' but the file does not exist.'
+                 )
+             return application_key_path
+         if not os.path.isfile(
+             os.path.expanduser(DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH)
+         ):
+             # Fallback to the default application credential path.
+             raise FileNotFoundError(DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH)
+         return DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH
+
+     @classmethod
+     def _get_identity_type(cls) -> Optional[GCPIdentityType]:
+         try:
+             account = cls.get_active_user_identity()
+         except exceptions.CloudUserIdentityError:
+             return None
+         if account is None:
+             return None
+         assert account is not None
+         if GCPIdentityType.SERVICE_ACCOUNT.value in account[0]:
+             return GCPIdentityType.SERVICE_ACCOUNT
+         return GCPIdentityType.SHARED_CREDENTIALS_FILE
+
+     @classmethod
+     def get_project_id(cls, dryrun: bool = False) -> str:
+         if dryrun:
+             return 'dryrun-project-id'
+         # pylint: disable=import-outside-toplevel
+         from google import auth  # type: ignore
+
+         _, project_id = auth.default()
+         if project_id is None:
+             raise exceptions.CloudUserIdentityError(
+                 'Failed to get GCP project id. Please make sure you have '
+                 'run the following: \n'
+                 f'{cls._INDENT_PREFIX}gcloud init; \n'
+                 f'{cls._INDENT_PREFIX}gcloud auth application-default login'
+             )
+         return project_id
+
+     @classmethod
+     def get_user_identities(cls) -> List[List[str]]:
+         """Returns the email address + project id of the active user."""
+         try:
+             account = _run_output(
+                 'gcloud auth list --filter=status:ACTIVE --format="value(account)"'
+             )
+             account = account.strip()
+         except subprocess.CalledProcessError as e:
+             with ux_utils.print_exception_no_traceback():
+                 raise exceptions.CloudUserIdentityError(
+                     'Failed to get GCP user identity with unknown '
+                     'exception.\n'
+                     ' Reason: '
+                     f'{common_utils.format_exception(e, use_bracket=True)}'
+                 ) from e
+         if not account:
+             with ux_utils.print_exception_no_traceback():
+                 raise exceptions.CloudUserIdentityError(
+                     'No GCP account is activated. Try running `gcloud '
+                     'auth list --filter=status:ACTIVE '
+                     '--format="value(account)"` and ensure it correctly '
+                     'returns the current user.'
+                 )
+         try:
+             project_id = cls.get_project_id()
+         except Exception as e:  # pylint: disable=broad-except
+             with ux_utils.print_exception_no_traceback():
+                 raise exceptions.CloudUserIdentityError(
+                     'Failed to get GCP user identity with unknown '
+                     'exception.\n'
+                     ' Reason: '
+                     f'{common_utils.format_exception(e, use_bracket=True)}'
+                 ) from e
+         # TODO: Return a list of identities in the profile when we support
+         # automatic switching for GCP. Currently we only support one identity.
+         return [[f'{account} [project_id={project_id}]']]
+
+     @classmethod
+     def get_active_user_identity_str(cls) -> Optional[str]:
+         user_identity = cls.get_active_user_identity()
+         if user_identity is None:
+             return None
+         return user_identity[0].replace('\n', '')
+
+     @classmethod
+     def check_credentials(cls) -> Tuple[bool, Optional[str]]:
+         """
+         Check if the credentials are valid for the GCS store.
+         """
+         try:
+             # Check google-api-python-client installation.
+             import googleapiclient  # noqa: F401
+             from google import auth  # type: ignore
+
+             # Check the installation of google-cloud-sdk.
+             _run_output('gcloud --version')
+         except (ImportError, subprocess.CalledProcessError) as e:
+             return False, (
+                 f'{cls._DEPENDENCY_HINT}\n'
+                 f'{cls._INDENT_PREFIX}Credentials may also need to be set. '
+                 f'{cls._CREDENTIAL_HINT}\n'
+                 f'{cls._INDENT_PREFIX}Details: '
+                 f'{common_utils.format_exception(e, use_bracket=True)}'
+             )
+
+         identity_type = cls._get_identity_type()
+         if identity_type == GCPIdentityType.SHARED_CREDENTIALS_FILE:
+             # These files are only required when using the shared credentials
+             # to access GCP. They are not required when using service account.
+             try:
+                 # These files are required because they will be synced to remote
+                 # VMs for `gsutil` to access private storage buckets.
+                 # `auth.default()` does not guarantee these files exist.
+                 for file in [
+                     '~/.config/gcloud/access_tokens.db',
+                     '~/.config/gcloud/credentials.db',
+                 ]:
+                     if not os.path.isfile(os.path.expanduser(file)):
+                         raise FileNotFoundError(file)
+             except FileNotFoundError as e:
+                 return False, (
+                     'Credentials are not set. '
+                     f'{cls._CREDENTIAL_HINT}\n'
+                     f'{cls._INDENT_PREFIX}Details: '
+                     f'{common_utils.format_exception(e, use_bracket=True)}'
+                 )
+
+             try:
+                 cls._find_application_key_path()
+             except FileNotFoundError as e:
+                 return False, (
+                     'Application credentials are not set. '
+                     f'{cls._APPLICATION_CREDENTIAL_HINT}\n'
+                     f'{cls._INDENT_PREFIX}Details: '
+                     f'{common_utils.format_exception(e, use_bracket=True)}'
+                 )
+
+         try:
+             # Check if application default credentials are set.
+             project_id = cls.get_project_id()
+
+             # Check if the user is activated.
+             identity = cls.get_active_user_identity()
+         except (
+             auth.exceptions.DefaultCredentialsError,
+             exceptions.CloudUserIdentityError,
+         ) as e:
+             # See also: https://stackoverflow.com/a/53307505/1165051
+             return False, (
+                 'Getting project ID or user identity failed. You can debug '
+                 'with `gcloud auth list`. To fix this, '
+                 f'{cls._CREDENTIAL_HINT[0].lower()}'
+                 f'{cls._CREDENTIAL_HINT[1:]}\n'
+                 f'{cls._INDENT_PREFIX}Details: '
+                 f'{common_utils.format_exception(e, use_bracket=True)}'
+             )
+
+         # Check APIs.
+         apis = (
+             ('cloudresourcemanager', 'Cloud Resource Manager'),
+             ('iam', 'Identity and Access Management (IAM)'),
+             ('storage', 'Cloud Storage'),
+         )
+         enabled_api = False
+         for endpoint, display_name in apis:
+             if is_api_disabled(endpoint, project_id):
+                 # For 'compute': ~55-60 seconds for the first run. If already
+                 # enabled, ~1s. Other API endpoints take ~1-5s to enable.
+                 if endpoint == 'compute':
+                     suffix = ' (free of charge; this may take a minute)'
+                 else:
+                     suffix = ' (free of charge)'
+                 print(f'\nEnabling {display_name} API{suffix}...')
+                 t1 = time.time()
+                 proc = subprocess.run(
+                     f'gcloud services enable {endpoint}.googleapis.com '
+                     f'--project {project_id}',
+                     check=False,
+                     shell=True,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.STDOUT,
+                 )
+                 if proc.returncode == 0:
+                     enabled_api = True
+                     print(f'Done. Took {time.time() - t1:.1f} secs.')
+
+         if enabled_api:
+             print(
+                 '\nHint: Enabled GCP API(s) may take a few minutes to take '
+                 'effect. If any Konduktor commands/calls failed, retry after '
+                 'some time.'
+             )
+
+         import google.auth  # noqa: F401
+
+         # This takes the user's credential info from
+         # "~/.config/gcloud/application_default_credentials.json".
+         credentials, project = google.auth.default()
+         crm = gcp.build(
+             'cloudresourcemanager', 'v1', credentials=credentials, cache_discovery=False
+         )
+         gcp_minimal_permissions = utils.get_minimal_permissions()
+         permissions = {'permissions': gcp_minimal_permissions}
+         request = crm.projects().testIamPermissions(resource=project, body=permissions)
+         with ux_utils.print_exception_no_traceback():
+             ret_permissions = request.execute().get('permissions', [])
+             diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
+             if diffs:
+                 identity_str = identity[0] if identity else None
+                 return False, (
+                     'The following permissions are not enabled for the current '
+                     f'GCP identity ({identity_str}):\n '
+                     f'{diffs}\n '
+                     'For more details, visit: https://konduktor.readthedocs.io/en/latest/cloud-setup/cloud-permissions/gcp.html'  # noqa: E501
+                 )
+         logger.info(
+             'GCP credentials are valid '
+             f'for the current identity {logging.CHECK_MARK_EMOJI}'
+         )
+         logger.info('Creating k8s secret with GCP credentials...')
+         set_ok, result = cls.set_secret_credentials()
+         if not set_ok:
+             logger.error(f'Failed to create k8s secret with GCP credentials: {result}')
+             return False, result
+         return True, None
+
+     @classmethod
+     def set_secret_credentials(cls) -> Tuple[bool, Optional[str]]:
+         """
+         Set the k8s secret storing the GCP credentials.
+         """
+         context = kubernetes_utils.get_current_kube_config_context_name()
+         namespace = kubernetes_utils.get_kube_config_context_namespace()
+         credentials_dir = os.environ.get('CLOUDSDK_CONFIG', DEFAULT_GCP_CREDENTIALS_DIR)
+         credentials_files = [
+             os.path.expanduser(os.path.join(credentials_dir, f))
+             for f in _CREDENTIAL_FILES
+         ]
+
+         secret_metadata = {
+             'labels': {
+                 backend_constants.SECRET_KIND_LABEL: 'GCS',
+             },
+         }
+
+         ok, result = kubernetes_utils.set_secret(
+             secret_name=cls._GCP_SECRET_NAME,
+             namespace=namespace,
+             context=context,
+             data={
+                 cls._GCP_CREDENTIALS_KEY: base64_utils.zip_base64encode(
+                     credentials_files
+                 )
+             },
+             secret_metadata=secret_metadata,
+         )
+         if not ok:
+             logger.error(f'Failed to set GCP credentials in k8s secret: \n{result}')
+             return False, result
+         else:
+             logger.info(
+                 f'GCP credentials set in k8s secret: {cls._GCP_SECRET_NAME} '
+                 f'in namespace {namespace} in context {context} '
+                 f'{logging.CHECK_MARK_EMOJI}'
+             )
+         return True, None
+
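A hedged sketch of the consuming side, assuming (as the name base64_utils.zip_base64encode suggests) the secret value is a base64-encoded zip of the credential files; this helper is illustrative, not the package's actual pod-side code:

import base64
import io
import zipfile

def unpack_gcloud_credentials(secret_value: str,
                              dest: str = '/root/.config/gcloud') -> None:
    # Decode the base64 payload and extract credentials.db,
    # access_tokens.db, etc. into the pod's gcloud config directory.
    archive = base64.b64decode(secret_value)
    with zipfile.ZipFile(io.BytesIO(archive)) as zf:
        zf.extractall(dest)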
+     @classmethod
+     def get_k8s_credential_name(cls) -> str:
+         return cls._GCP_SECRET_NAME
+
+
+ class GcsCloudStorage(storage_utils.CloudStorage):
+     """Google Cloud Storage."""
+
+     # We use gsutil as a basic implementation. One pro is that its -m
+     # multi-threaded download is nice, which frees us from implementing
+     # parallel workers on our end.
+     # The gsutil command is part of the Google Cloud SDK, and we reuse
+     # the installation logic here.
+     _INSTALL_GSUTIL = GOOGLE_SDK_INSTALLATION_COMMAND
+     _STORE: typing.Type[storage_utils.AbstractStore] = GcsStore
+
+     @property
+     def _gsutil_command(self):
+         gsutil_alias, alias_gen = data_utils.get_gsutil_command()
+         return (
+             f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
+             f'{DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH}; '
+             # Explicitly activate service account. Unlike the gcp packages
+             # and other GCP commands, gsutil does not automatically pick up
+             # the default credential keys when it is a service account.
+             'gcloud auth activate-service-account '
+             '--key-file=$GOOGLE_APPLICATION_CREDENTIALS '
+             '2> /dev/null || true; '
+             f'{gsutil_alias}'
+         )
+
+     def is_directory(self, url: str) -> bool:
+         """Returns whether 'url' is a directory.
+
+         In cloud object stores, a "directory" refers to a regular object whose
+         name is a prefix of other objects.
+         """
+         commands = [self._INSTALL_GSUTIL]
+         commands.append(f'{self._gsutil_command} ls -d {url}')
+         command = ' && '.join(commands)
+         p = subprocess.run(
+             command,
+             stdout=subprocess.PIPE,
+             shell=True,
+             check=True,
+             executable='/bin/bash',
+         )
+         out = p.stdout.decode().strip()
+         # Edge Case: Gcloud command is run for the first time #437
+         out = out.split('\n')[-1]
+         # If <url> is a bucket root, then we only need `gsutil` to succeed
+         # to make sure the bucket exists. It is already a directory.
+         _, key = data_utils.split_gcs_path(url)
+         if not key:
+             return True
+         # Otherwise, gsutil ls -d url will return:
+         #   --> url.rstrip('/') if url is not a directory
+         #   --> url with an ending '/' if url is a directory
+         if not out.endswith('/'):
+             assert out == url.rstrip('/'), (out, url)
+             return False
+         url = url if url.endswith('/') else (url + '/')
+         assert out == url, (out, url)
+         return True
+
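The check above leans on `gsutil ls -d` echoing the URL back with or without a trailing slash; a hedged illustration with a hypothetical bucket:

#   gsutil ls -d gs://my-bucket/ckpts/    ->  gs://my-bucket/ckpts/    (directory)
#   gsutil ls -d gs://my-bucket/model.pt  ->  gs://my-bucket/model.pt  (plain object)
#   gs://my-bucket (empty key)            ->  a directory, if `gsutil` succeeds at all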
+     def make_sync_dir_command(self, source: str, destination: str) -> str:
+         """Downloads a directory using gsutil."""
+         download_via_gsutil = (
+             f'{self._gsutil_command} rsync -e -r {source} {destination}'
+         )
+         all_commands = [self._INSTALL_GSUTIL]
+         all_commands.append(download_via_gsutil)
+         return ' && '.join(all_commands)
+
+     def make_sync_file_command(self, source: str, destination: str) -> str:
+         """Downloads a file using gsutil."""
+         download_via_gsutil = f'{self._gsutil_command} cp {source} {destination}'
+         all_commands = [self._INSTALL_GSUTIL]
+         all_commands.append(download_via_gsutil)
+         return ' && '.join(all_commands)
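These builders return shell strings rather than executing anything, so callers can run them locally or inject them into a remote pod's entrypoint. A minimal hedged usage sketch (bucket and destination are made up):

import subprocess

storage = GcsCloudStorage()
cmd = storage.make_sync_dir_command('gs://my-bucket/datasets', '/tmp/datasets')
# cmd chains the SDK install with the authenticated rsync download.
subprocess.run(cmd, shell=True, check=True, executable='/bin/bash')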