anemoi-utils 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. See the package's page on its public registry for more details.

anemoi/utils/remote/s3.py CHANGED
@@ -19,13 +19,13 @@ to use a different S3 compatible service::
19
19
  Alternatively, the `endpoint_url`, and keys can be set in one of
20
20
  the `~/.config/anemoi/settings.toml`
21
21
  or `~/.config/anemoi/settings-secrets.toml` files.
22
-
23
22
  """
24
23
 
25
24
  import logging
26
25
  import os
27
26
  import threading
28
27
  from copy import deepcopy
28
+ from typing import Any
29
29
  from typing import Iterable
30
30
 
31
31
  import tqdm
@@ -43,7 +43,21 @@ LOGGER = logging.getLogger(__name__)
43
43
  thread_local = threading.local()
44
44
 
45
45
 
46
- def s3_client(bucket, region=None):
46
+ def s3_client(bucket: str, region: str = None) -> Any:
47
+ """Get an S3 client for the specified bucket and region.
48
+
49
+ Parameters
50
+ ----------
51
+ bucket : str
52
+ The name of the S3 bucket.
53
+ region : str, optional
54
+ The AWS region of the S3 bucket.
55
+
56
+ Returns
57
+ -------
58
+ Any
59
+ The S3 client.
60
+ """
47
61
  import boto3
48
62
  from botocore import UNSIGNED
49
63
  from botocore.client import Config
@@ -107,18 +121,78 @@ def s3_client(bucket, region=None):
107
121
 
108
122
  class S3Upload(BaseUpload):
109
123
 
110
- def get_temporary_target(self, target, pattern):
124
+ def get_temporary_target(self, target: str, pattern: str) -> str:
125
+ """Get a temporary target path based on the given pattern.
126
+
127
+ Parameters
128
+ ----------
129
+ target : str
130
+ The original target path.
131
+ pattern : str
132
+ The pattern to format the temporary path.
133
+
134
+ Returns
135
+ -------
136
+ str
137
+ The temporary target path.
138
+ """
111
139
  return target
112
140
 
113
- def rename_target(self, target, temporary_target):
141
+ def rename_target(self, target: str, temporary_target: str) -> None:
142
+ """Rename the target to a new target path.
143
+
144
+ Parameters
145
+ ----------
146
+ target : str
147
+ The original target path.
148
+ temporary_target : str
149
+ The new target path.
150
+ """
114
151
  pass
115
152
 
116
- def delete_target(self, target):
153
+ def delete_target(self, target: str) -> None:
154
+ """Delete the target path.
155
+
156
+ Parameters
157
+ ----------
158
+ target : str
159
+ The target path to delete.
160
+ """
117
161
  pass
118
162
  # delete(target)
119
163
 
120
- def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
121
-
164
+ def _transfer_file(
165
+ self, source: str, target: str, overwrite: bool, resume: bool, verbosity: int, threads: int, config: dict = None
166
+ ) -> int:
167
+ """Transfer a file to S3.
168
+
169
+ Parameters
170
+ ----------
171
+ source : str
172
+ The source file path.
173
+ target : str
174
+ The target S3 path.
175
+ overwrite : bool
176
+ Whether to overwrite the target if it exists.
177
+ resume : bool
178
+ Whether to resume the transfer if possible.
179
+ verbosity : int
180
+ The verbosity level.
181
+ threads : int
182
+ The number of threads to use.
183
+ config : dict, optional
184
+ Additional configuration options.
185
+
186
+ Returns
187
+ -------
188
+ int
189
+ The size of the transferred file.
190
+
191
+ Raises
192
+ ------
193
+ ValueError
194
+ If the target already exists and overwrite or resume is not specified.
195
+ """
122
196
  from botocore.exceptions import ClientError
123
197
 
124
198
  assert target.startswith("s3://")
@@ -162,7 +236,18 @@ class S3Upload(BaseUpload):
162
236
 
163
237
  class S3Download(BaseDownload):
164
238
 
165
- def copy(self, source, target, **kwargs):
239
+ def copy(self, source: str, target: str, **kwargs) -> None:
240
+ """Copy a file or folder from S3 to the local filesystem.
241
+
242
+ Parameters
243
+ ----------
244
+ source : str
245
+ The source S3 path.
246
+ target : str
247
+ The target local path.
248
+ kwargs : dict
249
+ Additional arguments for the transfer.
250
+ """
166
251
  assert source.startswith("s3://")
167
252
 
168
253
  if source.endswith("/"):
@@ -170,23 +255,108 @@ class S3Download(BaseDownload):
170
255
  else:
171
256
  self.transfer_file(source=source, target=target, **kwargs)
172
257
 
173
- def list_source(self, source):
258
+ def list_source(self, source: str) -> Iterable:
259
+ """List the objects in the source S3 path.
260
+
261
+ Parameters
262
+ ----------
263
+ source : str
264
+ The source S3 path.
265
+
266
+ Returns
267
+ -------
268
+ Iterable
269
+ An iterable of S3 objects.
270
+ """
174
271
  yield from _list_objects(source)
175
272
 
176
- def source_path(self, s3_object, source):
273
+ def source_path(self, s3_object: dict, source: str) -> str:
274
+ """Get the S3 path of the object.
275
+
276
+ Parameters
277
+ ----------
278
+ s3_object : dict
279
+ The S3 object.
280
+ source : str
281
+ The source S3 path.
282
+
283
+ Returns
284
+ -------
285
+ str
286
+ The S3 path of the object.
287
+ """
177
288
  _, _, bucket, _ = source.split("/", 3)
178
289
  return f"s3://{bucket}/{s3_object['Key']}"
179
290
 
180
- def target_path(self, s3_object, source, target):
291
+ def target_path(self, s3_object: dict, source: str, target: str) -> str:
292
+ """Get the local path for the S3 object.
293
+
294
+ Parameters
295
+ ----------
296
+ s3_object : dict
297
+ The S3 object.
298
+ source : str
299
+ The source S3 path.
300
+ target : str
301
+ The target local path.
302
+
303
+ Returns
304
+ -------
305
+ str
306
+ The local path for the S3 object.
307
+ """
181
308
  _, _, _, folder = source.split("/", 3)
182
309
  local_path = os.path.join(target, os.path.relpath(s3_object["Key"], folder))
183
310
  os.makedirs(os.path.dirname(local_path), exist_ok=True)
184
311
  return local_path
185
312
 
186
- def source_size(self, s3_object):
313
+ def source_size(self, s3_object: dict) -> int:
314
+ """Get the size of the S3 object.
315
+
316
+ Parameters
317
+ ----------
318
+ s3_object : dict
319
+ The S3 object.
320
+
321
+ Returns
322
+ -------
323
+ int
324
+ The size of the S3 object.
325
+ """
187
326
  return s3_object["Size"]
188
327
 
189
- def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
328
+ def _transfer_file(
329
+ self, source: str, target: str, overwrite: bool, resume: bool, verbosity: int, threads: int, config: dict = None
330
+ ) -> int:
331
+ """Transfer a file from S3 to the local filesystem.
332
+
333
+ Parameters
334
+ ----------
335
+ source : str
336
+ The source S3 path.
337
+ target : str
338
+ The target local path.
339
+ overwrite : bool
340
+ Whether to overwrite the target if it exists.
341
+ resume : bool
342
+ Whether to resume the transfer if possible.
343
+ verbosity : int
344
+ The verbosity level.
345
+ threads : int
346
+ The number of threads to use.
347
+ config : dict, optional
348
+ Additional configuration options.
349
+
350
+ Returns
351
+ -------
352
+ int
353
+ The size of the transferred file.
354
+
355
+ Raises
356
+ ------
357
+ ValueError
358
+ If the target does not exist on S3.
359
+ """
190
360
  # from boto3.s3.transfer import TransferConfig
191
361
 
192
362
  _, _, bucket, key = source.split("/", 3)
@@ -234,7 +404,21 @@ class S3Download(BaseDownload):
234
404
  return size
235
405
 
236
406
 
237
- def _list_objects(target, batch=False):
407
+ def _list_objects(target: str, batch: bool = False) -> Iterable:
408
+ """List the objects in the target S3 path.
409
+
410
+ Parameters
411
+ ----------
412
+ target : str
413
+ The target S3 path.
414
+ batch : bool, optional
415
+ Whether to return objects in batches, by default False.
416
+
417
+ Returns
418
+ -------
419
+ Iterable
420
+ An iterable of S3 objects.
421
+ """
238
422
  _, _, bucket, prefix = target.split("/", 3)
239
423
  s3 = s3_client(bucket)
240
424
 
@@ -249,7 +433,14 @@ def _list_objects(target, batch=False):
249
433
  yield from objects
250
434
 
251
435
 
252
- def _delete_folder(target) -> None:
436
+ def _delete_folder(target: str) -> None:
437
+ """Delete a folder from S3.
438
+
439
+ Parameters
440
+ ----------
441
+ target : str
442
+ The target S3 folder path.
443
+ """
253
444
  _, _, bucket, _ = target.split("/", 3)
254
445
  s3 = s3_client(bucket)
255
446
 
@@ -261,7 +452,14 @@ def _delete_folder(target) -> None:
261
452
  LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
262
453
 
263
454
 
264
- def _delete_file(target) -> None:
455
+ def _delete_file(target: str) -> None:
456
+ """Delete a file from S3.
457
+
458
+ Parameters
459
+ ----------
460
+ target : str
461
+ The target S3 file path.
462
+ """
265
463
  from botocore.exceptions import ClientError
266
464
 
267
465
  _, _, bucket, key = target.split("/", 3)
@@ -284,14 +482,13 @@ def _delete_file(target) -> None:
284
482
  LOGGER.info(f"{target} is deleted")
285
483
 
286
484
 
287
- def delete(target) -> None:
485
+ def delete(target: str) -> None:
288
486
  """Delete a file or a folder from S3.
289
487
 
290
488
  Parameters
291
489
  ----------
292
490
  target : str
293
- The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
294
- assumed to be a folder, otherwise it is assumed to be a file.
491
+ The URL of a file or a folder on S3. The URL should start with 's3://'.
295
492
  """
296
493
 
297
494
  assert target.startswith("s3://")
@@ -302,18 +499,18 @@ def delete(target) -> None:
302
499
  _delete_file(target)
303
500
 
304
501
 
305
- def list_folder(folder) -> Iterable:
306
- """List the sub folders in a folder on S3.
502
+ def list_folder(folder: str) -> Iterable:
503
+ """List the subfolders in a folder on S3.
307
504
 
308
505
  Parameters
309
506
  ----------
310
507
  folder : str
311
- The URL of a folder on S3. The url should start with 's3://'.
508
+ The URL of a folder on S3. The URL should start with 's3://'.
312
509
 
313
510
  Returns
314
511
  -------
315
512
  list
316
- A list of the subfolders names in the folder.
513
+ A list of the subfolder names in the folder.
317
514
  """
318
515
 
319
516
  assert folder.startswith("s3://")
@@ -330,13 +527,13 @@ def list_folder(folder) -> Iterable:
330
527
  yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
331
528
 
332
529
 
333
- def object_info(target) -> dict:
530
+ def object_info(target: str) -> dict:
334
531
  """Get information about an object on S3.
335
532
 
336
533
  Parameters
337
534
  ----------
338
535
  target : str
339
- The URL of a file or a folder on S3. The url should start with 's3://'.
536
+ The URL of a file or a folder on S3. The URL should start with 's3://'.
340
537
 
341
538
  Returns
342
539
  -------
@@ -355,13 +552,13 @@ def object_info(target) -> dict:
355
552
  raise
356
553
 
357
554
 
358
- def object_acl(target) -> dict:
555
+ def object_acl(target: str) -> dict:
359
556
  """Get information about an object's ACL on S3.
360
557
 
361
558
  Parameters
362
559
  ----------
363
560
  target : str
364
- The URL of a file or a folder on S3. The url should start with 's3://'.
561
+ The URL of a file or a folder on S3. The URL should start with 's3://'.
365
562
 
366
563
  Returns
367
564
  -------
@@ -375,14 +572,40 @@ def object_acl(target) -> dict:
375
572
  return s3.get_object_acl(Bucket=bucket, Key=key)
376
573
 
377
574
 
378
- def download(source, target, *args, **kwargs):
575
+ def download(source: str, target: str, *args, **kwargs) -> None:
576
+ """Download a file or folder from S3 to the local filesystem.
577
+
578
+ Parameters
579
+ ----------
580
+ source : str
581
+ The source S3 path.
582
+ target : str
583
+ The target local path.
584
+ args : tuple
585
+ Additional positional arguments.
586
+ kwargs : dict
587
+ Additional keyword arguments.
588
+ """
379
589
  from . import transfer
380
590
 
381
591
  assert source.startswith("s3://"), f"source {source} should start with 's3://'"
382
592
  return transfer(source, target, *args, **kwargs)
383
593
 
384
594
 
385
- def upload(source, target, *args, **kwargs):
595
+ def upload(source: str, target: str, *args, **kwargs) -> None:
596
+ """Upload a file or folder to S3.
597
+
598
+ Parameters
599
+ ----------
600
+ source : str
601
+ The source file or folder path.
602
+ target : str
603
+ The target S3 path.
604
+ args : tuple
605
+ Additional positional arguments.
606
+ kwargs : dict
607
+ Additional keyword arguments.
608
+ """
386
609
  from . import transfer
387
610
 
388
611
  assert target.startswith("s3://"), f"target {target} should start with 's3://'"
@@ -17,7 +17,24 @@ from . import BaseUpload
17
17
  LOGGER = logging.getLogger(__name__)
18
18
 
19
19
 
20
- def call_process(*args):
20
+ def call_process(*args: str) -> str:
21
+ """Execute a subprocess with the given arguments and return its output.
22
+
23
+ Parameters
24
+ ----------
25
+ args : str
26
+ The command and its arguments to execute.
27
+
28
+ Returns
29
+ -------
30
+ str
31
+ The standard output of the command.
32
+
33
+ Raises
34
+ ------
35
+ RuntimeError
36
+ If the command returns a non-zero exit code.
37
+ """
21
38
  proc = subprocess.Popen(
22
39
  args,
23
40
  stdout=subprocess.PIPE,
@@ -34,7 +51,24 @@ def call_process(*args):
34
51
 
35
52
  class SshBaseUpload(BaseUpload):
36
53
 
37
- def _parse_target(self, target):
54
+ def _parse_target(self, target: str) -> tuple[str, str]:
55
+ """Parse the SSH target string into hostname and path.
56
+
57
+ Parameters
58
+ ----------
59
+ target : str
60
+ The SSH target string in the format 'ssh://hostname:path'.
61
+
62
+ Returns
63
+ -------
64
+ tuple[str, str]
65
+ A tuple containing the hostname and the path.
66
+
67
+ Raises
68
+ ------
69
+ Exception
70
+ If the path contains suspicious '..'.
71
+ """
38
72
  assert target.startswith("ssh://"), target
39
73
 
40
74
  target = target[6:]
@@ -49,20 +83,50 @@ class SshBaseUpload(BaseUpload):
49
83
 
50
84
  return hostname, path
51
85
 
52
- def get_temporary_target(self, target, pattern):
86
+ def get_temporary_target(self, target: str, pattern: str) -> str:
87
+ """Get a temporary target path based on the given pattern.
88
+
89
+ Parameters
90
+ ----------
91
+ target : str
92
+ The original target path.
93
+ pattern : str
94
+ The pattern to format the temporary path.
95
+
96
+ Returns
97
+ -------
98
+ str
99
+ The temporary target path.
100
+ """
53
101
  hostname, path = self._parse_target(target)
54
102
  if pattern is not None:
55
103
  dirname, basename = os.path.split(path)
56
104
  path = pattern.format(dirname=dirname, basename=basename)
57
105
  return f"ssh://{hostname}:{path}"
58
106
 
59
- def rename_target(self, target, new_target):
107
+ def rename_target(self, target: str, new_target: str) -> None:
108
+ """Rename the target to a new target path.
109
+
110
+ Parameters
111
+ ----------
112
+ target : str
113
+ The original target path.
114
+ new_target : str
115
+ The new target path.
116
+ """
60
117
  hostname, path = self._parse_target(target)
61
118
  hostname, new_path = self._parse_target(new_target)
62
119
  call_process("ssh", hostname, "mkdir", "-p", shlex.quote(os.path.dirname(new_path)))
63
120
  call_process("ssh", hostname, "mv", shlex.quote(path), shlex.quote(new_path))
64
121
 
65
- def delete_target(self, target):
122
+ def delete_target(self, target: str) -> None:
123
+ """Delete the target path.
124
+
125
+ Parameters
126
+ ----------
127
+ target : str
128
+ The target path to delete.
129
+ """
66
130
  pass
67
131
  # hostname, path = self._parse_target(target)
68
132
  # LOGGER.info(f"Deleting {target}")
@@ -71,7 +135,33 @@ class SshBaseUpload(BaseUpload):
71
135
 
72
136
  class RsyncUpload(SshBaseUpload):
73
137
 
74
- def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
138
+ def _transfer_file(
139
+ self, source: str, target: str, overwrite: bool, resume: bool, verbosity: int, threads: int, config: dict = None
140
+ ) -> int:
141
+ """Transfer a file using rsync.
142
+
143
+ Parameters
144
+ ----------
145
+ source : str
146
+ The source file path.
147
+ target : str
148
+ The target file path.
149
+ overwrite : bool
150
+ Whether to overwrite the target if it exists.
151
+ resume : bool
152
+ Whether to resume the transfer if possible.
153
+ verbosity : int
154
+ The verbosity level.
155
+ threads : int
156
+ The number of threads to use.
157
+ config : dict, optional
158
+ Additional configuration options.
159
+
160
+ Returns
161
+ -------
162
+ int
163
+ The size of the transferred file.
164
+ """
75
165
  hostname, path = self._parse_target(target)
76
166
 
77
167
  size = os.path.getsize(source)
@@ -95,7 +185,38 @@ class RsyncUpload(SshBaseUpload):
95
185
 
96
186
  class ScpUpload(SshBaseUpload):
97
187
 
98
- def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
188
+ def _transfer_file(
189
+ self, source: str, target: str, overwrite: bool, resume: bool, verbosity: int, threads: int, config: dict = None
190
+ ) -> int:
191
+ """Transfer a file using scp.
192
+
193
+ Parameters
194
+ ----------
195
+ source : str
196
+ The source file path.
197
+ target : str
198
+ The target file path.
199
+ overwrite : bool
200
+ Whether to overwrite the target if it exists.
201
+ resume : bool
202
+ Whether to resume the transfer if possible.
203
+ verbosity : int
204
+ The verbosity level.
205
+ threads : int
206
+ The number of threads to use.
207
+ config : dict, optional
208
+ Additional configuration options.
209
+
210
+ Returns
211
+ -------
212
+ int
213
+ The size of the transferred file.
214
+
215
+ Raises
216
+ ------
217
+ ValueError
218
+ If the target already exists and overwrite or resume is not specified.
219
+ """
99
220
  hostname, path = self._parse_target(target)
100
221
 
101
222
  size = os.path.getsize(source)
@@ -128,7 +249,18 @@ class ScpUpload(SshBaseUpload):
128
249
  return size
129
250
 
130
251
 
131
- def upload(source, target, **kwargs) -> None:
252
+ def upload(source: str, target: str, **kwargs) -> None:
253
+ """Upload a file or folder to the target location using rsync.
254
+
255
+ Parameters
256
+ ----------
257
+ source : str
258
+ The source file or folder path.
259
+ target : str
260
+ The target path.
261
+ kwargs : dict
262
+ Additional arguments for the transfer.
263
+ """
132
264
  uploader = RsyncUpload()
133
265
 
134
266
  if os.path.isdir(source):