pos3 0.1.0rc1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pos3/__init__.py CHANGED
@@ -17,12 +17,98 @@ from typing import Any
17
17
  from urllib.parse import urlparse
18
18
 
19
19
  import boto3
20
+ from botocore import UNSIGNED
21
+ from botocore.config import Config
20
22
  from botocore.exceptions import ClientError
21
23
  from tqdm import tqdm
22
24
 
23
25
  logger = logging.getLogger(__name__)
24
26
 
25
27
 
28
@dataclass(frozen=True)
class Profile:
    """Configuration for an S3-compatible endpoint.

    Instances are frozen (immutable) and compare by value.

    Attributes:
        local_name: Identifier used in cache path (e.g., 'nebius'). Cannot be '_' (reserved).
        endpoint: S3 endpoint URL (e.g., 'https://storage.eu-north1.nebius.cloud').
        public: If True, use anonymous access (no credentials required).
        region: Optional AWS region name.

    Raises:
        ValueError: If `local_name` is '_', empty, not a string, or contains
            characters other than alphanumerics, dash and underscore.
    """

    local_name: str
    endpoint: str
    public: bool = False
    region: str | None = None

    def __post_init__(self) -> None:
        """Validate `local_name` right after the dataclass-generated __init__ runs."""
        name = self.local_name
        if name == "_":
            raise ValueError("Profile local_name cannot be '_' (reserved for default)")
        # Check the type first: iterating a non-string (int, bytes, ...) would
        # otherwise surface as TypeError/AttributeError instead of ValueError.
        is_valid = isinstance(name, str) and bool(name) and all(c.isalnum() or c in "-_" for c in name)
        if not is_valid:
            raise ValueError(f"Invalid local_name '{name}': use only alphanumeric, dash, underscore")
49
+
50
+
51
+ _PROFILES: dict[str, Profile] = {}
52
+
53
+
54
def register_profile(
    name: str,
    endpoint: str,
    public: bool = False,
    region: str | None = None,
    local_name: str | None = None,
) -> None:
    """Register a named profile for S3 access.

    Builds a Profile from the arguments (see the Profile class for field
    details) and stores it in the module-level registry under `name`.
    When `local_name` is not given (or is empty), `name` is used instead.

    Registering the same name again with an identical configuration is
    accepted; a differing configuration is rejected.

    Raises:
        ValueError: If `name` is already registered with a different config,
            or if Profile rejects the resulting local name.
    """
    cache_name = local_name if local_name else name
    candidate = Profile(local_name=cache_name, endpoint=endpoint, public=public, region=region)

    previous = _PROFILES.get(name)
    if previous is None or previous == candidate:
        _PROFILES[name] = candidate
        return

    raise ValueError(f"Profile '{name}' already registered with different config")
71
+
72
+
73
def _resolve_profile(profile: str | Profile | None) -> Profile | None:
    """Turn a profile reference into a Profile object.

    Args:
        profile: None, the name of a registered profile, or a Profile object.

    Returns:
        The input unchanged when it is None or already a Profile; otherwise
        the Profile registered under that name.

    Raises:
        ValueError: If `profile` is a name that has not been registered.
    """
    if isinstance(profile, Profile) or profile is None:
        return profile

    registered = _PROFILES.get(profile)
    if registered is None:
        raise ValueError(f"Unknown profile: '{profile}'. Register with pos3.register_profile() first.")
    return registered
90
+
91
+
92
def _create_s3_client(profile: Profile | None = None):
    """Build a boto3 S3 client, optionally configured from a profile.

    Args:
        profile: None to use boto3's defaults, or a Profile whose endpoint,
            region and public flag configure the client.

    Returns:
        The client returned by ``boto3.client("s3", ...)``.
    """
    if profile is None:
        return boto3.client("s3")

    endpoint_opts: dict[str, Any] = {"endpoint_url": profile.endpoint}
    # The region is forwarded only when the profile specifies one.
    region_opts: dict[str, Any] = {"region_name": profile.region} if profile.region else {}
    # Public profiles use unsigned (anonymous) requests.
    signing_opts: dict[str, Any] = {"config": Config(signature_version=UNSIGNED)} if profile.public else {}

    return boto3.client("s3", **endpoint_opts, **region_opts, **signing_opts)
110
+
111
+
26
112
  class _NullTqdm(nullcontext):
27
113
  def update(self, *_args: Any, **_kwargs: Any) -> None: # pragma: no cover - trivial
28
114
  pass
@@ -153,11 +239,13 @@ class _Options:
153
239
  cache_root: str = "~/.cache/positronic/s3/"
154
240
  show_progress: bool = True
155
241
  max_workers: int = 10
242
+ default_profile: Profile | None = None
156
243
 
157
- def cache_path_for(self, remote: str) -> Path:
244
+ def cache_path_for(self, remote: str, profile: Profile | None = None) -> Path:
158
245
  bucket, key = _parse_s3_url(remote)
159
246
  cache_root = Path(self.cache_root).expanduser().resolve()
160
- return cache_root / bucket / key
247
+ local_name = profile.local_name if profile else "_"
248
+ return cache_root / local_name / bucket / key
161
249
 
162
250
 
163
251
  @dataclass
@@ -166,6 +254,7 @@ class _DownloadRegistration:
166
254
  local_path: Path
167
255
  delete: bool
168
256
  exclude: list[str] | None
257
+ profile: Profile | None = None
169
258
  ready: threading.Event = field(default_factory=threading.Event)
170
259
  error: Exception | None = None
171
260
 
@@ -177,6 +266,7 @@ class _DownloadRegistration:
177
266
  and self.local_path == other.local_path
178
267
  and self.delete == other.delete
179
268
  and self.exclude == other.exclude
269
+ and self.profile == other.profile
180
270
  )
181
271
 
182
272
 
@@ -188,6 +278,7 @@ class _UploadRegistration:
188
278
  delete: bool
189
279
  sync_on_error: bool
190
280
  exclude: list[str] | None
281
+ profile: Profile | None = None
191
282
  last_sync: float = 0.0
192
283
 
193
284
  def __eq__(self, other):
@@ -200,6 +291,7 @@ class _UploadRegistration:
200
291
  and self.delete == other.delete
201
292
  and self.sync_on_error == other.sync_on_error
202
293
  and self.exclude == other.exclude
294
+ and self.profile == other.profile
203
295
  )
204
296
 
205
297
 
@@ -214,15 +306,28 @@ class _Mirror:
214
306
  self.cache_root = Path(self.options.cache_root).expanduser().resolve()
215
307
  self.cache_root.mkdir(parents=True, exist_ok=True)
216
308
 
217
- self.s3_client = boto3.client("s3")
309
+ self._default_profile = options.default_profile
310
+ self._clients: dict[Profile | None, Any] = {}
218
311
 
219
- self._downloads: dict[str, _DownloadRegistration] = {}
220
- self._uploads: dict[str, _UploadRegistration] = {}
312
+ self._downloads: dict[tuple[str, Profile | None], _DownloadRegistration] = {}
313
+ self._uploads: dict[tuple[str, Profile | None], _UploadRegistration] = {}
221
314
  self._lock = threading.RLock()
222
315
 
223
316
  self._stop_event: threading.Event | None = None
224
317
  self._sync_thread: threading.Thread | None = None
225
318
 
319
def _effective_profile(self, profile: str | Profile | None) -> Profile | None:
    """Resolve a profile reference, falling back to this mirror's default profile.

    A name is looked up via `_resolve_profile`; if the result is None, the
    default profile stored on the instance is returned (it may also be None).
    """
    explicit = _resolve_profile(profile)
    if explicit is None:
        return self._default_profile
    return explicit
323
+
324
def _get_client(self, profile: Profile | None = None) -> Any:
    """Return the cached S3 client for a profile, creating it on first use.

    When `profile` is None the instance's default profile is used as the
    cache key (which may itself be None).
    """
    cache_key = self._default_profile if profile is None else profile

    if cache_key in self._clients:
        return self._clients[cache_key]

    created = _create_s3_client(cache_key)
    self._clients[cache_key] = created
    return created
330
+
226
331
  @property
227
332
  def running(self) -> bool:
228
333
  return self._stop_event is not None
@@ -247,6 +352,7 @@ class _Mirror:
247
352
  local: str | Path | None,
248
353
  delete: bool,
249
354
  exclude: list[str] | None = None,
355
+ profile: str | Profile | None = None,
250
356
  ) -> Path:
251
357
  """
252
358
  Register (and perform if needed) a download from a remote S3 bucket path to a local directory or file.
@@ -257,6 +363,7 @@ class _Mirror:
257
363
  local (str | Path | None): Local directory or file destination. If None, uses cache path from options.
258
364
  delete (bool): If True, deletes local files not present in S3.
259
365
  exclude (list[str] | None): List of glob patterns to exclude from download.
366
+ profile: S3 profile name or Profile config for custom endpoints.
260
367
 
261
368
  Returns:
262
369
  Path: The canonical local path associated with this download registration.
@@ -265,37 +372,44 @@ class _Mirror:
265
372
  FileNotFoundError: If remote is a local path that does not exist.
266
373
  ValueError: If download registration conflicts with an existing download or upload or parameters differ.
267
374
  """
375
+ effective_profile = self._effective_profile(profile)
376
+
268
377
  if not _is_s3_path(remote):
269
378
  path = Path(remote).expanduser().resolve()
270
379
  return path
271
380
 
272
381
  normalized = _normalize_s3_url(remote)
273
- local_path = self.options.cache_path_for(remote) if local is None else Path(local).expanduser().resolve()
382
+ local_path = (
383
+ self.options.cache_path_for(remote, effective_profile)
384
+ if local is None
385
+ else Path(local).expanduser().resolve()
386
+ )
274
387
  new_registration = _DownloadRegistration(
275
- remote=normalized, local_path=local_path, delete=delete, exclude=exclude
388
+ remote=normalized, local_path=local_path, delete=delete, exclude=exclude, profile=effective_profile
276
389
  )
277
390
 
278
391
  with self._lock:
279
- existing = self._downloads.get(normalized)
392
+ reg_key = (normalized, effective_profile)
393
+ existing = self._downloads.get(reg_key)
280
394
  if existing:
281
395
  if existing != new_registration:
282
396
  raise ValueError(f"Download for '{normalized}' already registered with different parameters")
283
397
  registration = existing
284
398
  need_download = False
285
399
  else:
286
- self._check_download_conflicts(normalized)
287
- self._downloads[normalized] = new_registration
400
+ self._check_download_conflicts(normalized, effective_profile)
401
+ self._downloads[reg_key] = new_registration
288
402
  registration = new_registration
289
403
  need_download = True
290
404
 
291
405
  if need_download:
292
406
  try:
293
- self._perform_download(normalized, local_path, delete, exclude)
407
+ self._perform_download(normalized, local_path, delete, exclude, effective_profile)
294
408
  except Exception as exc:
295
409
  registration.error = exc
296
410
  registration.ready.set()
297
411
  with self._lock:
298
- self._downloads.pop(normalized, None)
412
+ self._downloads.pop(reg_key, None)
299
413
  raise
300
414
  else:
301
415
  registration.ready.set()
@@ -314,6 +428,7 @@ class _Mirror:
314
428
  delete,
315
429
  sync_on_error,
316
430
  exclude: list[str] | None = None,
431
+ profile: str | Profile | None = None,
317
432
  ) -> Path:
318
433
  """
319
434
  Register (and perform if needed) an upload from a local directory or file to a remote S3 bucket path.
@@ -325,6 +440,7 @@ class _Mirror:
325
440
  delete (bool): If True, deletes remote files not present locally.
326
441
  sync_on_error (bool): If True, attempts to sync files even when encountering errors.
327
442
  exclude (list[str] | None): List of glob patterns to exclude from upload.
443
+ profile: S3 profile name or Profile config for custom endpoints.
328
444
 
329
445
  Returns:
330
446
  Path: The canonical local path associated with this upload registration.
@@ -332,13 +448,19 @@ class _Mirror:
332
448
  Raises:
333
449
  ValueError: If upload registration conflicts with an existing download or upload or parameters differ.
334
450
  """
451
+ effective_profile = self._effective_profile(profile)
452
+
335
453
  if not _is_s3_path(remote):
336
454
  path = Path(remote).expanduser().resolve()
337
455
  path.mkdir(parents=True, exist_ok=True)
338
456
  return path
339
457
 
340
458
  normalized = _normalize_s3_url(remote)
341
- local_path = self.options.cache_path_for(remote) if local is None else Path(local).expanduser().resolve()
459
+ local_path = (
460
+ self.options.cache_path_for(remote, effective_profile)
461
+ if local is None
462
+ else Path(local).expanduser().resolve()
463
+ )
342
464
 
343
465
  new_registration = _UploadRegistration(
344
466
  remote=normalized,
@@ -347,18 +469,20 @@ class _Mirror:
347
469
  delete=delete,
348
470
  sync_on_error=sync_on_error,
349
471
  exclude=exclude,
472
+ profile=effective_profile,
350
473
  last_sync=0,
351
474
  )
352
475
 
353
476
  with self._lock:
354
- existing = self._uploads.get(normalized)
477
+ reg_key = (normalized, effective_profile)
478
+ existing = self._uploads.get(reg_key)
355
479
  if existing:
356
480
  if existing != new_registration:
357
481
  raise ValueError(f"Upload for '{normalized}' already registered with different parameters")
358
482
  return existing.local_path
359
483
 
360
484
  self._check_upload_conflicts(new_registration)
361
- self._uploads[normalized] = new_registration
485
+ self._uploads[reg_key] = new_registration
362
486
  if interval is not None:
363
487
  self._ensure_background_thread_unlocked()
364
488
 
@@ -373,18 +497,23 @@ class _Mirror:
373
497
  delete_remote: bool,
374
498
  sync_on_error: bool,
375
499
  exclude: list[str] | None = None,
500
+ profile: str | Profile | None = None,
376
501
  ) -> Path:
377
- local_path = self.download(remote, local, delete_local, exclude)
502
+ # Let download() and upload() handle profile resolution and normalization
503
+ local_path = self.download(remote, local, delete_local, exclude, profile)
378
504
  if not _is_s3_path(remote):
379
505
  return local_path
380
506
 
381
507
  normalized = _normalize_s3_url(remote)
508
+ effective_profile = self._effective_profile(profile)
382
509
  # Unregister the download to allow upload registration for the same remote
383
- self._downloads.pop(normalized, None)
384
- return self.upload(remote, local_path, interval, delete_remote, sync_on_error, exclude)
510
+ self._downloads.pop((normalized, effective_profile), None)
511
+ return self.upload(remote, local_path, interval, delete_remote, sync_on_error, exclude, profile)
385
512
 
386
- def ls(self, prefix: str, recursive: bool = False) -> list[str]:
513
+ def ls(self, prefix: str, recursive: bool = False, profile: str | Profile | None = None) -> list[str]:
387
514
  """Lists objects under the given prefix, working for both local directories and S3 prefixes."""
515
+ effective_profile = self._effective_profile(profile)
516
+
388
517
  if _is_s3_path(prefix):
389
518
  normalized = _normalize_s3_url(prefix)
390
519
  bucket, key = _parse_s3_url(normalized)
@@ -392,7 +521,7 @@ class _Mirror:
392
521
  if key:
393
522
  key = key + "/"
394
523
  items = []
395
- for info in self._scan_s3(bucket, key):
524
+ for info in self._scan_s3(bucket, key, effective_profile):
396
525
  if info.relative_path:
397
526
  # Skip nested items if not recursive
398
527
  if not recursive and "/" in info.relative_path:
@@ -416,18 +545,19 @@ class _Mirror:
416
545
  items.append(str(display_path.joinpath(Path(info.relative_path))))
417
546
  return items
418
547
 
419
- def _check_download_conflicts(self, candidate: str) -> None:
420
- for upload_remote in self._uploads:
421
- if _s3_paths_conflict(candidate, upload_remote):
548
+ def _check_download_conflicts(self, candidate: str, profile: Profile | None) -> None:
549
+ for (upload_remote, upload_profile), _reg in self._uploads.items():
550
+ if upload_profile == profile and _s3_paths_conflict(candidate, upload_remote):
422
551
  raise ValueError(f"Conflict: download '{candidate}' overlaps with upload '{upload_remote}'")
423
552
 
424
553
  def _check_upload_conflicts(self, new_registration) -> None:
425
554
  candidate = new_registration.remote
426
- for download_remote in self._downloads:
427
- if _s3_paths_conflict(candidate, download_remote):
555
+ candidate_profile = new_registration.profile
556
+ for (download_remote, download_profile), _reg in self._downloads.items():
557
+ if download_profile == candidate_profile and _s3_paths_conflict(candidate, download_remote):
428
558
  raise ValueError(f"Conflict: upload '{candidate}' overlaps with download '{download_remote}'")
429
- for upload_remote, reg in self._uploads.items():
430
- if _s3_paths_conflict(candidate, upload_remote):
559
+ for (upload_remote, upload_profile), reg in self._uploads.items():
560
+ if upload_profile == candidate_profile and _s3_paths_conflict(candidate, upload_remote):
431
561
  same_remote = candidate == upload_remote
432
562
  if not same_remote or reg != new_registration:
433
563
  raise ValueError(f"Conflict: upload '{candidate}' overlaps with upload '{upload_remote}'")
@@ -459,7 +589,7 @@ class _Mirror:
459
589
  self._sync_uploads(uploads)
460
590
 
461
591
  def _sync_uploads(self, registrations: Iterable[_UploadRegistration]) -> None:
462
- tasks: list[tuple[str, Path, bool, list[str] | None]] = []
592
+ tasks: list[tuple[str, Path, bool, list[str] | None, Profile | None]] = []
463
593
  for registration in registrations:
464
594
  if registration.local_path.exists():
465
595
  tasks.append(
@@ -468,32 +598,33 @@ class _Mirror:
468
598
  registration.local_path,
469
599
  registration.delete,
470
600
  registration.exclude,
601
+ registration.profile,
471
602
  )
472
603
  )
473
604
 
474
605
  if not tasks:
475
606
  return
476
607
 
477
- to_put: list[tuple[FileInfo, Path, str, str]] = []
478
- to_remove: list[tuple[str, str]] = []
608
+ to_put: list[tuple[FileInfo, Path, str, str, Profile | None]] = []
609
+ to_remove: list[tuple[str, str, Profile | None]] = []
479
610
  total_bytes = 0
480
611
 
481
- for remote, local_path, delete, exclude in tasks:
612
+ for remote, local_path, delete, exclude, profile in tasks:
482
613
  logger.debug("Syncing upload: %s from %s (delete=%s)", remote, local_path, delete)
483
614
  bucket, prefix = _parse_s3_url(remote)
484
615
  to_copy, to_delete = _compute_sync_diff(
485
616
  _filter_fileinfo(_scan_local(local_path), exclude),
486
- _filter_fileinfo(self._scan_s3(bucket, prefix), exclude),
617
+ _filter_fileinfo(self._scan_s3(bucket, prefix, profile), exclude),
487
618
  )
488
619
 
489
620
  for info in to_copy:
490
621
  s3_key = prefix + ("/" + info.relative_path if info.relative_path else "")
491
- to_put.append((info, local_path, bucket, s3_key))
622
+ to_put.append((info, local_path, bucket, s3_key, profile))
492
623
  total_bytes += info.size
493
624
 
494
625
  for info in to_delete if delete else []:
495
626
  s3_key = prefix + ("/" + info.relative_path if info.relative_path else "")
496
- to_remove.append((bucket, s3_key))
627
+ to_remove.append((bucket, s3_key, profile))
497
628
 
498
629
  if to_put:
499
630
  with (
@@ -501,15 +632,18 @@ class _Mirror:
501
632
  ThreadPoolExecutor(max_workers=self.options.max_workers) as executor,
502
633
  ):
503
634
  futures = [
504
- executor.submit(self._put_to_s3, info, local_path, bucket, key, pbar)
505
- for info, local_path, bucket, key in to_put
635
+ executor.submit(self._put_to_s3, info, local_path, bucket, key, pbar, profile)
636
+ for info, local_path, bucket, key, profile in to_put
506
637
  ]
507
638
  _process_futures(as_completed(futures), "Upload")
508
639
 
509
640
  if to_remove:
510
641
  to_remove_sorted = sorted(to_remove, key=lambda x: x[1].count("/"), reverse=True)
511
642
  with ThreadPoolExecutor(max_workers=self.options.max_workers) as executor:
512
- futures = [executor.submit(self._remove_from_s3, bucket, key) for bucket, key in to_remove_sorted]
643
+ futures = [
644
+ executor.submit(self._remove_from_s3, bucket, key, profile)
645
+ for bucket, key, profile in to_remove_sorted
646
+ ]
513
647
  iterator = as_completed(futures)
514
648
  if self.options.show_progress:
515
649
  iterator = tqdm(
@@ -519,7 +653,14 @@ class _Mirror:
519
653
  )
520
654
  _process_futures(iterator, "Delete")
521
655
 
522
- def _perform_download(self, remote: str, local_path: Path, delete: bool, exclude: list[str] | None) -> None:
656
+ def _perform_download(
657
+ self,
658
+ remote: str,
659
+ local_path: Path,
660
+ delete: bool,
661
+ exclude: list[str] | None,
662
+ profile: Profile | None = None,
663
+ ) -> None:
523
664
  bucket, prefix = _parse_s3_url(remote)
524
665
  logger.debug(
525
666
  "Performing download: s3://%s/%s to %s (delete=%s)",
@@ -529,7 +670,7 @@ class _Mirror:
529
670
  delete,
530
671
  )
531
672
  to_copy, to_delete = _compute_sync_diff(
532
- _filter_fileinfo(self._scan_s3(bucket, prefix), exclude),
673
+ _filter_fileinfo(self._scan_s3(bucket, prefix, profile), exclude),
533
674
  _filter_fileinfo(_scan_local(local_path), exclude),
534
675
  )
535
676
 
@@ -554,7 +695,7 @@ class _Mirror:
554
695
  self._progress_bar(total_bytes, f"Downloading {remote}") as pbar,
555
696
  ThreadPoolExecutor(max_workers=self.options.max_workers) as executor,
556
697
  ):
557
- futures = [executor.submit(self._put_locally, *args, pbar) for args in to_put]
698
+ futures = [executor.submit(self._put_locally, *args, pbar, profile) for args in to_put]
558
699
  _process_futures(as_completed(futures), "Download")
559
700
 
560
701
  if to_remove:
@@ -565,13 +706,14 @@ class _Mirror:
565
706
  for path in iterator:
566
707
  self._remove_locally(path)
567
708
 
568
- def _list_s3_objects(self, bucket: str, key: str) -> Iterator[dict]:
709
+ def _list_s3_objects(self, bucket: str, key: str, profile: Profile | None = None) -> Iterator[dict]:
569
710
  logger.debug("Listing S3 objects: bucket=%s, key=%s", bucket, key)
711
+ client = self._get_client(profile)
570
712
  # Skip head_object for directory-like keys ending with '/'
571
713
  # as we want to list contents, not check if the directory marker exists
572
714
  if not key.endswith("/"):
573
715
  try:
574
- obj = self.s3_client.head_object(Bucket=bucket, Key=key)
716
+ obj = client.head_object(Bucket=bucket, Key=key)
575
717
  except ClientError as exc:
576
718
  error_code = exc.response["Error"]["Code"]
577
719
  if error_code != "404":
@@ -583,18 +725,18 @@ class _Mirror:
583
725
  yield {**obj, "Key": key}
584
726
  return
585
727
 
586
- paginator = self.s3_client.get_paginator("list_objects_v2")
728
+ paginator = client.get_paginator("list_objects_v2")
587
729
  for page in paginator.paginate(Bucket=bucket, Prefix=key):
588
730
  objects = page.get("Contents", [])
589
731
  logger.debug("Listed %d objects with prefix %s", len(objects), key)
590
732
  yield from objects
591
733
 
592
- def _scan_s3(self, bucket: str, prefix: str) -> Iterator[FileInfo]:
734
+ def _scan_s3(self, bucket: str, prefix: str, profile: Profile | None = None) -> Iterator[FileInfo]:
593
735
  logger.debug("Scanning S3: s3://%s/%s", bucket, prefix)
594
736
  seen_dirs: set[str] = set()
595
737
  has_content = False
596
738
 
597
- for obj in self._list_s3_objects(bucket, prefix):
739
+ for obj in self._list_s3_objects(bucket, prefix, profile):
598
740
  has_content = True
599
741
  key = obj["Key"]
600
742
  relative = key[len(prefix) :].lstrip("/")
@@ -625,33 +767,40 @@ class _Mirror:
625
767
  return _NullTqdm()
626
768
  return tqdm(total=total_bytes, unit="B", unit_scale=True, unit_divisor=1024, desc=desc)
627
769
 
628
- def _put_to_s3(self, info: FileInfo, local_path: Path, bucket: str, key: str, pbar) -> None:
770
+ def _put_to_s3(
771
+ self, info: FileInfo, local_path: Path, bucket: str, key: str, pbar, profile: Profile | None = None
772
+ ) -> None:
629
773
  try:
774
+ client = self._get_client(profile)
630
775
  if info.is_dir:
631
776
  key += "/" if not key.endswith("/") else ""
632
- self.s3_client.put_object(Bucket=bucket, Key=key, Body=b"")
777
+ client.put_object(Bucket=bucket, Key=key, Body=b"")
633
778
  else:
634
779
  file_path = local_path / info.relative_path if info.relative_path else local_path
635
- self.s3_client.upload_file(str(file_path), bucket, key, Callback=pbar.update)
780
+ client.upload_file(str(file_path), bucket, key, Callback=pbar.update)
636
781
  except Exception as exc:
637
782
  logger.error("Failed to put %s to %s/%s: %s", local_path, bucket, key, exc)
638
783
  raise
639
784
 
640
- def _remove_from_s3(self, bucket: str, key: str) -> None:
785
+ def _remove_from_s3(self, bucket: str, key: str, profile: Profile | None = None) -> None:
641
786
  try:
642
- self.s3_client.delete_object(Bucket=bucket, Key=key)
787
+ client = self._get_client(profile)
788
+ client.delete_object(Bucket=bucket, Key=key)
643
789
  except Exception as exc:
644
790
  logger.error("Failed to remove %s/%s: %s", bucket, key, exc)
645
791
  raise
646
792
 
647
- def _put_locally(self, info: FileInfo, bucket: str, key: str, local_path: Path, pbar) -> None:
793
+ def _put_locally(
794
+ self, info: FileInfo, bucket: str, key: str, local_path: Path, pbar, profile: Profile | None = None
795
+ ) -> None:
648
796
  try:
649
797
  target = local_path / info.relative_path if info.relative_path else local_path
650
798
  if info.is_dir:
651
799
  target.mkdir(parents=True, exist_ok=True)
652
800
  else:
653
801
  target.parent.mkdir(parents=True, exist_ok=True)
654
- self.s3_client.download_file(bucket, key, str(target), Callback=pbar.update)
802
+ client = self._get_client(profile)
803
+ client.download_file(bucket, key, str(target), Callback=pbar.update)
655
804
  except Exception as exc:
656
805
  logger.error("Failed to put %s locally: %s", key, exc)
657
806
  raise
@@ -672,6 +821,7 @@ def mirror(
672
821
  cache_root: str = "~/.cache/positronic/s3/",
673
822
  show_progress: bool = True,
674
823
  max_workers: int = 10,
824
+ default_profile: str | Profile | None = None,
675
825
  ):
676
826
  """
677
827
  Context manager that activates the sync environment.
@@ -680,9 +830,16 @@ def mirror(
680
830
  cache_root: Base directory for caching downloaded files.
681
831
  show_progress: Display tqdm progress bars.
682
832
  max_workers: Threads for parallel S3 operations.
833
+ default_profile: Default S3 profile for all operations in this context.
683
834
  """
684
835
  global _GLOBAL_ACTIVE_MIRROR
685
- options = _Options(cache_root=cache_root, show_progress=show_progress, max_workers=max_workers)
836
+ resolved_default_profile = _resolve_profile(default_profile)
837
+ options = _Options(
838
+ cache_root=cache_root,
839
+ show_progress=show_progress,
840
+ max_workers=max_workers,
841
+ default_profile=resolved_default_profile,
842
+ )
686
843
 
687
844
  with _GLOBAL_MIRROR_LOCK:
688
845
  if _GLOBAL_ACTIVE_MIRROR is not None:
@@ -712,6 +869,7 @@ def with_mirror(
712
869
  cache_root: str = "~/.cache/positronic/s3/",
713
870
  show_progress: bool = True,
714
871
  max_workers: int = 10,
872
+ default_profile: str | Profile | None = None,
715
873
  ):
716
874
  """
717
875
  Decorator equivalent of mirror() for wrapping functions.
@@ -721,10 +879,12 @@ def with_mirror(
721
879
  def decorator(func):
722
880
  @wraps(func)
723
881
  def wrapper(*args, **kwargs):
882
+ # Resolve profile at call time, not decoration time
724
883
  with mirror(
725
884
  cache_root=cache_root,
726
885
  show_progress=show_progress,
727
886
  max_workers=max_workers,
887
+ default_profile=default_profile,
728
888
  ):
729
889
  return func(*args, **kwargs)
730
890
 
@@ -750,6 +910,7 @@ def download(
750
910
  local: str | Path | None = None,
751
911
  delete: bool = True,
752
912
  exclude: list[str] | None = None,
913
+ profile: str | Profile | None = None,
753
914
  ) -> Path:
754
915
  """
755
916
  Register a path for download. Ensures local copy matches S3 immediately.
@@ -759,12 +920,13 @@ def download(
759
920
  local: Explicit local destination. Defaults to standard cache path.
760
921
  delete: If True (default), deletes local files NOT in S3 ("mirror" behavior).
761
922
  exclude: List of glob patterns to skip.
923
+ profile: S3 profile name or Profile config for custom endpoints.
762
924
 
763
925
  Returns:
764
926
  Path to the local directory/file.
765
927
  """
766
928
  mirror_obj = _require_active_mirror()
767
- return mirror_obj.download(remote, local, delete, exclude)
929
+ return mirror_obj.download(remote, local, delete, exclude, profile)
768
930
 
769
931
 
770
932
  def upload(
@@ -774,6 +936,7 @@ def upload(
774
936
  delete: bool = True,
775
937
  sync_on_error: bool = False,
776
938
  exclude: list[str] | None = None,
939
+ profile: str | Profile | None = None,
777
940
  ) -> Path:
778
941
  """
779
942
  Register a local path for upload. Uploads on exit and optionally in background.
@@ -784,12 +947,13 @@ def upload(
784
947
  interval: Seconds between background syncs. None for exit-only.
785
948
  delete: If True (default), deletes S3 files NOT present locally.
786
949
  sync_on_error: If True, syncs even if the context exits with an exception.
950
+ profile: S3 profile name or Profile config for custom endpoints.
787
951
 
788
952
  Returns:
789
953
  Path to the local directory/file.
790
954
  """
791
955
  mirror_obj = _require_active_mirror()
792
- return mirror_obj.upload(remote, local, interval, delete, sync_on_error, exclude)
956
+ return mirror_obj.upload(remote, local, interval, delete, sync_on_error, exclude, profile)
793
957
 
794
958
 
795
959
  def sync(
@@ -800,6 +964,7 @@ def sync(
800
964
  delete_remote: bool = True,
801
965
  sync_on_error: bool = False,
802
966
  exclude: list[str] | None = None,
967
+ profile: str | Profile | None = None,
803
968
  ) -> Path:
804
969
  """
805
970
  Bi-directional helper. Performs download() then registers upload().
@@ -807,27 +972,29 @@ def sync(
807
972
  Args:
808
973
  delete_local: Cleanup local files during download.
809
974
  delete_remote: Cleanup remote files during upload.
975
+ profile: S3 profile name or Profile config for custom endpoints.
810
976
 
811
977
  Returns:
812
978
  Path to the local directory/file.
813
979
  """
814
980
  mirror_obj = _require_active_mirror()
815
- return mirror_obj.sync(remote, local, interval, delete_local, delete_remote, sync_on_error, exclude)
981
+ return mirror_obj.sync(remote, local, interval, delete_local, delete_remote, sync_on_error, exclude, profile)
816
982
 
817
983
 
818
- def ls(prefix: str, recursive: bool = False) -> list[str]:
984
+ def ls(prefix: str, recursive: bool = False, profile: str | Profile | None = None) -> list[str]:
819
985
  """
820
986
  Lists files/objects in a directory or S3 prefix.
821
987
 
822
988
  Args:
823
989
  prefix: S3 URL or local path.
824
990
  recursive: List subdirectories if True.
991
+ profile: S3 profile name or Profile config for custom endpoints.
825
992
 
826
993
  Returns:
827
994
  List of full S3 URLs or local paths.
828
995
  """
829
996
  mirror_obj = _require_active_mirror()
830
- return mirror_obj.ls(prefix, recursive)
997
+ return mirror_obj.ls(prefix, recursive, profile)
831
998
 
832
999
 
833
- __all__ = ["mirror", "download", "upload", "sync", "ls", "_parse_s3_url"]
1000
+ __all__ = ["mirror", "with_mirror", "download", "upload", "sync", "ls", "register_profile", "Profile", "_parse_s3_url"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pos3
3
- Version: 0.1.0rc1
3
+ Version: 0.2.0
4
4
  Summary: S3 Simple Sync - Make using S3 as simple as using local files
5
5
  Author-email: Positronic Robotics <hi@positronic.ro>
6
6
  License: Apache-2.0
@@ -12,6 +12,7 @@ Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Requires-Python: >=3.9
14
14
  Description-Content-Type: text/markdown
15
+ License-File: LICENSE
15
16
  Requires-Dist: boto3>=1.26.0
16
17
  Requires-Dist: tqdm>=4.65.0
17
18
  Provides-Extra: dev
@@ -19,12 +20,15 @@ Requires-Dist: pytest>=7.0; extra == "dev"
19
20
  Requires-Dist: pytest-cov; extra == "dev"
20
21
  Requires-Dist: ruff; extra == "dev"
21
22
  Requires-Dist: pre-commit; extra == "dev"
23
+ Dynamic: license-file
22
24
 
23
25
  # pos3
24
26
 
25
27
  **PO**sitronic **S3** — Make using S3 as simple as using local files.
26
28
 
27
- `pos3` provides a Pythonic context manager for syncing directories and files with S3. It is designed for data processing pipelines and machine learning workflows where you want to work with local files but persist data in S3.
29
+ `pos3` provides a Pythonic context manager for syncing directories and files with S3. It is designed for data processing pipelines and machine learning workflows where you need to integrate S3 with code that **only understands local files**.
30
+
31
+ > The main value of `pos3` is enabling you to pass S3 data to **third-party libraries or legacy scripts** that expect local file paths (e.g., `opencv`, `pandas.read_csv`, or model training scripts). Instead of rewriting their I/O logic to support S3, `pos3` transparently bridges the gap.
28
32
 
29
33
  ## Core Concepts
30
34
 
@@ -144,7 +148,52 @@ Why use `pos3` instead of other Python libraries?
144
148
  | **Lifecycle** | **Automated** (Open/Close) | Manual | Manual |
145
149
  | **Background Upload** | **Yes** (Non-blocking) | Manual Threading | No (Blocking) |
146
150
  | **Local I/O Speed** | **Native** (SSD) | Native | Network Bound (Virtual FS) |
147
- | **Use Case** | **ML / Pipelines** | App Development | DataFrames / Interactive |
151
+ | **Use Case** | **ML / Pipelines / 3rd Party Code** | App Development | DataFrames / Interactive |
148
152
 
149
153
  - **vs `boto3`**: `boto3` is the raw AWS SDK. `pos3` wraps it to provide "mirroring" logic, threading, and diffing out of the box.
150
154
  - **vs `s3fs`**: `s3fs` treats S3 as a filesystem. `pos3` treats S3 as a persistence layer for your high-speed local storage, ensuring you always get native IO performance.
155
+
156
+ ## Advanced Features
157
+
158
+ ### Profiles
159
+
160
+ Profiles let you access multiple S3-compatible endpoints simultaneously within the same context. This is useful when your workflow combines data from different sources:
161
+
162
+ ```python
163
+ import pos3
164
+ from pos3 import Profile
165
+
166
+ # Register profiles for different endpoints
167
+ pos3.register_profile('nebius-public',
168
+     endpoint='https://storage.eu-north1.nebius.cloud',
169
+     public=True  # anonymous access, no credentials needed
170
+ )
171
+ pos3.register_profile('minio-local',
172
+     endpoint='http://localhost:9000',
173
+     region='us-east-1'
174
+ )
175
+
176
+ # Use multiple profiles in the same context
177
+ with pos3.mirror():
178
+     # Download public dataset from Nebius
179
+     dataset = pos3.download('s3://public-data/dataset/', profile='nebius-public')
180
+
181
+     # Download private config from local MinIO
182
+     config = pos3.download('s3://private/config/', profile='minio-local')
183
+
184
+     # Upload results to AWS (default boto3 credentials)
185
+     results = pos3.upload('s3://my-aws-bucket/results/')
186
+
187
+     train(dataset, config, results)
188
+
189
+ # You can also use inline Profile objects without registration
190
+ custom = Profile(local_name='custom', endpoint='https://custom.example.com', public=True)
191
+ with pos3.mirror():
192
+     data = pos3.download('s3://bucket/path', profile=custom)
193
+
194
+ # Or set a default profile for the entire context
195
+ with pos3.mirror(default_profile='nebius-public'):
196
+     data = pos3.download('s3://bucket/path')  # uses nebius-public
197
+ ```
198
+
199
+ Each profile has a `local_name` used in the cache path to keep files from different endpoints separate. When registering profiles, `local_name` defaults to the profile name. The default AWS profile uses `_` as its local name.
@@ -0,0 +1,6 @@
1
+ pos3/__init__.py,sha256=ByRzIJ3ggRKFU6j8HtzDv7D3RW_6NZkbjp3etoMilys,37261
2
+ pos3-0.2.0.dist-info/licenses/LICENSE,sha256=e815_YqPTxHS3WrNI7dotEuLkgHFAgsf9avLhDYBj9s,11354
3
+ pos3-0.2.0.dist-info/METADATA,sha256=LF6o86VvogFSbkcsfy39yVmf-37IjbZdqJZ3guem4CA,8637
4
+ pos3-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ pos3-0.2.0.dist-info/top_level.txt,sha256=JWOpXHz1F6cbH0nfanGWLaozt8RJFRmv5H3eKkxz7e8,5
6
+ pos3-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 Positronic Robotics Inc.
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -1,5 +0,0 @@
1
- pos3/__init__.py,sha256=rxcmD1K5M9zvBqDhIX1guSFxgcm6XOSBhLTbY5wNzkk,30453
2
- pos3-0.1.0rc1.dist-info/METADATA,sha256=IpXNH8zCxJ1CbTxun7a6uNx1PmylSGGMRFXB2kHEbyU,6620
3
- pos3-0.1.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
4
- pos3-0.1.0rc1.dist-info/top_level.txt,sha256=JWOpXHz1F6cbH0nfanGWLaozt8RJFRmv5H3eKkxz7e8,5
5
- pos3-0.1.0rc1.dist-info/RECORD,,
File without changes