lamindb_setup 1.8.1-py3-none-any.whl → 1.8.2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the versions exactly as they appear in the public registry.
lamindb_setup/__init__.py CHANGED
@@ -35,7 +35,7 @@ Modules & settings:
  
  """
  
- __version__ = "1.8.1"  # denote a release candidate for 0.1.0 with 0.1rc1
+ __version__ = "1.8.2"  # denote a release candidate for 0.1.0 with 0.1rc1
  
  import os
  
@@ -188,13 +188,32 @@ def _connect_instance(
      return isettings
  
  
+ def _connect_cli(instance: str) -> None:
+     from lamindb_setup import settings as settings_
+
+     settings_.auto_connect = True
+     owner, name = get_owner_name_from_identifier(instance)
+     isettings = _connect_instance(owner, name)
+     isettings._persist(write_to_disk=True)
+     if not isettings.is_on_hub or isettings._is_cloud_sqlite:
+         # there are two reasons to call the full-blown connect
+         # (1) if the instance is not on the hub, we need to register
+         # potential users through register_user()
+         # (2) if the instance is cloud sqlite, we need to lock it
+         connect(_write_settings=False, _reload_lamindb=False)
+     else:
+         logger.important(f"connected lamindb: {isettings.slug}")
+     return None
+
+
  @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
  def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
      """Connect to an instance.
  
      Args:
          instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
-             If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier. If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
+             If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
+             If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
      """
      # validate kwargs
      valid_kwargs = {
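
Note: a hypothetical sketch of how a CLI command might wire up the new `_connect_cli` helper; it assumes `_connect_cli` is importable from `lamindb_setup._connect_instance` (the module this hunk modifies), and the wrapper function name is made up.

```python
# hypothetical CLI glue; only _connect_cli itself comes from the diff above
from lamindb_setup._connect_instance import _connect_cli

def lamin_connect_command(instance: str) -> None:
    # persists instance settings to disk, then runs the heavyweight connect()
    # only when required (off-hub instance or cloud SQLite)
    _connect_cli(instance)  # e.g. "account/name"
```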
@@ -5,6 +5,7 @@ import os
  from typing import Literal
  from urllib.request import urlretrieve
  
+ from httpx import HTTPTransport
  from lamin_utils import logger
  from pydantic_settings import BaseSettings
  from supabase import Client, create_client  # type: ignore
@@ -60,20 +61,29 @@ class Environment:
          self.supabase_anon_key: str = key
  
  
+ DEFAULT_TIMEOUT = 20
+
+
  # runs ~0.5s
  def connect_hub(
      fallback_env: bool = False, client_options: ClientOptions | None = None
  ) -> Client:
      env = Environment(fallback=fallback_env)
      if client_options is None:
-         # function_client_timeout=5 by default
-         # increase to avoid rare timeouts for edge functions
          client_options = ClientOptions(
              auto_refresh_token=False,
-             function_client_timeout=30,
-             postgrest_client_timeout=20,
+             function_client_timeout=DEFAULT_TIMEOUT,
+             postgrest_client_timeout=DEFAULT_TIMEOUT,
          )
-     return create_client(env.supabase_api_url, env.supabase_anon_key, client_options)
+     client = create_client(env.supabase_api_url, env.supabase_anon_key, client_options)
+     # needed to enable retries for http requests in supabase
+     # these are separate clients and need separate transports
+     # retries are done only in case an httpx.ConnectError or an httpx.ConnectTimeout occurs
+     transport_kwargs = {"verify": True, "http2": True, "retries": 2}
+     client.auth._http_client._transport = HTTPTransport(**transport_kwargs)
+     client.functions._client._transport = HTTPTransport(**transport_kwargs)
+     client.postgrest.session._transport = HTTPTransport(**transport_kwargs)
+     return client
  
  
  def connect_hub_with_auth(
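
Note on the transport change above: `httpx.HTTPTransport(retries=...)` retries only connection establishment (`httpx.ConnectError` / `httpx.ConnectTimeout`), never a request that already reached the server, which is why it is safe to patch onto all three supabase sub-clients. A standalone sketch of the same pattern; the URL is a placeholder, and `http2=True` additionally requires the `h2` package:

```python
import httpx

# same transport settings as in connect_hub above; retries apply only to
# establishing the connection, so requests are never silently re-sent
transport = httpx.HTTPTransport(verify=True, http2=True, retries=2)
with httpx.Client(transport=transport, timeout=20) as client:
    response = client.get("https://example.com/health")  # placeholder URL
    print(response.status_code)
```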
@@ -210,11 +220,16 @@ def request_with_auth(
      headers["Authorization"] = f"Bearer {access_token}"
  
      make_request = getattr(requests, method)
-     response = make_request(url, headers=headers, **kwargs)
-     # upate access_token and try again if failed
-     if response.status_code != 200 and renew_token:
+     timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
+
+     response = make_request(url, headers=headers, timeout=timeout, **kwargs)
+     status_code = response.status_code
+     # update access_token and try again if failed
+     if not (200 <= status_code < 300) and renew_token:
          from lamindb_setup import settings
  
+         logger.debug(f"{method} {url} failed: {status_code} {response.text}")
+
          access_token = get_access_token(
              settings.user.email, settings.user.password, settings.user.api_key
          )
@@ -224,5 +239,5 @@ def request_with_auth(
  
          headers["Authorization"] = f"Bearer {access_token}"
  
-         response = make_request(url, headers=headers, **kwargs)
+         response = make_request(url, headers=headers, timeout=timeout, **kwargs)
      return response
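
The widened status check matters because endpoints can legitimately return 201 or 204; the old `!= 200` test treated those as failures and triggered a pointless token renewal. The rule, as a sketch:

```python
def is_success(status_code: int) -> bool:
    # any 2xx counts as success; `!= 200` wrongly rejected 201/204
    return 200 <= status_code < 300
```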
@@ -531,11 +531,13 @@ def access_db(
      url = instance_api_url + url
  
      response = request_with_auth(url, "get", access_token, renew_token)  # type: ignore
-     response_json = response.json()
-     if response.status_code != 200:
+     status_code = response.status_code
+     if not (200 <= status_code < 300):
          raise PermissionError(
-             f"Fine-grained access to {instance_slug} failed: {response_json}"
+             f"Fine-grained access to {instance_slug} failed: {status_code} {response.text}"
          )
+
+     response_json = response.json()
      if "token" not in response_json:
          raise RuntimeError("The response of access_db does not contain a db token.")
      return response_json["token"]
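
Parsing the body before checking the status was fragile: a plain-text error page would raise a `JSONDecodeError` and mask the actual permission problem. A minimal sketch of the corrected order, with a hypothetical URL:

```python
import requests

response = requests.get("https://hub.example.com/access-db", timeout=20)  # hypothetical URL
# check the status first; parse JSON only once the call is known to have succeeded
if not (200 <= response.status_code < 300):
    raise PermissionError(f"access failed: {response.status_code} {response.text}")
token = response.json()["token"]
```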
@@ -350,7 +350,7 @@ class SetupPaths:
          local_filepath = SetupPaths.cloud_to_local_no_update(filepath, cache_key)
          if not isinstance(filepath, LocalPathClasses):
              local_filepath.parent.mkdir(parents=True, exist_ok=True)
-             filepath.synchronize(local_filepath, **kwargs)  # type: ignore
+             filepath.synchronize_to(local_filepath, **kwargs)  # type: ignore
          return local_filepath
  
  
@@ -32,7 +32,7 @@ if TYPE_CHECKING:
  
      from ._settings_user import UserSettings
  
- LOCAL_STORAGE_MESSAGE = "No storage location found in current environment: create one via, e.g., ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
+ LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
  
  
  def sanitize_git_repo_url(repo_url: str) -> str:
@@ -156,9 +156,17 @@ class InstanceSettings:
          found = []
          for record in all_local_records:
              root_path = Path(record.root)
-             if root_path.exists():
+             try:
+                 root_path_exists = root_path.exists()
+             except PermissionError:
+                 continue
+             if root_path_exists:
                  marker_path = root_path / STORAGE_UID_FILE_KEY
-                 if not marker_path.exists():
+                 try:
+                     marker_path_exists = marker_path.exists()
+                 except PermissionError:
+                     continue
+                 if not marker_path_exists:
                      legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
                      if legacy_filepath.exists():
                          logger.warning(
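
Background for the `try/except PermissionError` guards: unlike `os.path.exists()`, `pathlib.Path.exists()` re-raises `EACCES`, so probing a storage root behind an unreadable mount raises instead of returning `False`. A sketch of the guard pattern, with a hypothetical helper name:

```python
from pathlib import Path

def exists_or_none(path: Path) -> bool | None:
    # Path.exists() suppresses ENOENT/ENOTDIR but propagates EACCES,
    # so unreadable locations must be skipped explicitly, as above
    try:
        return path.exists()
    except PermissionError:
        return None  # caller skips this storage record
```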
@@ -193,15 +201,19 @@ class InstanceSettings:
      def keep_artifacts_local(self) -> bool:
          """Default to keeping artifacts local.
  
-         Enable this optional setting for cloud instances on lamin.ai.
-
          Guide: :doc:`faq/keep-artifacts-local`
          """
          return self._keep_artifacts_local
  
+     @keep_artifacts_local.setter
+     def keep_artifacts_local(self, value: bool):
+         if not isinstance(value, bool):
+             raise ValueError("keep_artifacts_local must be a boolean value.")
+         self._keep_artifacts_local = value
+
      @property
      def storage(self) -> StorageSettings:
-         """Default storage.
+         """Default storage of instance.
  
          For a cloud instance, this is cloud storage. For a local instance, this
          is a local directory.
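
With the new setter, `keep_artifacts_local` can be toggled at runtime and rejects non-boolean values. A hedged usage sketch, assuming a connected instance:

```python
from lamindb_setup import settings

settings.instance.keep_artifacts_local = True  # enables .local_storage below
# settings.instance.keep_artifacts_local = "yes"  # would raise ValueError
```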
@@ -210,13 +222,13 @@ class InstanceSettings:
  
      @property
      def local_storage(self) -> StorageSettings:
-         """An additional local storage location.
+         """An alternative default local storage location in the current environment.
  
-         Is only available if :attr:`keep_artifacts_local` is enabled.
+         Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
  
          Guide: :doc:`faq/keep-artifacts-local`
          """
-         if not self._keep_artifacts_local:
+         if not self.keep_artifacts_local:
              raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
          if self._local_storage is None:
              self._local_storage = self._search_local_root()
@@ -235,7 +247,7 @@ class InstanceSettings:
          local_root, host = local_root_host
  
          local_root = Path(local_root)
-         if not self._keep_artifacts_local:
+         if not self.keep_artifacts_local:
              raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
          local_storage = self._search_local_root(
              local_root=StorageSettings(local_root).root_as_str, mute_warning=True
@@ -370,7 +382,7 @@ class InstanceSettings:
          self._check_sqlite_lock()
          sqlite_file = self._sqlite_file
          cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
-         sqlite_file.synchronize(cache_file, print_progress=True)  # type: ignore
+         sqlite_file.synchronize_to(cache_file, print_progress=True)  # type: ignore
  
      def _check_sqlite_lock(self):
          if not self._cloud_sqlite_locker.has_lock:
@@ -19,7 +19,13 @@ from ._aws_options import (
  from ._aws_storage import find_closest_aws_region
  from ._deprecated import deprecated
  from .hashing import hash_and_encode_as_b62
- from .upath import LocalPathClasses, UPath, _split_path_query, create_path
+ from .upath import (
+     LocalPathClasses,
+     UPath,
+     _split_path_query,
+     create_path,
+     get_storage_region,
+ )
  
  if TYPE_CHECKING:
      from lamindb_setup.types import StorageType, UPathStr
@@ -43,50 +49,6 @@ def instance_uid_from_uuid(instance_id: UUID) -> str:
      return hash_and_encode_as_b62(instance_id.hex)[:12]
  
  
- def get_storage_region(path: UPathStr) -> str | None:
-     path_str = str(path)
-     if path_str.startswith("s3://"):
-         import botocore.session
-         from botocore.config import Config
-         from botocore.exceptions import ClientError
-
-         # check for endpoint_url in storage options if upath
-         if isinstance(path, UPath):
-             endpoint_url = path.storage_options.get("endpoint_url", None)
-         else:
-             endpoint_url = None
-         path_part = path_str.replace("s3://", "")
-         # check for endpoint_url in the path string
-         if "?" in path_part:
-             assert endpoint_url is None
-             path_part, query = _split_path_query(path_part)
-             endpoint_url = query.get("endpoint_url", [None])[0]
-         bucket = path_part.split("/")[0]
-         session = botocore.session.get_session()
-         credentials = session.get_credentials()
-         if credentials is None or credentials.access_key is None:
-             config = Config(signature_version=botocore.session.UNSIGNED)
-         else:
-             config = None
-         s3_client = session.create_client(
-             "s3", endpoint_url=endpoint_url, config=config
-         )
-         try:
-             response = s3_client.head_bucket(Bucket=bucket)
-         except ClientError as exc:
-             response = getattr(exc, "response", {})
-             if response.get("Error", {}).get("Code") == "404":
-                 raise exc
-         region = (
-             response.get("ResponseMetadata", {})
-             .get("HTTPHeaders", {})
-             .get("x-amz-bucket-region", None)
-         )
-     else:
-         region = None
-     return region
-
-
  def get_storage_type(root_as_str: str) -> StorageType:
      import fsspec
@@ -23,6 +23,7 @@ from upath.registry import register_implementation
  from lamindb_setup.errors import StorageNotEmpty
  
  from ._aws_options import HOSTED_BUCKETS, get_aws_options_manager
+ from ._deprecated import deprecated
  from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
  
  if TYPE_CHECKING:
@@ -381,42 +382,29 @@ def upload_from(
          return self
  
  
- def synchronize(
-     self,
-     objectpath: Path,
+ def synchronize_to(
+     origin: UPath,
+     destination: Path,
      error_no_origin: bool = True,
      print_progress: bool = False,
-     callback: fsspec.callbacks.Callback | None = None,
-     timestamp: float | None = None,
      just_check: bool = False,
+     **kwargs,
  ) -> bool:
      """Sync to a local destination path."""
-     protocol = self.protocol
-     # optimize the number of network requests
-     if timestamp is not None:
-         is_dir = False
+     destination = destination.resolve()
+     protocol = origin.protocol
+     try:
+         cloud_info = origin.stat().as_info()
          exists = True
-         cloud_mts = timestamp
-     else:
-         try:
-             cloud_stat = self.stat()
-             cloud_info = cloud_stat.as_info()
-             exists = True
-             is_dir = cloud_info["type"] == "directory"
-             if not is_dir:
-                 # hf requires special treatment
-                 if protocol == "hf":
-                     cloud_mts = cloud_info["last_commit"].date.timestamp()
-                 else:
-                     cloud_mts = cloud_stat.st_mtime
-         except FileNotFoundError:
-             exists = False
+         is_dir = cloud_info["type"] == "directory"
+     except FileNotFoundError:
+         exists = False
  
      if not exists:
-         warn_or_error = f"The original path {self} does not exist anymore."
-         if objectpath.exists():
+         warn_or_error = f"The original path {origin} does not exist anymore."
+         if destination.exists():
              warn_or_error += (
-                 f"\nHowever, the local path {objectpath} still exists, you might want"
+                 f"\nHowever, the local path {destination} still exists, you might want"
                  " to reupload the object back."
              )
          logger.warning(warn_or_error)
@@ -425,113 +413,114 @@ def synchronize(
              raise FileNotFoundError(warn_or_error)
          return False
  
-     # synchronization logic for directories
-     # to synchronize directories, it should be possible to get modification times
+     use_size: bool = False
+     # use casting to int to avoid problems when the local filesystem
+     # discards fractional parts of timestamps
+     if protocol == "s3":
+         get_modified = lambda file_stat: int(file_stat["LastModified"].timestamp())
+     elif protocol == "gs":
+         get_modified = lambda file_stat: int(file_stat["mtime"].timestamp())
+     elif protocol == "hf":
+         get_modified = lambda file_stat: int(file_stat["last_commit"].date.timestamp())
+     else:  # http etc
+         use_size = True
+         get_modified = lambda file_stat: file_stat["size"]
+
+     if use_size:
+         is_sync_needed = lambda cloud_size, local_stat: cloud_size != local_stat.st_size
+     else:
+         # no need to cast local_stat.st_mtime to int
+         # because if it has the fractional part and cloud_mtime doesn't
+         # and they have the same integer part then cloud_mtime can't be bigger
+         is_sync_needed = (
+             lambda cloud_mtime, local_stat: cloud_mtime > local_stat.st_mtime
+         )
+
+     local_paths: list[Path] = []
+     cloud_stats: dict[str, int]
      if is_dir:
-         files = self.fs.find(str(self), detail=True)
-         if protocol == "s3":
-             get_modified = lambda file_stat: file_stat["LastModified"]
-         elif protocol == "gs":
-             get_modified = lambda file_stat: file_stat["mtime"]
-         elif protocol == "hf":
-             get_modified = lambda file_stat: file_stat["last_commit"].date
-         else:
-             raise ValueError(f"Can't synchronize a directory for {protocol}.")
-         if objectpath.exists():
-             destination_exists = True
-             cloud_mts_max = max(
-                 get_modified(file) for file in files.values()
-             ).timestamp()
-             local_mts = [
-                 file.stat().st_mtime for file in objectpath.rglob("*") if file.is_file()
-             ]
-             n_local_files = len(local_mts)
-             local_mts_max = max(local_mts)
-             if local_mts_max == cloud_mts_max:
-                 need_synchronize = n_local_files != len(files)
-             elif local_mts_max > cloud_mts_max:
-                 need_synchronize = False
-             else:
-                 need_synchronize = True
-         else:
-             destination_exists = False
-             need_synchronize = True
-         # just check if synchronization is needed
-         if just_check:
-             return need_synchronize
-         if need_synchronize:
-             callback = ProgressCallback.requires_progress(
-                 callback, print_progress, objectpath.name, "synchronizing"
-             )
-             callback.set_size(len(files))
-             origin_file_keys = []
-             for file, stat in callback.wrap(files.items()):
-                 file_key = PurePosixPath(file).relative_to(self.path).as_posix()
-                 origin_file_keys.append(file_key)
-                 timestamp = get_modified(stat).timestamp()
-                 origin = f"{protocol}://{file}"
-                 destination = objectpath / file_key
-                 child = callback.branched(origin, destination.as_posix())
-                 UPath(origin, **self.storage_options).synchronize(
-                     destination, callback=child, timestamp=timestamp
+         cloud_stats = {
+             file: get_modified(stat)
+             for file, stat in origin.fs.find(origin.as_posix(), detail=True).items()
+         }
+         for cloud_path in cloud_stats:
+             file_key = PurePosixPath(cloud_path).relative_to(origin.path).as_posix()
+             local_paths.append(destination / file_key)
+     else:
+         cloud_stats = {origin.path: get_modified(cloud_info)}
+         local_paths.append(destination)
+
+     local_paths_all: dict[Path, os.stat_result] = {}
+     if destination.exists():
+         if is_dir:
+             local_paths_all = {
+                 path: path.stat() for path in destination.rglob("*") if path.is_file()
+             }
+             if not use_size:
+                 # cast to int to remove the fractional parts
+                 # there is a problem when a fractional part is allowed on one filesystem
+                 # but not on the other
+                 # so just normalize both to int
+                 cloud_mts_max: int = max(cloud_stats.values())
+                 local_mts_max: int = int(
+                     max(stat.st_mtime for stat in local_paths_all.values())
                  )
-                 child.close()
-             if destination_exists:
-                 local_files = [file for file in objectpath.rglob("*") if file.is_file()]
-                 if len(local_files) > len(files):
-                     for file in local_files:
-                         if (
-                             file.relative_to(objectpath).as_posix()
-                             not in origin_file_keys
-                         ):
-                             file.unlink()
-                             parent = file.parent
-                             if next(parent.iterdir(), None) is None:
-                                 parent.rmdir()
-         return need_synchronize
-
-     # synchronization logic for files
-     callback = ProgressCallback.requires_progress(
-         callback, print_progress, objectpath.name, "synchronizing"
-     )
-     objectpath_exists = objectpath.exists()
-     if objectpath_exists:
-         if cloud_mts != 0:
-             local_mts_obj = objectpath.stat().st_mtime
-             need_synchronize = cloud_mts > local_mts_obj
+                 if local_mts_max > cloud_mts_max:
+                     return False
+                 elif local_mts_max == cloud_mts_max:
+                     if len(local_paths_all) == len(cloud_stats):
+                         return False
+                     elif just_check:
+                         return True
          else:
-             # this is true for http for example
-             # where size is present but st_mtime is not
-             # we assume that any change without the change in size is unlikely
-             cloud_size = cloud_stat.st_size
-             local_size_obj = objectpath.stat().st_size
-             need_synchronize = cloud_size != local_size_obj
+             local_paths_all = {destination: destination.stat()}
+
+         cloud_files_sync = []
+         local_files_sync = []
+         for i, (cloud_file, cloud_stat) in enumerate(cloud_stats.items()):
+             local_path = local_paths[i]
+             if local_path not in local_paths_all or is_sync_needed(
+                 cloud_stat, local_paths_all[local_path]
+             ):
+                 cloud_files_sync.append(cloud_file)
+                 local_files_sync.append(local_path.as_posix())
      else:
-         if not just_check:
-             objectpath.parent.mkdir(parents=True, exist_ok=True)
-         need_synchronize = True
-     # just check if synchronization is needed
-     if just_check:
-         return need_synchronize
-     if need_synchronize:
-         # just to be sure that overwriting an existing file doesn't corrupt it
-         # we saw some frequent corruption on some systems for unclear reasons
-         if objectpath_exists:
-             objectpath.unlink()
-         # hf has sync filesystem
-         # on sync filesystems ChildProgressCallback.branched()
-         # returns the default callback
-         # this is why a difference between s3 and hf in progress bars
-         self.download_to(
-             objectpath, recursive=False, print_progress=False, callback=callback
+         cloud_files_sync = list(cloud_stats.keys())
+         local_files_sync = [local_path.as_posix() for local_path in local_paths]
+
+     if cloud_files_sync:
+         if just_check:
+             return True
+
+         callback = ProgressCallback.requires_progress(
+             maybe_callback=kwargs.pop("callback", None),
+             print_progress=print_progress,
+             objectname=destination.name,
+             action="synchronizing",
+             adjust_size=False,
          )
-         if cloud_mts != 0:
-             os.utime(objectpath, times=(cloud_mts, cloud_mts))
+         origin.fs.download(
+             cloud_files_sync,
+             local_files_sync,
+             recursive=False,
+             callback=callback,
+             **kwargs,
+         )
+         if not use_size:
+             for i, cloud_file in enumerate(cloud_files_sync):
+                 cloud_mtime = cloud_stats[cloud_file]
+                 os.utime(local_files_sync[i], times=(cloud_mtime, cloud_mtime))
      else:
-         # nothing happens if parent_update is not defined
-         # because of Callback.no_op
-         callback.parent_update()
-     return need_synchronize
+         return False
+
+     if is_dir and local_paths_all:
+         for path in (path for path in local_paths_all if path not in local_paths):
+             path.unlink()
+             parent = path.parent
+             if next(parent.iterdir(), None) is None:
+                 parent.rmdir()
+
+     return True
  
  
  def modified(self) -> datetime | None:
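
A worked example of the timestamp rule used above: cloud mtimes are truncated to whole seconds via `int(...)`, while local mtimes stay floats. If both fall in the same second, the cloud value can never compare as strictly newer, whether or not the local filesystem keeps fractional seconds:

```python
cloud_mtime = 1_700_000_000  # int(file_stat["LastModified"].timestamp())

local_keeps_fractions = 1_700_000_000.25  # e.g. a filesystem storing fractions
local_drops_fractions = 1_700_000_000.0   # e.g. a filesystem truncating them

# is_sync_needed for mtimes: cloud_mtime > local_stat.st_mtime
assert not cloud_mtime > local_keeps_fractions  # same second: no re-download
assert not cloud_mtime > local_drops_fractions  # still no spurious sync
```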
@@ -710,14 +699,7 @@ def to_url(upath):
          raise ValueError("The provided UPath must be an S3 path.")
      key = "/".join(upath.parts[1:])
      bucket = upath.drive
-     if bucket == "scverse-spatial-eu-central-1":
-         region = "eu-central-1"
-     elif f"s3://{bucket}" not in HOSTED_BUCKETS:
-         response = upath.fs.call_s3("head_bucket", Bucket=bucket)
-         headers = response["ResponseMetadata"]["HTTPHeaders"]
-         region = headers.get("x-amz-bucket-region")
-     else:
-         region = bucket.replace("lamin_", "")
+     region = get_storage_region(upath)
      if region == "us-east-1":
          return f"https://{bucket}.s3.amazonaws.com/{key}"
      else:
740
722
 
741
723
  # add custom functions
742
724
  UPath.modified = property(modified)
743
- UPath.synchronize = synchronize
725
+ UPath.synchronize = deprecated("synchronize_to")(synchronize_to)
726
+ UPath.synchronize_to = synchronize_to
744
727
  UPath.upload_from = upload_from
745
728
  UPath.to_url = to_url
746
729
  UPath.download_to = download_to
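
`deprecated("synchronize_to")(synchronize_to)` applies the decorator inline, so the old `UPath.synchronize` name keeps working but emits a warning. A minimal sketch of what such a decorator typically looks like; the real one lives in `._deprecated` and may differ:

```python
import warnings
from functools import wraps

def deprecated(new_name: str):
    # sketch only; see ._deprecated for the actual implementation
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"this method is deprecated, use `{new_name}` instead",
                DeprecationWarning,
                stacklevel=2,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator
```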
@@ -823,6 +806,67 @@ class S3QueryPath(S3Path):
  register_implementation("s3", S3QueryPath, clobber=True)
  
  
+ def get_storage_region(path: UPathStr) -> str | None:
+     upath = UPath(path)
+
+     if upath.protocol != "s3":
+         return None
+
+     bucket = upath.drive
+
+     if bucket == "scverse-spatial-eu-central-1":
+         return "eu-central-1"
+     elif f"s3://{bucket}" in HOSTED_BUCKETS:
+         return bucket.replace("lamin-", "")
+
+     from botocore.exceptions import ClientError
+
+     if isinstance(path, str):
+         import botocore.session
+         from botocore.config import Config
+
+         path_part = path.replace("s3://", "")
+         # check for endpoint_url in the path string
+         if "?" in path_part:
+             path_part, query = _split_path_query(path_part)
+             endpoint_url = query.get("endpoint_url", [None])[0]
+         else:
+             endpoint_url = None
+         session = botocore.session.get_session()
+         credentials = session.get_credentials()
+         if credentials is None or credentials.access_key is None:
+             config = Config(signature_version=botocore.session.UNSIGNED)
+         else:
+             config = None
+         s3_client = session.create_client(
+             "s3", endpoint_url=endpoint_url, config=config
+         )
+         try:
+             response = s3_client.head_bucket(Bucket=bucket)
+         except ClientError as exc:
+             response = getattr(exc, "response", {})
+             if response.get("Error", {}).get("Code") == "404":
+                 raise exc
+     else:
+         upath = get_aws_options_manager()._path_inject_options(upath, {})
+         try:
+             response = upath.fs.call_s3("head_bucket", Bucket=bucket)
+         except Exception as exc:
+             cause = getattr(exc, "__cause__", None)
+             if not isinstance(cause, ClientError):
+                 raise exc
+             response = getattr(cause, "response", {})
+             if response.get("Error", {}).get("Code") == "404":
+                 raise exc
+
+     region = (
+         response.get("ResponseMetadata", {})
+         .get("HTTPHeaders", {})
+         .get("x-amz-bucket-region", None)
+     )
+     return region
+
+
  def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
      upath = UPath(path).expanduser()
  
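
A hedged usage sketch of the consolidated `get_storage_region`; the bucket name is made up, and the s3 branch needs network access:

```python
from lamindb_setup.core.upath import get_storage_region

# non-s3 paths short-circuit to None; hosted buckets skip the network call
assert get_storage_region("/tmp/data") is None
region = get_storage_region("s3://some-public-bucket/key")  # hypothetical bucket
print(region)  # e.g. "us-east-1", read from the x-amz-bucket-region header
```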
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: lamindb_setup
- Version: 1.8.1
+ Version: 1.8.2
  Summary: Setup & configure LaminDB.
  Author-email: Lamin Labs <open-source@lamin.ai>
  Requires-Python: >=3.10
@@ -1,8 +1,8 @@
- lamindb_setup/__init__.py,sha256=AxSsbRTefAmDAw_GNRIXygb7gedXU5pMXoRM2oXmvqY,2782
+ lamindb_setup/__init__.py,sha256=aROFrdb7k6cJGTETwMKBeRNp8LD5BurDRAfAp5lbAnM,2782
  lamindb_setup/_cache.py,sha256=5o749NuW6zi6uP4rmBtwxg7ifWpAHXVngzC0tEgXLgo,2776
  lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
  lamindb_setup/_check_setup.py,sha256=bXuqx2HEc178RM7gbKZQ65PEVJFu6uSOKiHAs_xz6GI,5575
- lamindb_setup/_connect_instance.py,sha256=wQARuWyPSc9V5gYSADi_GIrwkyxNlZmsQT8-SbDHh-8,12724
+ lamindb_setup/_connect_instance.py,sha256=PDvtAEHYJQVy-aMPNupN1u6PG9Rb_85JNKcjrOeHNy0,13478
  lamindb_setup/_delete.py,sha256=2KnZOqd5Kgr45XzjiDE9der35LODDUajZD6_hcurGtQ,5676
  lamindb_setup/_disconnect.py,sha256=p6tRLhixU4CuSxMKqzGTr-ovKmTRlZ8aID5dWQxOsg8,1092
  lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
@@ -25,16 +25,16 @@ lamindb_setup/core/_aws_options.py,sha256=JN6fJNcotdIuT-WkBRKDPdyDri9XmorEX2unbu
  lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
  lamindb_setup/core/_deprecated.py,sha256=HN7iUBdEgahw5e4NHCd1VJooUfieNb6GRzS5x8jU-q8,2549
  lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
- lamindb_setup/core/_hub_client.py,sha256=iMAEwhvhsI85DWZdNT9Vv1K-AlXHw5dmTFDZB3FyAKE,7718
- lamindb_setup/core/_hub_core.py,sha256=x77WpaPMr1uA1kJBSvM5JRNyRG0vYIVlxQbhjAdOhu8,23862
+ lamindb_setup/core/_hub_client.py,sha256=jICkfWW1eZoxh3ycviztBGqCJH53uVve5Xawbj8RZR4,8433
+ lamindb_setup/core/_hub_core.py,sha256=Jf7Wfu59XF3Q6S-GgF6osDToBinQsUa33n55P7Cq-TQ,23919
  lamindb_setup/core/_hub_crud.py,sha256=Jz0d8wFKM1Pv9B9byyUJPlCIMkIzk56Jd-c3Awpm9Xw,5730
  lamindb_setup/core/_hub_utils.py,sha256=6dyDGyzYFgVfR_lE3VN3CP1jGp98gxPtr-T91PAP05U,2687
  lamindb_setup/core/_private_django_api.py,sha256=By63l3vIEtK1pq246FhHq3tslxsaTJGKm5VakYluWp4,2656
- lamindb_setup/core/_settings.py,sha256=0la2eYAHcdMYophheGsgQaD2nJGQrPuX4jMV5GKDiC0,12946
- lamindb_setup/core/_settings_instance.py,sha256=ML5NUZzmDGPhEJdgK-gzQ4f9zwb0NeeXe13sgQrP-1A,20732
+ lamindb_setup/core/_settings.py,sha256=EtlxhtAdclS6rDRh5mrwh_q3gA7SJ1eF2rO1QtOYnnE,12949
+ lamindb_setup/core/_settings_instance.py,sha256=40ty37SbCCc6pufi2455s4LcMtCbYRLLLz6P4rYwmiU,21198
  lamindb_setup/core/_settings_load.py,sha256=JWd0_hBy04xjKo-tH4y8C9RkaywjrmoT0PsKzVme0n4,5176
  lamindb_setup/core/_settings_save.py,sha256=XZx-vow7BT6y3JpRBB2UOJp2vwc7jOGea4wSgOPqjPU,3262
- lamindb_setup/core/_settings_storage.py,sha256=wVwDDD51UsJz5gS6juERbWHCdyucP0R2DLaUGhnLUpQ,17079
+ lamindb_setup/core/_settings_storage.py,sha256=S9AvKLzJX0M_RsYcBKZB_P84CYtTY0hyeffYE3UqrQA,15478
  lamindb_setup/core/_settings_store.py,sha256=QmeWIGdIyq7UmjfHiEB_0xRD8hY-8-ZR2WntIKfwTKI,2714
  lamindb_setup/core/_settings_user.py,sha256=K2a6nQ0fhEiSb9mCY_p6ItNrHZ3J_j7EfO7CjZap9aA,1462
  lamindb_setup/core/_setup_bionty_sources.py,sha256=ox3X-SHiHa2lNPSWjwZhINypbLacX6kGwH6hVVrSFZc,1505
@@ -43,8 +43,8 @@ lamindb_setup/core/django.py,sha256=dOt1OkUnZeYOo-LTjatQWQFh_MnjRf9IwwvRZhCwdZQ,
  lamindb_setup/core/exceptions.py,sha256=qjMzqy_uzPA7mCOdnoWnS_fdA6OWbdZGftz-YYplrY0,84
  lamindb_setup/core/hashing.py,sha256=Y8Uc5uSGTfU6L2R_gb5w8DdHhGRog7RnkK-e9FEMjPY,3680
  lamindb_setup/core/types.py,sha256=T7NwspfRHgIIpYsXDcApks8jkOlGeGRW-YbVLB7jNIo,67
- lamindb_setup/core/upath.py,sha256=UDPcZaNp3WtZSYrX8hGQ1lq214dBn079bz0FPWFjIbA,34449
- lamindb_setup-1.8.1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
- lamindb_setup-1.8.1.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
- lamindb_setup-1.8.1.dist-info/METADATA,sha256=jV9tU71hkGNvtGWxfr_jcw9JgOnWgHkJWlolAHORMO4,1797
- lamindb_setup-1.8.1.dist-info/RECORD,,
+ lamindb_setup/core/upath.py,sha256=W47O9-Y205j29iWJ3RKKdomA587hGvoiD6_krASGFcM,35315
+ lamindb_setup-1.8.2.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
+ lamindb_setup-1.8.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
+ lamindb_setup-1.8.2.dist-info/METADATA,sha256=8JhQoVWB9z_rwHXtAaGzxZoeEvFy5EmSM_PyFPp2B5A,1797
+ lamindb_setup-1.8.2.dist-info/RECORD,,