lamindb_setup 0.70.0__py2.py3-none-any.whl → 0.71.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. lamindb_setup/__init__.py +15 -15
  2. lamindb_setup/_cache.py +4 -1
  3. lamindb_setup/_check.py +3 -0
  4. lamindb_setup/_check_setup.py +13 -7
  5. lamindb_setup/_close.py +2 -0
  6. lamindb_setup/_connect_instance.py +47 -26
  7. lamindb_setup/_delete.py +72 -40
  8. lamindb_setup/_django.py +4 -1
  9. lamindb_setup/_exportdb.py +4 -2
  10. lamindb_setup/_importdb.py +5 -1
  11. lamindb_setup/_init_instance.py +61 -45
  12. lamindb_setup/_migrate.py +16 -13
  13. lamindb_setup/_register_instance.py +10 -3
  14. lamindb_setup/_schema.py +6 -3
  15. lamindb_setup/_set_managed_storage.py +37 -0
  16. lamindb_setup/_setup_user.py +7 -7
  17. lamindb_setup/_silence_loggers.py +4 -2
  18. lamindb_setup/core/__init__.py +4 -3
  19. lamindb_setup/core/_aws_storage.py +3 -0
  20. lamindb_setup/core/_deprecated.py +2 -7
  21. lamindb_setup/core/_docs.py +2 -0
  22. lamindb_setup/core/_hub_client.py +12 -10
  23. lamindb_setup/core/_hub_core.py +203 -88
  24. lamindb_setup/core/_hub_crud.py +21 -12
  25. lamindb_setup/core/_hub_utils.py +11 -8
  26. lamindb_setup/core/_settings.py +23 -26
  27. lamindb_setup/core/_settings_instance.py +149 -81
  28. lamindb_setup/core/_settings_load.py +13 -7
  29. lamindb_setup/core/_settings_save.py +13 -8
  30. lamindb_setup/core/_settings_storage.py +76 -42
  31. lamindb_setup/core/_settings_store.py +4 -2
  32. lamindb_setup/core/_settings_user.py +10 -6
  33. lamindb_setup/core/_setup_bionty_sources.py +9 -2
  34. lamindb_setup/core/cloud_sqlite_locker.py +13 -10
  35. lamindb_setup/core/django.py +3 -1
  36. lamindb_setup/core/exceptions.py +4 -2
  37. lamindb_setup/core/hashing.py +15 -5
  38. lamindb_setup/core/types.py +5 -2
  39. lamindb_setup/core/upath.py +191 -88
  40. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/METADATA +6 -4
  41. lamindb_setup-0.71.1.dist-info/RECORD +43 -0
  42. lamindb_setup/_add_remote_storage.py +0 -50
  43. lamindb_setup-0.70.0.dist-info/RECORD +0 -43
  44. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/LICENSE +0 -0
  45. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/WHEEL +0 -0
@@ -4,21 +4,25 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import os
7
+ from collections import defaultdict
7
8
  from datetime import datetime, timezone
8
- import botocore.session
9
+ from functools import partial
10
+ from itertools import islice
9
11
  from pathlib import Path, PurePosixPath
10
- from typing import Literal, Dict
12
+ from typing import TYPE_CHECKING, Any, Literal
13
+
14
+ import botocore.session
11
15
  import fsspec
12
- from itertools import islice
13
- from typing import Optional, Set, Any, Tuple, List
14
- from collections import defaultdict
15
16
  from lamin_utils import logger
16
17
  from upath import UPath
17
- from upath.implementations.cloud import CloudPath, S3Path # noqa # keep CloudPath!
18
+ from upath.implementations.cloud import CloudPath, S3Path # keep CloudPath!
18
19
  from upath.implementations.local import LocalPath, PosixUPath, WindowsUPath
19
- from .types import UPathStr
20
+
20
21
  from .hashing import b16_to_b64, hash_md5s_from_dir
21
22
 
23
+ if TYPE_CHECKING:
24
+ from .types import UPathStr
25
+
22
26
  LocalPathClasses = (PosixUPath, WindowsUPath, LocalPath)
23
27
 
24
28
  # also see https://gist.github.com/securifera/e7eed730cbe1ce43d0c29d7cd2d582f4
@@ -52,12 +56,15 @@ VALID_SUFFIXES = {
52
56
  ".zarr",
53
57
  ".json",
54
58
  }
55
-
59
+ VALID_COMPOSITE_SUFFIXES = {
60
+ ".anndata.zarr",
61
+ ".spatialdata.zarr",
62
+ }
56
63
 
57
64
  TRAILING_SEP = (os.sep, os.altsep) if os.altsep is not None else os.sep
58
65
 
59
66
 
60
- def extract_suffix_from_path(path: Path, arg_name: Optional[str] = None) -> str:
67
+ def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
61
68
  def process_digits(suffix: str):
62
69
  if suffix[1:].isdigit(): # :1 to skip the dot
63
70
  return "" # digits are no valid suffixes
@@ -70,6 +77,12 @@ def extract_suffix_from_path(path: Path, arg_name: Optional[str] = None) -> str:
70
77
  total_suffix = "".join(path.suffixes)
71
78
  if total_suffix in VALID_SUFFIXES:
72
79
  return total_suffix
80
+ elif total_suffix.endswith(tuple(VALID_COMPOSITE_SUFFIXES)):
81
+ # below seems slow but OK for now
82
+ for suffix in VALID_COMPOSITE_SUFFIXES:
83
+ if total_suffix.endswith(suffix):
84
+ break
85
+ return suffix
73
86
  else:
74
87
  print_hint = True
75
88
  arg_name = "file" if arg_name is None else arg_name # for the warning
@@ -141,44 +154,100 @@ def create_mapper(
141
154
  )
142
155
 
143
156
 
144
- def print_hook(size: int, value: int, **kwargs):
157
+ def print_hook(size: int, value: int, objectname: str, action: str):
145
158
  progress_in_percent = (value / size) * 100
146
- out = (
147
- f"... {kwargs['action']} {Path(kwargs['filepath']).name}:"
148
- f" {min(progress_in_percent, 100):4.1f}%"
149
- )
150
- if progress_in_percent >= 100:
151
- out += "\n"
159
+ out = f"... {action} {objectname}:" f" {min(progress_in_percent, 100):4.1f}%"
152
160
  if "NBPRJ_TEST_NBPATH" not in os.environ:
153
161
  print(out, end="\r")
154
162
 
155
163
 
156
164
  class ProgressCallback(fsspec.callbacks.Callback):
157
- def __init__(self, action: Literal["uploading", "downloading"]):
165
+ def __init__(
166
+ self,
167
+ objectname: str,
168
+ action: Literal["uploading", "downloading", "synchronizing"],
169
+ adjust_size: bool = False,
170
+ ):
171
+ assert action in {"uploading", "downloading", "synchronizing"}
172
+
158
173
  super().__init__()
174
+
159
175
  self.action = action
176
+ print_progress = partial(print_hook, objectname=objectname, action=action)
177
+ self.hooks = {"print_progress": print_progress}
178
+
179
+ self.adjust_size = adjust_size
180
+
181
+ def absolute_update(self, value):
182
+ pass
183
+
184
+ def relative_update(self, inc=1):
185
+ pass
186
+
187
+ def update_relative_value(self, inc=1):
188
+ self.value += inc
189
+ self.call()
160
190
 
161
191
  def branch(self, path_1, path_2, kwargs):
162
- kwargs["callback"] = fsspec.callbacks.Callback(
163
- hooks=dict(print_hook=print_hook), filepath=path_1, action=self.action
164
- )
192
+ if self.adjust_size:
193
+ if Path(path_2 if self.action != "uploading" else path_1).is_dir():
194
+ self.size -= 1
195
+ kwargs["callback"] = ChildProgressCallback(self)
196
+
197
+ def branched(self, path_1, path_2, **kwargs):
198
+ self.branch(path_1, path_2, kwargs)
199
+ return kwargs["callback"]
200
+
201
+ def wrap(self, iterable):
202
+ if self.adjust_size:
203
+ paths = []
204
+ for lpath, rpath in iterable:
205
+ paths.append((lpath, rpath))
206
+ if Path(lpath).is_dir():
207
+ self.size -= 1
208
+ self.adjust_size = False
209
+ return paths
210
+ else:
211
+ return iterable
212
+
213
+ @classmethod
214
+ def requires_progress(
215
+ cls,
216
+ maybe_callback: fsspec.callbacks.Callback | None,
217
+ print_progress: bool,
218
+ objectname: str,
219
+ action: Literal["uploading", "downloading", "synchronizing"],
220
+ **kwargs,
221
+ ):
222
+ if maybe_callback is None:
223
+ if print_progress:
224
+ return cls(objectname, action, **kwargs)
225
+ else:
226
+ return fsspec.callbacks.NoOpCallback()
227
+ return maybe_callback
165
228
 
166
- def call(self, *args, **kwargs):
167
- return None
229
+
230
+ class ChildProgressCallback(fsspec.callbacks.Callback):
231
+ def __init__(self, parent: ProgressCallback):
232
+ super().__init__()
233
+
234
+ self.parent = parent
235
+
236
+ def parent_update(self, inc=1):
237
+ self.parent.update_relative_value(inc)
238
+
239
+ def relative_update(self, inc=1):
240
+ self.parent_update(inc / self.size)
168
241
 
169
242
 
170
243
  def download_to(self, path: UPathStr, print_progress: bool = False, **kwargs):
171
244
  """Download to a path."""
172
- if print_progress:
173
- # can't do path.is_dir() because path doesn't exist
174
- # so assume any destination without a suffix is a dir
175
- # this is temporary until we have a proper progress bar for directories
176
- if os.path.splitext(path)[-1] not in {"", ".zrad", ".zarr"}:
177
- cb = ProgressCallback("downloading")
178
- else:
179
- # todo: make proper progress bar for directories
180
- cb = fsspec.callbacks.NoOpCallback()
181
- kwargs["callback"] = cb
245
+ if print_progress and "callback" not in kwargs:
246
+ callback = ProgressCallback(
247
+ PurePosixPath(path).name, "downloading", adjust_size=True
248
+ )
249
+ kwargs["callback"] = callback
250
+
182
251
  self.fs.download(str(self), str(path), **kwargs)
183
252
 
184
253
 
@@ -190,20 +259,16 @@ def upload_from(
190
259
  **kwargs,
191
260
  ):
192
261
  """Upload from a local path."""
193
- path_is_dir = os.path.isdir(path)
262
+ path = Path(path)
263
+ path_is_dir = path.is_dir()
194
264
  if not path_is_dir:
195
265
  dir_inplace = False
196
266
 
197
- if print_progress:
198
- if not path_is_dir:
199
- cb = ProgressCallback("uploading")
200
- else:
201
- # todo: make proper progress bar for directories
202
- cb = fsspec.callbacks.NoOpCallback()
203
- kwargs["callback"] = cb
267
+ if print_progress and "callback" not in kwargs:
268
+ callback = ProgressCallback(path.name, "uploading")
269
+ kwargs["callback"] = callback
204
270
 
205
271
  if dir_inplace:
206
- path = Path(path)
207
272
  source = [f for f in path.rglob("*") if f.is_file()]
208
273
  destination = [str(self / f.relative_to(path)) for f in source]
209
274
  source = [str(f) for f in source] # type: ignore
@@ -233,7 +298,14 @@ def upload_from(
233
298
  del self.fs.dircache[bucket]
234
299
 
235
300
 
236
- def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
301
+ def synchronize(
302
+ self,
303
+ objectpath: Path,
304
+ error_no_origin: bool = True,
305
+ print_progress: bool = False,
306
+ callback: fsspec.callbacks.Callback | None = None,
307
+ **kwargs,
308
+ ):
237
309
  """Sync to a local destination path."""
238
310
  # optimize the number of network requests
239
311
  if "timestamp" in kwargs:
@@ -292,15 +364,23 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
292
364
  destination_exists = False
293
365
  need_synchronize = True
294
366
  if need_synchronize:
367
+ callback = ProgressCallback.requires_progress(
368
+ callback, print_progress, objectpath.name, "synchronizing"
369
+ )
370
+ callback.set_size(len(files))
295
371
  origin_file_keys = []
296
- for file, stat in files.items():
297
- destination = PurePosixPath(file).relative_to(self.path)
298
- origin_file_keys.append(destination.as_posix())
372
+ for file, stat in callback.wrap(files.items()):
373
+ file_key = PurePosixPath(file).relative_to(self.path)
374
+ origin_file_keys.append(file_key.as_posix())
299
375
  timestamp = stat[modified_key].timestamp()
300
- origin = UPath(f"{self.protocol}://{file}", **self._kwargs)
301
- origin.synchronize(
302
- objectpath / destination, timestamp=timestamp, **kwargs
376
+
377
+ origin = f"{self.protocol}://{file}"
378
+ destination = objectpath / file_key
379
+ child = callback.branched(origin, destination.as_posix())
380
+ UPath(origin, **self._kwargs).synchronize(
381
+ destination, timestamp=timestamp, callback=child, **kwargs
303
382
  )
383
+ child.close()
304
384
  if destination_exists:
305
385
  local_files = [file for file in objectpath.rglob("*") if file.is_file()]
306
386
  if len(local_files) > len(files):
@@ -316,6 +396,10 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
316
396
  return None
317
397
 
318
398
  # synchronization logic for files
399
+ callback = ProgressCallback.requires_progress(
400
+ callback, print_progress, objectpath.name, "synchronizing"
401
+ )
402
+ kwargs["callback"] = callback
319
403
  if objectpath.exists():
320
404
  local_mts = objectpath.stat().st_mtime # type: ignore
321
405
  need_synchronize = cloud_mts > local_mts
@@ -325,9 +409,13 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
325
409
  if need_synchronize:
326
410
  self.download_to(objectpath, **kwargs)
327
411
  os.utime(objectpath, times=(cloud_mts, cloud_mts))
412
+ else:
413
+ # nothing happens if parent_update is not defined
414
+ # because of Callback.no_op
415
+ callback.parent_update()
328
416
 
329
417
 
330
- def modified(self) -> Optional[datetime]:
418
+ def modified(self) -> datetime | None:
331
419
  """Return modified time stamp."""
332
420
  mtime = self.fs.modified(str(self))
333
421
  if mtime.tzinfo is None:
@@ -340,15 +428,15 @@ def compute_file_tree(
340
428
  *,
341
429
  level: int = -1,
342
430
  only_dirs: bool = False,
343
- limit: int = 1000,
344
- include_paths: Optional[Set[Any]] = None,
345
- skip_suffixes: Optional[List[str]] = None,
346
- ) -> Tuple[str, int]:
431
+ n_max_files_per_dir_and_type: int = 100,
432
+ n_max_files: int = 1000,
433
+ include_paths: set[Any] | None = None,
434
+ skip_suffixes: list[str] | None = None,
435
+ ) -> tuple[str, int]:
347
436
  space = " "
348
437
  branch = "│ "
349
438
  tee = "├── "
350
439
  last = "└── "
351
- max_files_per_dir_per_type = 7
352
440
  if skip_suffixes is None:
353
441
  skip_suffixes_tuple = ()
354
442
  else:
@@ -382,14 +470,14 @@ def compute_file_tree(
382
470
  if only_dirs:
383
471
  contents = [d for d in contents if d.is_dir()]
384
472
  pointers = [tee] * (len(contents) - 1) + [last]
385
- n_files_per_dir_per_type = defaultdict(lambda: 0) # type: ignore
386
- for pointer, child_path in zip(pointers, contents):
473
+ n_files_per_dir_and_type = defaultdict(lambda: 0) # type: ignore
474
+ for pointer, child_path in zip(pointers, contents, strict=False): # type: ignore
387
475
  if child_path.is_dir():
388
476
  if include_dirs and child_path not in include_dirs:
389
477
  continue
390
478
  yield prefix + pointer + child_path.name
391
479
  n_directories += 1
392
- n_files_per_dir_per_type = defaultdict(lambda: 0)
480
+ n_files_per_dir_and_type = defaultdict(lambda: 0)
393
481
  extension = branch if pointer == tee else space
394
482
  yield from inner(child_path, prefix=prefix + extension, level=level - 1)
395
483
  elif not only_dirs:
@@ -397,21 +485,21 @@ def compute_file_tree(
397
485
  continue
398
486
  suffix = extract_suffix_from_path(child_path)
399
487
  suffixes.add(suffix)
400
- n_files_per_dir_per_type[suffix] += 1
488
+ n_files_per_dir_and_type[suffix] += 1
401
489
  n_objects += 1
402
- if n_files_per_dir_per_type[suffix] == max_files_per_dir_per_type:
490
+ if n_files_per_dir_and_type[suffix] == n_max_files_per_dir_and_type:
403
491
  yield prefix + "..."
404
- elif n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type:
492
+ elif n_files_per_dir_and_type[suffix] > n_max_files_per_dir_and_type:
405
493
  continue
406
494
  else:
407
495
  yield prefix + pointer + child_path.name
408
496
 
409
497
  folder_tree = ""
410
498
  iterator = inner(path, level=level)
411
- for line in islice(iterator, limit):
499
+ for line in islice(iterator, n_max_files):
412
500
  folder_tree += f"\n{line}"
413
501
  if next(iterator, None):
414
- folder_tree += f"\n... only showing {limit} out of {n_objects} files"
502
+ folder_tree += f"\n... only showing {n_max_files} out of {n_objects} files"
415
503
  directory_info = "directory" if n_directories == 1 else "directories"
416
504
  display_suffixes = ", ".join([f"{suffix!r}" for suffix in suffixes])
417
505
  suffix_message = f" with suffixes {display_suffixes}" if n_objects > 0 else ""
@@ -426,11 +514,12 @@ def compute_file_tree(
426
514
  def view_tree(
427
515
  path: Path,
428
516
  *,
429
- level: int = -1,
517
+ level: int = 2,
430
518
  only_dirs: bool = False,
431
- limit: int = 1000,
432
- include_paths: Optional[Set[Any]] = None,
433
- skip_suffixes: Optional[List[str]] = None,
519
+ n_max_files_per_dir_and_type: int = 100,
520
+ n_max_files: int = 1000,
521
+ include_paths: set[Any] | None = None,
522
+ skip_suffixes: list[str] | None = None,
434
523
  ) -> None:
435
524
  """Print a visual tree structure of files & directories.
436
525
 
@@ -438,7 +527,7 @@ def view_tree(
438
527
  level: If `1`, only iterate through one level, if `2` iterate through 2
439
528
  levels, if `-1` iterate through entire hierarchy.
440
529
  only_dirs: Only iterate through directories.
441
- limit: Display limit. Will only show this many files. Doesn't affect count.
530
+ n_max_files: Display limit. Will only show this many files. Doesn't affect count.
442
531
  include_paths: Restrict to these paths.
443
532
  skip_suffixes: Skip directories with these suffixes.
444
533
 
@@ -472,7 +561,8 @@ def view_tree(
472
561
  path,
473
562
  level=level,
474
563
  only_dirs=only_dirs,
475
- limit=limit,
564
+ n_max_files=n_max_files,
565
+ n_max_files_per_dir_and_type=n_max_files_per_dir_and_type,
476
566
  include_paths=include_paths,
477
567
  skip_suffixes=skip_suffixes,
478
568
  )
@@ -497,9 +587,10 @@ def to_url(upath):
497
587
  bucket = upath._url.netloc
498
588
  if bucket == "scverse-spatial-eu-central-1":
499
589
  region = "eu-central-1"
500
- elif f"s3://{bucket}" not in hosted_buckets:
501
- metadata = upath.fs.call_s3("head_bucket", Bucket=upath._url.netloc)
502
- region = metadata["BucketRegion"]
590
+ elif f"s3://{bucket}" not in HOSTED_BUCKETS:
591
+ response = upath.fs.call_s3("head_bucket", Bucket=upath._url.netloc)
592
+ headers = response["ResponseMetadata"]["HTTPHeaders"]
593
+ region = headers.get("x-amz-bucket-region")
503
594
  else:
504
595
  region = bucket.replace("lamin_", "")
505
596
  if region == "us-east-1":
@@ -578,7 +669,7 @@ def convert_pathlike(pathlike: UPathStr) -> UPath:
578
669
  return path
579
670
 
580
671
 
581
- hosted_regions = [
672
+ HOSTED_REGIONS = [
582
673
  "eu-central-1",
583
674
  "eu-west-2",
584
675
  "us-east-1",
@@ -588,16 +679,16 @@ hosted_regions = [
588
679
  ]
589
680
  lamin_env = os.getenv("LAMIN_ENV")
590
681
  if lamin_env is None or lamin_env == "prod":
591
- hosted_buckets_list = [f"s3://lamin-{region}" for region in hosted_regions]
682
+ hosted_buckets_list = [f"s3://lamin-{region}" for region in HOSTED_REGIONS]
592
683
  hosted_buckets_list.append("s3://scverse-spatial-eu-central-1")
593
- hosted_buckets = tuple(hosted_buckets_list)
684
+ HOSTED_BUCKETS = tuple(hosted_buckets_list)
594
685
  else:
595
- hosted_buckets = ("s3://lamin-hosted-test",) # type: ignore
596
- credentials_cache: Dict[str, Dict[str, str]] = {}
686
+ HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
687
+ credentials_cache: dict[str, dict[str, str]] = {}
597
688
  AWS_CREDENTIALS_PRESENT = None
598
689
 
599
690
 
600
- def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
691
+ def create_path(path: UPath, access_token: str | None = None) -> UPath:
601
692
  path = convert_pathlike(path)
602
693
  # test whether we have an AWS S3 path
603
694
  if not isinstance(path, S3Path):
@@ -611,9 +702,8 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
611
702
  if path.fs.key is not None and path.fs.secret is not None:
612
703
  anon = False
613
704
  else:
614
- # we can do
615
- # path.fs.connect()
616
- # and check path.fs.session._credentials, but it is slower
705
+ # we could do path.fs.connect()
706
+ # and check path.fs.session._credentials, but it'd be slower
617
707
  session = botocore.session.get_session()
618
708
  credentials = session.get_credentials()
619
709
  if credentials is None or credentials.access_key is None:
@@ -625,7 +715,7 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
625
715
 
626
716
  # test whether we are on hosted storage or not
627
717
  path_str = path.as_posix()
628
- is_hosted_storage = path_str.startswith(hosted_buckets)
718
+ is_hosted_storage = path_str.startswith(HOSTED_BUCKETS)
629
719
 
630
720
  if not is_hosted_storage:
631
721
  # make anon request if no credentials present
@@ -652,7 +742,7 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
652
742
  )
653
743
 
654
744
 
655
- def get_stat_file_cloud(stat: Dict) -> Tuple[int, str, str]:
745
+ def get_stat_file_cloud(stat: dict) -> tuple[int, str, str]:
656
746
  size = stat["size"]
657
747
  # small files
658
748
  if "-" not in stat["ETag"]:
@@ -669,7 +759,7 @@ def get_stat_file_cloud(stat: Dict) -> Tuple[int, str, str]:
669
759
  return size, hash, hash_type
670
760
 
671
761
 
672
- def get_stat_dir_cloud(path: UPath) -> Tuple[int, str, str, int]:
762
+ def get_stat_dir_cloud(path: UPath) -> tuple[int, str, str, int]:
673
763
  sizes = []
674
764
  md5s = []
675
765
  objects = path.fs.find(path.as_posix(), detail=True)
@@ -701,9 +791,18 @@ def check_storage_is_empty(
701
791
  # since path.fs.find raises a PermissionError on empty hosted
702
792
  # subdirectories (see lamindb_setup/core/_settings_storage/init_storage).
703
793
  n_offset_objects = 1 # because of touched dummy file, see mark_storage_root()
704
- if account_for_sqlite_file:
705
- n_offset_objects += 1 # because of SQLite file
706
- objects = root_upath.fs.find(root_string)
794
+ if root_string.startswith(HOSTED_BUCKETS):
795
+ # in hosted buckets, count across entire root
796
+ directory_string = root_string
797
+ # the SQLite file is not in the ".lamindb" directory
798
+ if account_for_sqlite_file:
799
+ n_offset_objects += 1 # because of SQLite file
800
+ else:
801
+ # in any other storage location, only count in .lamindb
802
+ if not root_string.endswith("/"):
803
+ root_string += "/"
804
+ directory_string = root_string + ".lamindb"
805
+ objects = root_upath.fs.find(directory_string)
707
806
  n_objects = len(objects)
708
807
  n_diff = n_objects - n_offset_objects
709
808
  ask_for_deletion = (
@@ -711,9 +810,13 @@ def check_storage_is_empty(
711
810
  if raise_error
712
811
  else "consider deleting them"
713
812
  )
813
+ hint = "'./lamindb/_is_initialized' "
814
+ if n_offset_objects == 2:
815
+ hint += "& SQLite file"
816
+ hint += " ignored"
714
817
  message = (
715
- f"Storage location contains {n_objects} objects "
716
- f"({n_offset_objects} ignored) - {ask_for_deletion}\n{objects}"
818
+ f"Storage {directory_string} contains {n_objects} objects "
819
+ f"({hint}) - {ask_for_deletion}\n{objects}"
717
820
  )
718
821
  if n_diff > 0:
719
822
  if raise_error:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb_setup
3
- Version: 0.70.0
3
+ Version: 0.71.1
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <laminlabs@gmail.com>
6
6
  Description-Content-Type: text/markdown
@@ -14,7 +14,9 @@ Requires-Dist: requests
14
14
  Requires-Dist: universal_pathlib==0.1.4
15
15
  Requires-Dist: botocore<2.0.0
16
16
  Requires-Dist: supabase==2.2.1
17
- Requires-Dist: s3fs ; extra == "aws"
17
+ Requires-Dist: urllib3<2 ; extra == "aws"
18
+ Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
19
+ Requires-Dist: s3fs>=2023.12.2,<=2024.3.1 ; extra == "aws"
18
20
  Requires-Dist: pyjwt<3.0.0 ; extra == "dev"
19
21
  Requires-Dist: psycopg2-binary ; extra == "dev"
20
22
  Requires-Dist: python-dotenv ; extra == "dev"
@@ -25,12 +27,12 @@ Requires-Dist: pytest-xdist ; extra == "dev"
25
27
  Requires-Dist: nbproject-test>=0.4.3 ; extra == "dev"
26
28
  Requires-Dist: pandas ; extra == "dev"
27
29
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
28
- Requires-Dist: faker ; extra == "hub"
30
+ Requires-Dist: gcsfs>=2023.12.2,<=2024.3.1 ; extra == "gcp"
29
31
  Project-URL: Home, https://github.com/laminlabs/lamindb-setup
30
32
  Provides-Extra: aws
31
33
  Provides-Extra: dev
32
34
  Provides-Extra: erdiagram
33
- Provides-Extra: hub
35
+ Provides-Extra: gcp
34
36
 
35
37
  [![codecov](https://codecov.io/gh/laminlabs/lamindb-setup/branch/main/graph/badge.svg)](https://codecov.io/gh/laminlabs/lamindb-setup)
36
38
 
@@ -0,0 +1,43 @@
1
+ lamindb_setup/__init__.py,sha256=EIyvRHHlYOX7WsAFGdKp63ho-ceFW05z_n_GV1oJkaU,1542
2
+ lamindb_setup/_cache.py,sha256=wA7mbysANwe8hPNbjDo9bOmXJ0xIyaS5iyxIpxSWji4,846
3
+ lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
4
+ lamindb_setup/_check_setup.py,sha256=cNEL9Q4yPpmEkGKHH8JgullWl1VUZwALJ4RHn9wZypY,2613
5
+ lamindb_setup/_close.py,sha256=1QS9p2SCacgovYn6xqWU4zFvwHN1RgIccvzwJgFvKgU,1186
6
+ lamindb_setup/_connect_instance.py,sha256=c0qO0dn4hnEc-toRh6pxFaAxkhzM9LVN-72XK11JtAU,12616
7
+ lamindb_setup/_delete.py,sha256=hf8zfVJfW74QR7eK4xJNQ6HbkkZBsl5eTqj-Ni5jPo0,7232
8
+ lamindb_setup/_django.py,sha256=EoyWvFzH0i9wxjy4JZhcoXCTckztP_Mrl6FbYQnMmLE,1534
9
+ lamindb_setup/_exportdb.py,sha256=uTIZjKKTB7arzEr1j0O6lONiT2pRBKeOFdLvOV8ZwzE,2120
10
+ lamindb_setup/_importdb.py,sha256=yYYShzUajTsR-cTW4CZ-UNDWZY2uE5PAgNbp-wn8Ogc,1874
11
+ lamindb_setup/_init_instance.py,sha256=Hy4PsPpXCdl5ik3Q0ODltVbbvYjAqnLnfFza2ai8nX8,11921
12
+ lamindb_setup/_migrate.py,sha256=4nBTFg5-BK4A2gH-D3_tcFf8EtvMnIo5Mq0e_C6_9-U,8815
13
+ lamindb_setup/_register_instance.py,sha256=Jeu0wyvJVSVQ_n-A_7yn7xOZIP0ncJD92DRABqzPIjA,940
14
+ lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
15
+ lamindb_setup/_set_managed_storage.py,sha256=BUUJzKNWNEA5KnKnFZsas0ANU6w-LBZL-CKRu-sNLPE,1268
16
+ lamindb_setup/_setup_user.py,sha256=6Oc7Rke-yRQSZbuntdUAz8QbJ6UuPzYHI9FnYlf_q-A,3670
17
+ lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
18
+ lamindb_setup/core/__init__.py,sha256=dV9S-rQpNK9JcBn4hiEmiLnmNqfpPFJD9pqagMCaIew,416
19
+ lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
20
+ lamindb_setup/core/_deprecated.py,sha256=3qxUI1dnDlSeR0BYrv7ucjqRBEojbqotPgpShXs4KF8,2520
21
+ lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
22
+ lamindb_setup/core/_hub_client.py,sha256=V0qKDsCdRn-tQy2YIk4VgXcpJFmuum6N3gcavAC7gBQ,5504
23
+ lamindb_setup/core/_hub_core.py,sha256=FpEXPqSHXAgYegyQmsma54S3bv_mtceXrrpHXa7UeKE,15970
24
+ lamindb_setup/core/_hub_crud.py,sha256=b1XF7AJpM9Q-ttm9nPG-r3OTRWHQaGzAGIyvmb83NTo,4859
25
+ lamindb_setup/core/_hub_utils.py,sha256=b_M1LkdCjiMWm1EOlSb9GuPdLijwVgQDtATTpeZuXI0,1875
26
+ lamindb_setup/core/_settings.py,sha256=jjZ_AxRXB3Y3UP6m04BAw_dhFbJbdg2-nZWmEv2LNZ8,3141
27
+ lamindb_setup/core/_settings_instance.py,sha256=RFUcnBBUp303dbVEHcAaIm_q7lzlWg56OrKLwdam8Pg,16588
28
+ lamindb_setup/core/_settings_load.py,sha256=NGgCDpN85j1EqoKlrYFIlZBMlBJm33gx2-wc96CP_ZQ,3922
29
+ lamindb_setup/core/_settings_save.py,sha256=d1A-Ex-7H08mb8l7I0Oe0j0GilrfaDuprh_NMxhQAsQ,2704
30
+ lamindb_setup/core/_settings_storage.py,sha256=VgsqdIImQRfOZ6FGNY6DLVohaSxerj_F-sWtjD9hzcs,12382
31
+ lamindb_setup/core/_settings_store.py,sha256=dagS5c7wAMRnuZTRfCU4sKaIOyF_HwAP5Fnnn8vphno,2084
32
+ lamindb_setup/core/_settings_user.py,sha256=P2lC4WDRAFfT-Xq3MlXJ-wMKIHCoGNhMTQfRGIAyUNQ,1344
33
+ lamindb_setup/core/_setup_bionty_sources.py,sha256=OgPpZxN2_Wffy-ogEBz_97c_k8d2bD-DDVt89-u9GLY,3002
34
+ lamindb_setup/core/cloud_sqlite_locker.py,sha256=NIBNAGq7TTRrip9OzMdiQKj8QOuwhL9esyM0aehUqBA,6893
35
+ lamindb_setup/core/django.py,sha256=m0AKg2lJ1EYCtEtZ8frFFJbAR9qX0gnFcgqp7aeC2k0,3450
36
+ lamindb_setup/core/exceptions.py,sha256=eoI7AXgATgDVzgArtN7CUvpaMUC067vsBg5LHCsWzDM,305
37
+ lamindb_setup/core/hashing.py,sha256=mv9UCvAsSrdHYQAv3Kz7UOvjd5tIjvDTIYv_ettBuVY,2218
38
+ lamindb_setup/core/types.py,sha256=bcYnZ0uM_2NXKJCl94Mmc-uYrQlRUUVKG3sK2N-F-N4,532
39
+ lamindb_setup/core/upath.py,sha256=XBiHm-gxtfDIHnQmH5WjjmZzmAg5S421fjAfRrEg710,28286
40
+ lamindb_setup-0.71.1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
41
+ lamindb_setup-0.71.1.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
42
+ lamindb_setup-0.71.1.dist-info/METADATA,sha256=fijK20hR12pIxHzmECdbyguUSsHvVjIYTmfumfuLReQ,1620
43
+ lamindb_setup-0.71.1.dist-info/RECORD,,
@@ -1,50 +0,0 @@
1
- from lamin_utils import logger
2
-
3
- from lamindb_setup.core.types import UPathStr
4
-
5
- from ._init_instance import register_user_and_storage
6
- from .core._settings import settings
7
- from .core._settings_instance import InstanceSettings
8
- from .core._settings_storage import StorageSettings
9
-
10
-
11
- def switch_default_storage(root: UPathStr, **fs_kwargs):
12
- """Add a remote default storage location to a local instance.
13
-
14
- This can be used to selectively share data.
15
-
16
- Args:
17
- root: `UPathStr` - The new storage root, e.g., an S3 bucket.
18
- **fs_kwargs: Additional fsspec arguments for cloud root, e.g., profile.
19
-
20
- Example:
21
- >>> ln.setup.set.storage(
22
- >>> "s3://some-bucket",
23
- >>> profile="some_profile", # fsspec arg
24
- >>> cache_regions=True # fsspec arg for s3
25
- >>> )
26
-
27
- """
28
- if settings.instance.dialect == "sqlite":
29
- logger.error("can't set storage for sqlite instances.")
30
- return "set-storage-failed"
31
- ssettings = StorageSettings(root=root)
32
- new_isettings = InstanceSettings(
33
- owner=settings.instance.owner,
34
- name=settings.instance.name,
35
- storage=ssettings,
36
- db=settings.instance.db,
37
- schema=settings.instance._schema_str,
38
- id=settings.instance.id,
39
- )
40
-
41
- new_isettings._persist() # this also updates the settings object
42
- register_user_and_storage(new_isettings, settings.user)
43
- # we are not doing this for now because of difficulties to define the right RLS policy # noqa
44
- # https://laminlabs.slack.com/archives/C04FPE8V01W/p1687948324601929?thread_ts=1687531921.394119&cid=C04FPE8V01W
45
- # if settings.instance.is_remote:
46
- # init_storage_hub(
47
- # root, account_handle=settings.instance.owner # type: ignore
48
- # )
49
-
50
- settings.storage._set_fs_kwargs(**fs_kwargs)