lamindb_setup 0.70.0__py2.py3-none-any.whl → 0.71.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +15 -15
- lamindb_setup/_cache.py +4 -1
- lamindb_setup/_check.py +3 -0
- lamindb_setup/_check_setup.py +13 -7
- lamindb_setup/_close.py +2 -0
- lamindb_setup/_connect_instance.py +47 -26
- lamindb_setup/_delete.py +72 -40
- lamindb_setup/_django.py +4 -1
- lamindb_setup/_exportdb.py +4 -2
- lamindb_setup/_importdb.py +5 -1
- lamindb_setup/_init_instance.py +61 -45
- lamindb_setup/_migrate.py +16 -13
- lamindb_setup/_register_instance.py +10 -3
- lamindb_setup/_schema.py +6 -3
- lamindb_setup/_set_managed_storage.py +37 -0
- lamindb_setup/_setup_user.py +7 -7
- lamindb_setup/_silence_loggers.py +4 -2
- lamindb_setup/core/__init__.py +4 -3
- lamindb_setup/core/_aws_storage.py +3 -0
- lamindb_setup/core/_deprecated.py +2 -7
- lamindb_setup/core/_docs.py +2 -0
- lamindb_setup/core/_hub_client.py +12 -10
- lamindb_setup/core/_hub_core.py +203 -88
- lamindb_setup/core/_hub_crud.py +21 -12
- lamindb_setup/core/_hub_utils.py +11 -8
- lamindb_setup/core/_settings.py +23 -26
- lamindb_setup/core/_settings_instance.py +149 -81
- lamindb_setup/core/_settings_load.py +13 -7
- lamindb_setup/core/_settings_save.py +13 -8
- lamindb_setup/core/_settings_storage.py +76 -42
- lamindb_setup/core/_settings_store.py +4 -2
- lamindb_setup/core/_settings_user.py +10 -6
- lamindb_setup/core/_setup_bionty_sources.py +9 -2
- lamindb_setup/core/cloud_sqlite_locker.py +13 -10
- lamindb_setup/core/django.py +3 -1
- lamindb_setup/core/exceptions.py +4 -2
- lamindb_setup/core/hashing.py +15 -5
- lamindb_setup/core/types.py +5 -2
- lamindb_setup/core/upath.py +191 -88
- {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/METADATA +6 -4
- lamindb_setup-0.71.1.dist-info/RECORD +43 -0
- lamindb_setup/_add_remote_storage.py +0 -50
- lamindb_setup-0.70.0.dist-info/RECORD +0 -43
- {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/LICENSE +0 -0
- {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.1.dist-info}/WHEEL +0 -0
lamindb_setup/core/upath.py
CHANGED
|
@@ -4,21 +4,25 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
+
from collections import defaultdict
|
|
7
8
|
from datetime import datetime, timezone
|
|
8
|
-
import
|
|
9
|
+
from functools import partial
|
|
10
|
+
from itertools import islice
|
|
9
11
|
from pathlib import Path, PurePosixPath
|
|
10
|
-
from typing import
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
13
|
+
|
|
14
|
+
import botocore.session
|
|
11
15
|
import fsspec
|
|
12
|
-
from itertools import islice
|
|
13
|
-
from typing import Optional, Set, Any, Tuple, List
|
|
14
|
-
from collections import defaultdict
|
|
15
16
|
from lamin_utils import logger
|
|
16
17
|
from upath import UPath
|
|
17
|
-
from upath.implementations.cloud import CloudPath, S3Path #
|
|
18
|
+
from upath.implementations.cloud import CloudPath, S3Path # keep CloudPath!
|
|
18
19
|
from upath.implementations.local import LocalPath, PosixUPath, WindowsUPath
|
|
19
|
-
|
|
20
|
+
|
|
20
21
|
from .hashing import b16_to_b64, hash_md5s_from_dir
|
|
21
22
|
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from .types import UPathStr
|
|
25
|
+
|
|
22
26
|
LocalPathClasses = (PosixUPath, WindowsUPath, LocalPath)
|
|
23
27
|
|
|
24
28
|
# also see https://gist.github.com/securifera/e7eed730cbe1ce43d0c29d7cd2d582f4
|
|
@@ -52,12 +56,15 @@ VALID_SUFFIXES = {
|
|
|
52
56
|
".zarr",
|
|
53
57
|
".json",
|
|
54
58
|
}
|
|
55
|
-
|
|
59
|
+
VALID_COMPOSITE_SUFFIXES = {
|
|
60
|
+
".anndata.zarr",
|
|
61
|
+
".spatialdata.zarr",
|
|
62
|
+
}
|
|
56
63
|
|
|
57
64
|
TRAILING_SEP = (os.sep, os.altsep) if os.altsep is not None else os.sep
|
|
58
65
|
|
|
59
66
|
|
|
60
|
-
def extract_suffix_from_path(path: Path, arg_name:
|
|
67
|
+
def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
|
|
61
68
|
def process_digits(suffix: str):
|
|
62
69
|
if suffix[1:].isdigit(): # :1 to skip the dot
|
|
63
70
|
return "" # digits are no valid suffixes
|
|
@@ -70,6 +77,12 @@ def extract_suffix_from_path(path: Path, arg_name: Optional[str] = None) -> str:
|
|
|
70
77
|
total_suffix = "".join(path.suffixes)
|
|
71
78
|
if total_suffix in VALID_SUFFIXES:
|
|
72
79
|
return total_suffix
|
|
80
|
+
elif total_suffix.endswith(tuple(VALID_COMPOSITE_SUFFIXES)):
|
|
81
|
+
# below seems slow but OK for now
|
|
82
|
+
for suffix in VALID_COMPOSITE_SUFFIXES:
|
|
83
|
+
if total_suffix.endswith(suffix):
|
|
84
|
+
break
|
|
85
|
+
return suffix
|
|
73
86
|
else:
|
|
74
87
|
print_hint = True
|
|
75
88
|
arg_name = "file" if arg_name is None else arg_name # for the warning
|
|
@@ -141,44 +154,100 @@ def create_mapper(
|
|
|
141
154
|
)
|
|
142
155
|
|
|
143
156
|
|
|
144
|
-
def print_hook(size: int, value: int,
|
|
157
|
+
def print_hook(size: int, value: int, objectname: str, action: str):
|
|
145
158
|
progress_in_percent = (value / size) * 100
|
|
146
|
-
out = (
|
|
147
|
-
f"... {kwargs['action']} {Path(kwargs['filepath']).name}:"
|
|
148
|
-
f" {min(progress_in_percent, 100):4.1f}%"
|
|
149
|
-
)
|
|
150
|
-
if progress_in_percent >= 100:
|
|
151
|
-
out += "\n"
|
|
159
|
+
out = f"... {action} {objectname}:" f" {min(progress_in_percent, 100):4.1f}%"
|
|
152
160
|
if "NBPRJ_TEST_NBPATH" not in os.environ:
|
|
153
161
|
print(out, end="\r")
|
|
154
162
|
|
|
155
163
|
|
|
156
164
|
class ProgressCallback(fsspec.callbacks.Callback):
|
|
157
|
-
def __init__(
|
|
165
|
+
def __init__(
|
|
166
|
+
self,
|
|
167
|
+
objectname: str,
|
|
168
|
+
action: Literal["uploading", "downloading", "synchronizing"],
|
|
169
|
+
adjust_size: bool = False,
|
|
170
|
+
):
|
|
171
|
+
assert action in {"uploading", "downloading", "synchronizing"}
|
|
172
|
+
|
|
158
173
|
super().__init__()
|
|
174
|
+
|
|
159
175
|
self.action = action
|
|
176
|
+
print_progress = partial(print_hook, objectname=objectname, action=action)
|
|
177
|
+
self.hooks = {"print_progress": print_progress}
|
|
178
|
+
|
|
179
|
+
self.adjust_size = adjust_size
|
|
180
|
+
|
|
181
|
+
def absolute_update(self, value):
|
|
182
|
+
pass
|
|
183
|
+
|
|
184
|
+
def relative_update(self, inc=1):
|
|
185
|
+
pass
|
|
186
|
+
|
|
187
|
+
def update_relative_value(self, inc=1):
|
|
188
|
+
self.value += inc
|
|
189
|
+
self.call()
|
|
160
190
|
|
|
161
191
|
def branch(self, path_1, path_2, kwargs):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
192
|
+
if self.adjust_size:
|
|
193
|
+
if Path(path_2 if self.action != "uploading" else path_1).is_dir():
|
|
194
|
+
self.size -= 1
|
|
195
|
+
kwargs["callback"] = ChildProgressCallback(self)
|
|
196
|
+
|
|
197
|
+
def branched(self, path_1, path_2, **kwargs):
|
|
198
|
+
self.branch(path_1, path_2, kwargs)
|
|
199
|
+
return kwargs["callback"]
|
|
200
|
+
|
|
201
|
+
def wrap(self, iterable):
|
|
202
|
+
if self.adjust_size:
|
|
203
|
+
paths = []
|
|
204
|
+
for lpath, rpath in iterable:
|
|
205
|
+
paths.append((lpath, rpath))
|
|
206
|
+
if Path(lpath).is_dir():
|
|
207
|
+
self.size -= 1
|
|
208
|
+
self.adjust_size = False
|
|
209
|
+
return paths
|
|
210
|
+
else:
|
|
211
|
+
return iterable
|
|
212
|
+
|
|
213
|
+
@classmethod
|
|
214
|
+
def requires_progress(
|
|
215
|
+
cls,
|
|
216
|
+
maybe_callback: fsspec.callbacks.Callback | None,
|
|
217
|
+
print_progress: bool,
|
|
218
|
+
objectname: str,
|
|
219
|
+
action: Literal["uploading", "downloading", "synchronizing"],
|
|
220
|
+
**kwargs,
|
|
221
|
+
):
|
|
222
|
+
if maybe_callback is None:
|
|
223
|
+
if print_progress:
|
|
224
|
+
return cls(objectname, action, **kwargs)
|
|
225
|
+
else:
|
|
226
|
+
return fsspec.callbacks.NoOpCallback()
|
|
227
|
+
return maybe_callback
|
|
165
228
|
|
|
166
|
-
|
|
167
|
-
|
|
229
|
+
|
|
230
|
+
class ChildProgressCallback(fsspec.callbacks.Callback):
|
|
231
|
+
def __init__(self, parent: ProgressCallback):
|
|
232
|
+
super().__init__()
|
|
233
|
+
|
|
234
|
+
self.parent = parent
|
|
235
|
+
|
|
236
|
+
def parent_update(self, inc=1):
|
|
237
|
+
self.parent.update_relative_value(inc)
|
|
238
|
+
|
|
239
|
+
def relative_update(self, inc=1):
|
|
240
|
+
self.parent_update(inc / self.size)
|
|
168
241
|
|
|
169
242
|
|
|
170
243
|
def download_to(self, path: UPathStr, print_progress: bool = False, **kwargs):
|
|
171
244
|
"""Download to a path."""
|
|
172
|
-
if print_progress:
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
else:
|
|
179
|
-
# todo: make proper progress bar for directories
|
|
180
|
-
cb = fsspec.callbacks.NoOpCallback()
|
|
181
|
-
kwargs["callback"] = cb
|
|
245
|
+
if print_progress and "callback" not in kwargs:
|
|
246
|
+
callback = ProgressCallback(
|
|
247
|
+
PurePosixPath(path).name, "downloading", adjust_size=True
|
|
248
|
+
)
|
|
249
|
+
kwargs["callback"] = callback
|
|
250
|
+
|
|
182
251
|
self.fs.download(str(self), str(path), **kwargs)
|
|
183
252
|
|
|
184
253
|
|
|
@@ -190,20 +259,16 @@ def upload_from(
|
|
|
190
259
|
**kwargs,
|
|
191
260
|
):
|
|
192
261
|
"""Upload from a local path."""
|
|
193
|
-
|
|
262
|
+
path = Path(path)
|
|
263
|
+
path_is_dir = path.is_dir()
|
|
194
264
|
if not path_is_dir:
|
|
195
265
|
dir_inplace = False
|
|
196
266
|
|
|
197
|
-
if print_progress:
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
else:
|
|
201
|
-
# todo: make proper progress bar for directories
|
|
202
|
-
cb = fsspec.callbacks.NoOpCallback()
|
|
203
|
-
kwargs["callback"] = cb
|
|
267
|
+
if print_progress and "callback" not in kwargs:
|
|
268
|
+
callback = ProgressCallback(path.name, "uploading")
|
|
269
|
+
kwargs["callback"] = callback
|
|
204
270
|
|
|
205
271
|
if dir_inplace:
|
|
206
|
-
path = Path(path)
|
|
207
272
|
source = [f for f in path.rglob("*") if f.is_file()]
|
|
208
273
|
destination = [str(self / f.relative_to(path)) for f in source]
|
|
209
274
|
source = [str(f) for f in source] # type: ignore
|
|
@@ -233,7 +298,14 @@ def upload_from(
|
|
|
233
298
|
del self.fs.dircache[bucket]
|
|
234
299
|
|
|
235
300
|
|
|
236
|
-
def synchronize(
|
|
301
|
+
def synchronize(
|
|
302
|
+
self,
|
|
303
|
+
objectpath: Path,
|
|
304
|
+
error_no_origin: bool = True,
|
|
305
|
+
print_progress: bool = False,
|
|
306
|
+
callback: fsspec.callbacks.Callback | None = None,
|
|
307
|
+
**kwargs,
|
|
308
|
+
):
|
|
237
309
|
"""Sync to a local destination path."""
|
|
238
310
|
# optimize the number of network requests
|
|
239
311
|
if "timestamp" in kwargs:
|
|
@@ -292,15 +364,23 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
|
|
|
292
364
|
destination_exists = False
|
|
293
365
|
need_synchronize = True
|
|
294
366
|
if need_synchronize:
|
|
367
|
+
callback = ProgressCallback.requires_progress(
|
|
368
|
+
callback, print_progress, objectpath.name, "synchronizing"
|
|
369
|
+
)
|
|
370
|
+
callback.set_size(len(files))
|
|
295
371
|
origin_file_keys = []
|
|
296
|
-
for file, stat in files.items():
|
|
297
|
-
|
|
298
|
-
origin_file_keys.append(
|
|
372
|
+
for file, stat in callback.wrap(files.items()):
|
|
373
|
+
file_key = PurePosixPath(file).relative_to(self.path)
|
|
374
|
+
origin_file_keys.append(file_key.as_posix())
|
|
299
375
|
timestamp = stat[modified_key].timestamp()
|
|
300
|
-
|
|
301
|
-
origin.
|
|
302
|
-
|
|
376
|
+
|
|
377
|
+
origin = f"{self.protocol}://{file}"
|
|
378
|
+
destination = objectpath / file_key
|
|
379
|
+
child = callback.branched(origin, destination.as_posix())
|
|
380
|
+
UPath(origin, **self._kwargs).synchronize(
|
|
381
|
+
destination, timestamp=timestamp, callback=child, **kwargs
|
|
303
382
|
)
|
|
383
|
+
child.close()
|
|
304
384
|
if destination_exists:
|
|
305
385
|
local_files = [file for file in objectpath.rglob("*") if file.is_file()]
|
|
306
386
|
if len(local_files) > len(files):
|
|
@@ -316,6 +396,10 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
|
|
|
316
396
|
return None
|
|
317
397
|
|
|
318
398
|
# synchronization logic for files
|
|
399
|
+
callback = ProgressCallback.requires_progress(
|
|
400
|
+
callback, print_progress, objectpath.name, "synchronizing"
|
|
401
|
+
)
|
|
402
|
+
kwargs["callback"] = callback
|
|
319
403
|
if objectpath.exists():
|
|
320
404
|
local_mts = objectpath.stat().st_mtime # type: ignore
|
|
321
405
|
need_synchronize = cloud_mts > local_mts
|
|
@@ -325,9 +409,13 @@ def synchronize(self, objectpath: Path, error_no_origin: bool = True, **kwargs):
|
|
|
325
409
|
if need_synchronize:
|
|
326
410
|
self.download_to(objectpath, **kwargs)
|
|
327
411
|
os.utime(objectpath, times=(cloud_mts, cloud_mts))
|
|
412
|
+
else:
|
|
413
|
+
# nothing happens if parent_update is not defined
|
|
414
|
+
# because of Callback.no_op
|
|
415
|
+
callback.parent_update()
|
|
328
416
|
|
|
329
417
|
|
|
330
|
-
def modified(self) ->
|
|
418
|
+
def modified(self) -> datetime | None:
|
|
331
419
|
"""Return modified time stamp."""
|
|
332
420
|
mtime = self.fs.modified(str(self))
|
|
333
421
|
if mtime.tzinfo is None:
|
|
@@ -340,15 +428,15 @@ def compute_file_tree(
|
|
|
340
428
|
*,
|
|
341
429
|
level: int = -1,
|
|
342
430
|
only_dirs: bool = False,
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
431
|
+
n_max_files_per_dir_and_type: int = 100,
|
|
432
|
+
n_max_files: int = 1000,
|
|
433
|
+
include_paths: set[Any] | None = None,
|
|
434
|
+
skip_suffixes: list[str] | None = None,
|
|
435
|
+
) -> tuple[str, int]:
|
|
347
436
|
space = " "
|
|
348
437
|
branch = "│ "
|
|
349
438
|
tee = "├── "
|
|
350
439
|
last = "└── "
|
|
351
|
-
max_files_per_dir_per_type = 7
|
|
352
440
|
if skip_suffixes is None:
|
|
353
441
|
skip_suffixes_tuple = ()
|
|
354
442
|
else:
|
|
@@ -382,14 +470,14 @@ def compute_file_tree(
|
|
|
382
470
|
if only_dirs:
|
|
383
471
|
contents = [d for d in contents if d.is_dir()]
|
|
384
472
|
pointers = [tee] * (len(contents) - 1) + [last]
|
|
385
|
-
|
|
386
|
-
for pointer, child_path in zip(pointers, contents):
|
|
473
|
+
n_files_per_dir_and_type = defaultdict(lambda: 0) # type: ignore
|
|
474
|
+
for pointer, child_path in zip(pointers, contents, strict=False): # type: ignore
|
|
387
475
|
if child_path.is_dir():
|
|
388
476
|
if include_dirs and child_path not in include_dirs:
|
|
389
477
|
continue
|
|
390
478
|
yield prefix + pointer + child_path.name
|
|
391
479
|
n_directories += 1
|
|
392
|
-
|
|
480
|
+
n_files_per_dir_and_type = defaultdict(lambda: 0)
|
|
393
481
|
extension = branch if pointer == tee else space
|
|
394
482
|
yield from inner(child_path, prefix=prefix + extension, level=level - 1)
|
|
395
483
|
elif not only_dirs:
|
|
@@ -397,21 +485,21 @@ def compute_file_tree(
|
|
|
397
485
|
continue
|
|
398
486
|
suffix = extract_suffix_from_path(child_path)
|
|
399
487
|
suffixes.add(suffix)
|
|
400
|
-
|
|
488
|
+
n_files_per_dir_and_type[suffix] += 1
|
|
401
489
|
n_objects += 1
|
|
402
|
-
if
|
|
490
|
+
if n_files_per_dir_and_type[suffix] == n_max_files_per_dir_and_type:
|
|
403
491
|
yield prefix + "..."
|
|
404
|
-
elif
|
|
492
|
+
elif n_files_per_dir_and_type[suffix] > n_max_files_per_dir_and_type:
|
|
405
493
|
continue
|
|
406
494
|
else:
|
|
407
495
|
yield prefix + pointer + child_path.name
|
|
408
496
|
|
|
409
497
|
folder_tree = ""
|
|
410
498
|
iterator = inner(path, level=level)
|
|
411
|
-
for line in islice(iterator,
|
|
499
|
+
for line in islice(iterator, n_max_files):
|
|
412
500
|
folder_tree += f"\n{line}"
|
|
413
501
|
if next(iterator, None):
|
|
414
|
-
folder_tree += f"\n... only showing {
|
|
502
|
+
folder_tree += f"\n... only showing {n_max_files} out of {n_objects} files"
|
|
415
503
|
directory_info = "directory" if n_directories == 1 else "directories"
|
|
416
504
|
display_suffixes = ", ".join([f"{suffix!r}" for suffix in suffixes])
|
|
417
505
|
suffix_message = f" with suffixes {display_suffixes}" if n_objects > 0 else ""
|
|
@@ -426,11 +514,12 @@ def compute_file_tree(
|
|
|
426
514
|
def view_tree(
|
|
427
515
|
path: Path,
|
|
428
516
|
*,
|
|
429
|
-
level: int =
|
|
517
|
+
level: int = 2,
|
|
430
518
|
only_dirs: bool = False,
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
519
|
+
n_max_files_per_dir_and_type: int = 100,
|
|
520
|
+
n_max_files: int = 1000,
|
|
521
|
+
include_paths: set[Any] | None = None,
|
|
522
|
+
skip_suffixes: list[str] | None = None,
|
|
434
523
|
) -> None:
|
|
435
524
|
"""Print a visual tree structure of files & directories.
|
|
436
525
|
|
|
@@ -438,7 +527,7 @@ def view_tree(
|
|
|
438
527
|
level: If `1`, only iterate through one level, if `2` iterate through 2
|
|
439
528
|
levels, if `-1` iterate through entire hierarchy.
|
|
440
529
|
only_dirs: Only iterate through directories.
|
|
441
|
-
|
|
530
|
+
n_max_files: Display limit. Will only show this many files. Doesn't affect count.
|
|
442
531
|
include_paths: Restrict to these paths.
|
|
443
532
|
skip_suffixes: Skip directories with these suffixes.
|
|
444
533
|
|
|
@@ -472,7 +561,8 @@ def view_tree(
|
|
|
472
561
|
path,
|
|
473
562
|
level=level,
|
|
474
563
|
only_dirs=only_dirs,
|
|
475
|
-
|
|
564
|
+
n_max_files=n_max_files,
|
|
565
|
+
n_max_files_per_dir_and_type=n_max_files_per_dir_and_type,
|
|
476
566
|
include_paths=include_paths,
|
|
477
567
|
skip_suffixes=skip_suffixes,
|
|
478
568
|
)
|
|
@@ -497,9 +587,10 @@ def to_url(upath):
|
|
|
497
587
|
bucket = upath._url.netloc
|
|
498
588
|
if bucket == "scverse-spatial-eu-central-1":
|
|
499
589
|
region = "eu-central-1"
|
|
500
|
-
elif f"s3://{bucket}" not in
|
|
501
|
-
|
|
502
|
-
|
|
590
|
+
elif f"s3://{bucket}" not in HOSTED_BUCKETS:
|
|
591
|
+
response = upath.fs.call_s3("head_bucket", Bucket=upath._url.netloc)
|
|
592
|
+
headers = response["ResponseMetadata"]["HTTPHeaders"]
|
|
593
|
+
region = headers.get("x-amz-bucket-region")
|
|
503
594
|
else:
|
|
504
595
|
region = bucket.replace("lamin_", "")
|
|
505
596
|
if region == "us-east-1":
|
|
@@ -578,7 +669,7 @@ def convert_pathlike(pathlike: UPathStr) -> UPath:
|
|
|
578
669
|
return path
|
|
579
670
|
|
|
580
671
|
|
|
581
|
-
|
|
672
|
+
HOSTED_REGIONS = [
|
|
582
673
|
"eu-central-1",
|
|
583
674
|
"eu-west-2",
|
|
584
675
|
"us-east-1",
|
|
@@ -588,16 +679,16 @@ hosted_regions = [
|
|
|
588
679
|
]
|
|
589
680
|
lamin_env = os.getenv("LAMIN_ENV")
|
|
590
681
|
if lamin_env is None or lamin_env == "prod":
|
|
591
|
-
hosted_buckets_list = [f"s3://lamin-{region}" for region in
|
|
682
|
+
hosted_buckets_list = [f"s3://lamin-{region}" for region in HOSTED_REGIONS]
|
|
592
683
|
hosted_buckets_list.append("s3://scverse-spatial-eu-central-1")
|
|
593
|
-
|
|
684
|
+
HOSTED_BUCKETS = tuple(hosted_buckets_list)
|
|
594
685
|
else:
|
|
595
|
-
|
|
596
|
-
credentials_cache:
|
|
686
|
+
HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
|
|
687
|
+
credentials_cache: dict[str, dict[str, str]] = {}
|
|
597
688
|
AWS_CREDENTIALS_PRESENT = None
|
|
598
689
|
|
|
599
690
|
|
|
600
|
-
def create_path(path: UPath, access_token:
|
|
691
|
+
def create_path(path: UPath, access_token: str | None = None) -> UPath:
|
|
601
692
|
path = convert_pathlike(path)
|
|
602
693
|
# test whether we have an AWS S3 path
|
|
603
694
|
if not isinstance(path, S3Path):
|
|
@@ -611,9 +702,8 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
|
|
|
611
702
|
if path.fs.key is not None and path.fs.secret is not None:
|
|
612
703
|
anon = False
|
|
613
704
|
else:
|
|
614
|
-
# we
|
|
615
|
-
# path.fs.
|
|
616
|
-
# and check path.fs.session._credentials, but it is slower
|
|
705
|
+
# we could do path.fs.connect()
|
|
706
|
+
# and check path.fs.session._credentials, but it'd be slower
|
|
617
707
|
session = botocore.session.get_session()
|
|
618
708
|
credentials = session.get_credentials()
|
|
619
709
|
if credentials is None or credentials.access_key is None:
|
|
@@ -625,7 +715,7 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
|
|
|
625
715
|
|
|
626
716
|
# test whether we are on hosted storage or not
|
|
627
717
|
path_str = path.as_posix()
|
|
628
|
-
is_hosted_storage = path_str.startswith(
|
|
718
|
+
is_hosted_storage = path_str.startswith(HOSTED_BUCKETS)
|
|
629
719
|
|
|
630
720
|
if not is_hosted_storage:
|
|
631
721
|
# make anon request if no credentials present
|
|
@@ -652,7 +742,7 @@ def create_path(path: UPath, access_token: Optional[str] = None) -> UPath:
|
|
|
652
742
|
)
|
|
653
743
|
|
|
654
744
|
|
|
655
|
-
def get_stat_file_cloud(stat:
|
|
745
|
+
def get_stat_file_cloud(stat: dict) -> tuple[int, str, str]:
|
|
656
746
|
size = stat["size"]
|
|
657
747
|
# small files
|
|
658
748
|
if "-" not in stat["ETag"]:
|
|
@@ -669,7 +759,7 @@ def get_stat_file_cloud(stat: Dict) -> Tuple[int, str, str]:
|
|
|
669
759
|
return size, hash, hash_type
|
|
670
760
|
|
|
671
761
|
|
|
672
|
-
def get_stat_dir_cloud(path: UPath) ->
|
|
762
|
+
def get_stat_dir_cloud(path: UPath) -> tuple[int, str, str, int]:
|
|
673
763
|
sizes = []
|
|
674
764
|
md5s = []
|
|
675
765
|
objects = path.fs.find(path.as_posix(), detail=True)
|
|
@@ -701,9 +791,18 @@ def check_storage_is_empty(
|
|
|
701
791
|
# since path.fs.find raises a PermissionError on empty hosted
|
|
702
792
|
# subdirectories (see lamindb_setup/core/_settings_storage/init_storage).
|
|
703
793
|
n_offset_objects = 1 # because of touched dummy file, see mark_storage_root()
|
|
704
|
-
if
|
|
705
|
-
|
|
706
|
-
|
|
794
|
+
if root_string.startswith(HOSTED_BUCKETS):
|
|
795
|
+
# in hosted buckets, count across entire root
|
|
796
|
+
directory_string = root_string
|
|
797
|
+
# the SQLite file is not in the ".lamindb" directory
|
|
798
|
+
if account_for_sqlite_file:
|
|
799
|
+
n_offset_objects += 1 # because of SQLite file
|
|
800
|
+
else:
|
|
801
|
+
# in any other storage location, only count in .lamindb
|
|
802
|
+
if not root_string.endswith("/"):
|
|
803
|
+
root_string += "/"
|
|
804
|
+
directory_string = root_string + ".lamindb"
|
|
805
|
+
objects = root_upath.fs.find(directory_string)
|
|
707
806
|
n_objects = len(objects)
|
|
708
807
|
n_diff = n_objects - n_offset_objects
|
|
709
808
|
ask_for_deletion = (
|
|
@@ -711,9 +810,13 @@ def check_storage_is_empty(
|
|
|
711
810
|
if raise_error
|
|
712
811
|
else "consider deleting them"
|
|
713
812
|
)
|
|
813
|
+
hint = "'./lamindb/_is_initialized' "
|
|
814
|
+
if n_offset_objects == 2:
|
|
815
|
+
hint += "& SQLite file"
|
|
816
|
+
hint += " ignored"
|
|
714
817
|
message = (
|
|
715
|
-
f"Storage
|
|
716
|
-
f"({
|
|
818
|
+
f"Storage {directory_string} contains {n_objects} objects "
|
|
819
|
+
f"({hint}) - {ask_for_deletion}\n{objects}"
|
|
717
820
|
)
|
|
718
821
|
if n_diff > 0:
|
|
719
822
|
if raise_error:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: lamindb_setup
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.71.1
|
|
4
4
|
Summary: Setup & configure LaminDB.
|
|
5
5
|
Author-email: Lamin Labs <laminlabs@gmail.com>
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -14,7 +14,9 @@ Requires-Dist: requests
|
|
|
14
14
|
Requires-Dist: universal_pathlib==0.1.4
|
|
15
15
|
Requires-Dist: botocore<2.0.0
|
|
16
16
|
Requires-Dist: supabase==2.2.1
|
|
17
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: urllib3<2 ; extra == "aws"
|
|
18
|
+
Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
|
|
19
|
+
Requires-Dist: s3fs>=2023.12.2,<=2024.3.1 ; extra == "aws"
|
|
18
20
|
Requires-Dist: pyjwt<3.0.0 ; extra == "dev"
|
|
19
21
|
Requires-Dist: psycopg2-binary ; extra == "dev"
|
|
20
22
|
Requires-Dist: python-dotenv ; extra == "dev"
|
|
@@ -25,12 +27,12 @@ Requires-Dist: pytest-xdist ; extra == "dev"
|
|
|
25
27
|
Requires-Dist: nbproject-test>=0.4.3 ; extra == "dev"
|
|
26
28
|
Requires-Dist: pandas ; extra == "dev"
|
|
27
29
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
|
28
|
-
Requires-Dist:
|
|
30
|
+
Requires-Dist: gcsfs>=2023.12.2,<=2024.3.1 ; extra == "gcp"
|
|
29
31
|
Project-URL: Home, https://github.com/laminlabs/lamindb-setup
|
|
30
32
|
Provides-Extra: aws
|
|
31
33
|
Provides-Extra: dev
|
|
32
34
|
Provides-Extra: erdiagram
|
|
33
|
-
Provides-Extra:
|
|
35
|
+
Provides-Extra: gcp
|
|
34
36
|
|
|
35
37
|
[](https://codecov.io/gh/laminlabs/lamindb-setup)
|
|
36
38
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
lamindb_setup/__init__.py,sha256=EIyvRHHlYOX7WsAFGdKp63ho-ceFW05z_n_GV1oJkaU,1542
|
|
2
|
+
lamindb_setup/_cache.py,sha256=wA7mbysANwe8hPNbjDo9bOmXJ0xIyaS5iyxIpxSWji4,846
|
|
3
|
+
lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
|
|
4
|
+
lamindb_setup/_check_setup.py,sha256=cNEL9Q4yPpmEkGKHH8JgullWl1VUZwALJ4RHn9wZypY,2613
|
|
5
|
+
lamindb_setup/_close.py,sha256=1QS9p2SCacgovYn6xqWU4zFvwHN1RgIccvzwJgFvKgU,1186
|
|
6
|
+
lamindb_setup/_connect_instance.py,sha256=c0qO0dn4hnEc-toRh6pxFaAxkhzM9LVN-72XK11JtAU,12616
|
|
7
|
+
lamindb_setup/_delete.py,sha256=hf8zfVJfW74QR7eK4xJNQ6HbkkZBsl5eTqj-Ni5jPo0,7232
|
|
8
|
+
lamindb_setup/_django.py,sha256=EoyWvFzH0i9wxjy4JZhcoXCTckztP_Mrl6FbYQnMmLE,1534
|
|
9
|
+
lamindb_setup/_exportdb.py,sha256=uTIZjKKTB7arzEr1j0O6lONiT2pRBKeOFdLvOV8ZwzE,2120
|
|
10
|
+
lamindb_setup/_importdb.py,sha256=yYYShzUajTsR-cTW4CZ-UNDWZY2uE5PAgNbp-wn8Ogc,1874
|
|
11
|
+
lamindb_setup/_init_instance.py,sha256=Hy4PsPpXCdl5ik3Q0ODltVbbvYjAqnLnfFza2ai8nX8,11921
|
|
12
|
+
lamindb_setup/_migrate.py,sha256=4nBTFg5-BK4A2gH-D3_tcFf8EtvMnIo5Mq0e_C6_9-U,8815
|
|
13
|
+
lamindb_setup/_register_instance.py,sha256=Jeu0wyvJVSVQ_n-A_7yn7xOZIP0ncJD92DRABqzPIjA,940
|
|
14
|
+
lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
|
|
15
|
+
lamindb_setup/_set_managed_storage.py,sha256=BUUJzKNWNEA5KnKnFZsas0ANU6w-LBZL-CKRu-sNLPE,1268
|
|
16
|
+
lamindb_setup/_setup_user.py,sha256=6Oc7Rke-yRQSZbuntdUAz8QbJ6UuPzYHI9FnYlf_q-A,3670
|
|
17
|
+
lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
|
|
18
|
+
lamindb_setup/core/__init__.py,sha256=dV9S-rQpNK9JcBn4hiEmiLnmNqfpPFJD9pqagMCaIew,416
|
|
19
|
+
lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
|
|
20
|
+
lamindb_setup/core/_deprecated.py,sha256=3qxUI1dnDlSeR0BYrv7ucjqRBEojbqotPgpShXs4KF8,2520
|
|
21
|
+
lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
|
|
22
|
+
lamindb_setup/core/_hub_client.py,sha256=V0qKDsCdRn-tQy2YIk4VgXcpJFmuum6N3gcavAC7gBQ,5504
|
|
23
|
+
lamindb_setup/core/_hub_core.py,sha256=FpEXPqSHXAgYegyQmsma54S3bv_mtceXrrpHXa7UeKE,15970
|
|
24
|
+
lamindb_setup/core/_hub_crud.py,sha256=b1XF7AJpM9Q-ttm9nPG-r3OTRWHQaGzAGIyvmb83NTo,4859
|
|
25
|
+
lamindb_setup/core/_hub_utils.py,sha256=b_M1LkdCjiMWm1EOlSb9GuPdLijwVgQDtATTpeZuXI0,1875
|
|
26
|
+
lamindb_setup/core/_settings.py,sha256=jjZ_AxRXB3Y3UP6m04BAw_dhFbJbdg2-nZWmEv2LNZ8,3141
|
|
27
|
+
lamindb_setup/core/_settings_instance.py,sha256=RFUcnBBUp303dbVEHcAaIm_q7lzlWg56OrKLwdam8Pg,16588
|
|
28
|
+
lamindb_setup/core/_settings_load.py,sha256=NGgCDpN85j1EqoKlrYFIlZBMlBJm33gx2-wc96CP_ZQ,3922
|
|
29
|
+
lamindb_setup/core/_settings_save.py,sha256=d1A-Ex-7H08mb8l7I0Oe0j0GilrfaDuprh_NMxhQAsQ,2704
|
|
30
|
+
lamindb_setup/core/_settings_storage.py,sha256=VgsqdIImQRfOZ6FGNY6DLVohaSxerj_F-sWtjD9hzcs,12382
|
|
31
|
+
lamindb_setup/core/_settings_store.py,sha256=dagS5c7wAMRnuZTRfCU4sKaIOyF_HwAP5Fnnn8vphno,2084
|
|
32
|
+
lamindb_setup/core/_settings_user.py,sha256=P2lC4WDRAFfT-Xq3MlXJ-wMKIHCoGNhMTQfRGIAyUNQ,1344
|
|
33
|
+
lamindb_setup/core/_setup_bionty_sources.py,sha256=OgPpZxN2_Wffy-ogEBz_97c_k8d2bD-DDVt89-u9GLY,3002
|
|
34
|
+
lamindb_setup/core/cloud_sqlite_locker.py,sha256=NIBNAGq7TTRrip9OzMdiQKj8QOuwhL9esyM0aehUqBA,6893
|
|
35
|
+
lamindb_setup/core/django.py,sha256=m0AKg2lJ1EYCtEtZ8frFFJbAR9qX0gnFcgqp7aeC2k0,3450
|
|
36
|
+
lamindb_setup/core/exceptions.py,sha256=eoI7AXgATgDVzgArtN7CUvpaMUC067vsBg5LHCsWzDM,305
|
|
37
|
+
lamindb_setup/core/hashing.py,sha256=mv9UCvAsSrdHYQAv3Kz7UOvjd5tIjvDTIYv_ettBuVY,2218
|
|
38
|
+
lamindb_setup/core/types.py,sha256=bcYnZ0uM_2NXKJCl94Mmc-uYrQlRUUVKG3sK2N-F-N4,532
|
|
39
|
+
lamindb_setup/core/upath.py,sha256=XBiHm-gxtfDIHnQmH5WjjmZzmAg5S421fjAfRrEg710,28286
|
|
40
|
+
lamindb_setup-0.71.1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
|
41
|
+
lamindb_setup-0.71.1.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
|
|
42
|
+
lamindb_setup-0.71.1.dist-info/METADATA,sha256=fijK20hR12pIxHzmECdbyguUSsHvVjIYTmfumfuLReQ,1620
|
|
43
|
+
lamindb_setup-0.71.1.dist-info/RECORD,,
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
from lamin_utils import logger
|
|
2
|
-
|
|
3
|
-
from lamindb_setup.core.types import UPathStr
|
|
4
|
-
|
|
5
|
-
from ._init_instance import register_user_and_storage
|
|
6
|
-
from .core._settings import settings
|
|
7
|
-
from .core._settings_instance import InstanceSettings
|
|
8
|
-
from .core._settings_storage import StorageSettings
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def switch_default_storage(root: UPathStr, **fs_kwargs):
|
|
12
|
-
"""Add a remote default storage location to a local instance.
|
|
13
|
-
|
|
14
|
-
This can be used to selectively share data.
|
|
15
|
-
|
|
16
|
-
Args:
|
|
17
|
-
root: `UPathStr` - The new storage root, e.g., an S3 bucket.
|
|
18
|
-
**fs_kwargs: Additional fsspec arguments for cloud root, e.g., profile.
|
|
19
|
-
|
|
20
|
-
Example:
|
|
21
|
-
>>> ln.setup.set.storage(
|
|
22
|
-
>>> "s3://some-bucket",
|
|
23
|
-
>>> profile="some_profile", # fsspec arg
|
|
24
|
-
>>> cache_regions=True # fsspec arg for s3
|
|
25
|
-
>>> )
|
|
26
|
-
|
|
27
|
-
"""
|
|
28
|
-
if settings.instance.dialect == "sqlite":
|
|
29
|
-
logger.error("can't set storage for sqlite instances.")
|
|
30
|
-
return "set-storage-failed"
|
|
31
|
-
ssettings = StorageSettings(root=root)
|
|
32
|
-
new_isettings = InstanceSettings(
|
|
33
|
-
owner=settings.instance.owner,
|
|
34
|
-
name=settings.instance.name,
|
|
35
|
-
storage=ssettings,
|
|
36
|
-
db=settings.instance.db,
|
|
37
|
-
schema=settings.instance._schema_str,
|
|
38
|
-
id=settings.instance.id,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
new_isettings._persist() # this also updates the settings object
|
|
42
|
-
register_user_and_storage(new_isettings, settings.user)
|
|
43
|
-
# we are not doing this for now because of difficulties to define the right RLS policy # noqa
|
|
44
|
-
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1687948324601929?thread_ts=1687531921.394119&cid=C04FPE8V01W
|
|
45
|
-
# if settings.instance.is_remote:
|
|
46
|
-
# init_storage_hub(
|
|
47
|
-
# root, account_handle=settings.instance.owner # type: ignore
|
|
48
|
-
# )
|
|
49
|
-
|
|
50
|
-
settings.storage._set_fs_kwargs(**fs_kwargs)
|