lamindb 0.68.0__py3-none-any.whl → 0.68.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +11 -24
- lamindb/_artifact.py +29 -82
- lamindb/_collection.py +9 -7
- lamindb/_feature.py +3 -3
- lamindb/_feature_set.py +4 -4
- lamindb/_is_versioned.py +3 -3
- lamindb/_parents.py +2 -2
- lamindb/_query_manager.py +1 -1
- lamindb/_query_set.py +1 -1
- lamindb/_registry.py +39 -21
- lamindb/_save.py +1 -1
- lamindb/_storage.py +2 -2
- lamindb/_transform.py +10 -6
- lamindb/_ulabel.py +3 -3
- lamindb/_validate.py +3 -3
- lamindb/core/__init__.py +0 -1
- lamindb/core/_data.py +1 -1
- lamindb/core/_feature_manager.py +6 -2
- lamindb/core/_label_manager.py +6 -2
- lamindb/core/_mapped_collection.py +45 -18
- lamindb/core/_run_context.py +53 -77
- lamindb/core/_settings.py +5 -1
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_backed_access.py +1 -1
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/file.py +13 -8
- lamindb/core/storage/object.py +3 -4
- lamindb/core/types.py +2 -2
- lamindb/core/versioning.py +1 -1
- lamindb/setup/__init__.py +8 -2
- lamindb/setup/{dev → core}/__init__.py +2 -4
- {lamindb-0.68.0.dist-info → lamindb-0.68.2.dist-info}/METADATA +9 -11
- lamindb-0.68.2.dist-info/RECORD +49 -0
- lamindb/core/hashing.py +0 -62
- lamindb-0.68.0.dist-info/RECORD +0 -50
- {lamindb-0.68.0.dist-info → lamindb-0.68.2.dist-info}/LICENSE +0 -0
- {lamindb-0.68.0.dist-info → lamindb-0.68.2.dist-info}/WHEEL +0 -0
lamindb/_validate.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
from typing import Dict, Iterable, List, Literal, Optional, Set, Union
|
2
2
|
|
3
|
+
import lamindb_setup as ln_setup
|
3
4
|
import numpy as np
|
4
5
|
import pandas as pd
|
5
6
|
from django.core.exceptions import FieldDoesNotExist
|
6
7
|
from django.db.models import QuerySet
|
7
8
|
from lamin_utils import colors, logger
|
8
9
|
from lamin_utils._inspect import InspectResult
|
9
|
-
from lamindb_setup.
|
10
|
+
from lamindb_setup.core._docs import doc_args
|
10
11
|
from lnschema_core import CanValidate, Registry
|
11
12
|
from lnschema_core.types import ListLike, StrField
|
12
13
|
|
13
14
|
from lamindb._utils import attach_func_to_class_method
|
14
15
|
|
15
|
-
from . import _TESTING
|
16
16
|
from ._from_values import _has_organism_field, _print_values
|
17
17
|
from ._registry import _queryset, get_default_str_field
|
18
18
|
|
@@ -468,7 +468,7 @@ METHOD_NAMES = [
|
|
468
468
|
"set_abbr",
|
469
469
|
]
|
470
470
|
|
471
|
-
if _TESTING: # type: ignore
|
471
|
+
if ln_setup._TESTING: # type: ignore
|
472
472
|
from inspect import signature
|
473
473
|
|
474
474
|
SIGS = {
|
lamindb/core/__init__.py
CHANGED
lamindb/core/_data.py
CHANGED
@@ -2,7 +2,7 @@ from collections import defaultdict
|
|
2
2
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
3
3
|
|
4
4
|
from lamin_utils import colors, logger
|
5
|
-
from lamindb_setup.
|
5
|
+
from lamindb_setup.core._docs import doc_args
|
6
6
|
from lnschema_core.models import (
|
7
7
|
Artifact,
|
8
8
|
Collection,
|
lamindb/core/_feature_manager.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, Iterable, Optional, Union
|
|
4
4
|
import anndata as ad
|
5
5
|
from anndata import AnnData
|
6
6
|
from lamin_utils import colors, logger
|
7
|
-
from lamindb_setup.
|
7
|
+
from lamindb_setup.core.upath import create_path
|
8
8
|
from lnschema_core.models import Artifact, Collection, Data, Feature, Registry
|
9
9
|
from lnschema_core.types import AnnDataLike, FieldAttr
|
10
10
|
|
@@ -325,7 +325,11 @@ class FeatureManager:
|
|
325
325
|
for feature in new_members:
|
326
326
|
# not calling save=True here as in labels, because want to
|
327
327
|
# bulk save below
|
328
|
-
|
328
|
+
# transfer_fk is set to False because they are already transferred
|
329
|
+
# in the previous step transfer_fk_to_default_db_bulk
|
330
|
+
transfer_to_default_db(
|
331
|
+
feature, using_key, mute=mute, transfer_fk=False
|
332
|
+
)
|
329
333
|
logger.info(
|
330
334
|
f"saving {new_members.count()} new {registry.__name__} records"
|
331
335
|
)
|
lamindb/core/_label_manager.py
CHANGED
@@ -58,7 +58,9 @@ def transfer_add_labels(labels, features_lookup_self, self, row, parents: bool =
|
|
58
58
|
if len(new_labels) > 0:
|
59
59
|
transfer_fk_to_default_db_bulk(new_labels, using_key=None)
|
60
60
|
for label in new_labels:
|
61
|
-
transfer_to_default_db(
|
61
|
+
transfer_to_default_db(
|
62
|
+
label, using_key=None, mute=True, transfer_fk=False
|
63
|
+
)
|
62
64
|
# not saving parents for Organism during transfer
|
63
65
|
registry = new_labels[0].__class__
|
64
66
|
logger.info(f"saving {len(new_labels)} new {registry.__name__} records")
|
@@ -219,7 +221,9 @@ class LabelManager:
|
|
219
221
|
if len(new_labels) > 0:
|
220
222
|
transfer_fk_to_default_db_bulk(new_labels, using_key)
|
221
223
|
for label in new_labels:
|
222
|
-
transfer_to_default_db(
|
224
|
+
transfer_to_default_db(
|
225
|
+
label, using_key, mute=True, transfer_fk=False
|
226
|
+
)
|
223
227
|
save(new_labels, parents=parents)
|
224
228
|
# this should not occur as file and collection should have the same attributes
|
225
229
|
# but this might not be true for custom schema
|
@@ -1,12 +1,13 @@
|
|
1
1
|
from collections import Counter
|
2
2
|
from functools import reduce
|
3
|
-
from
|
3
|
+
from pathlib import Path
|
4
4
|
from typing import Dict, List, Literal, Optional, Union
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
import pandas as pd
|
8
8
|
from lamin_utils import logger
|
9
|
-
from lamindb_setup.
|
9
|
+
from lamindb_setup.core.types import UPathStr
|
10
|
+
from lamindb_setup.core.upath import UPath
|
10
11
|
|
11
12
|
from .storage._backed_access import (
|
12
13
|
ArrayTypes,
|
@@ -52,11 +53,26 @@ class MappedCollection:
|
|
52
53
|
|
53
54
|
A similar data loader exists `here
|
54
55
|
<https://github.com/Genentech/scimilarity>`__.
|
56
|
+
|
57
|
+
Args:
|
58
|
+
path_list: A list of paths to `AnnData` objects stored in `h5ad` or `zrad` formats.
|
59
|
+
label_keys: Columns of the ``.obs`` slot - the names of the metadata
|
60
|
+
features storing labels.
|
61
|
+
join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
|
62
|
+
does not join.
|
63
|
+
encode_labels: Encode labels into integers.
|
64
|
+
Can be a list with elements from ``label_keys``.
|
65
|
+
unknown_label: Encode this label to -1.
|
66
|
+
Can be a dictionary with keys from ``label_keys`` if ``encode_labels=True``
|
67
|
+
or from ``encode_labels`` if it is a list.
|
68
|
+
cache_categories: Enable caching categories of ``label_keys`` for faster access.
|
69
|
+
parallel: Enable sampling with multiple processes.
|
70
|
+
dtype: Convert numpy arrays from ``.X`` to this dtype on selection.
|
55
71
|
"""
|
56
72
|
|
57
73
|
def __init__(
|
58
74
|
self,
|
59
|
-
path_list: List[
|
75
|
+
path_list: List[UPathStr],
|
60
76
|
label_keys: Optional[Union[str, List[str]]] = None,
|
61
77
|
join: Optional[Literal["inner", "outer"]] = "inner",
|
62
78
|
encode_labels: Union[bool, List[str]] = True,
|
@@ -147,7 +163,7 @@ class MappedCollection:
|
|
147
163
|
self._cache_cats[label] = []
|
148
164
|
for storage in self.storages:
|
149
165
|
with _Connect(storage) as store:
|
150
|
-
cats = self.
|
166
|
+
cats = self._get_categories(store, label)
|
151
167
|
if cats is not None:
|
152
168
|
cats = decode(cats) if isinstance(cats[0], bytes) else cats[...]
|
153
169
|
self._cache_cats[label].append(cats)
|
@@ -203,7 +219,7 @@ class MappedCollection:
|
|
203
219
|
var_idxs_join = None
|
204
220
|
|
205
221
|
with _Connect(self.storages[storage_idx]) as store:
|
206
|
-
out = {"x": self.
|
222
|
+
out = {"x": self._get_data_idx(store, obs_idx, var_idxs_join)}
|
207
223
|
if self.label_keys is not None:
|
208
224
|
for label in self.label_keys:
|
209
225
|
if label in self._cache_cats:
|
@@ -212,13 +228,13 @@ class MappedCollection:
|
|
212
228
|
cats = []
|
213
229
|
else:
|
214
230
|
cats = None
|
215
|
-
label_idx = self.
|
231
|
+
label_idx = self._get_label_idx(store, obs_idx, label, cats)
|
216
232
|
if label in self.encoders:
|
217
233
|
label_idx = self.encoders[label][label_idx]
|
218
234
|
out[label] = label_idx
|
219
235
|
return out
|
220
236
|
|
221
|
-
def
|
237
|
+
def _get_data_idx(
|
222
238
|
self,
|
223
239
|
storage: StorageType, # type: ignore
|
224
240
|
idx: int,
|
@@ -259,7 +275,7 @@ class MappedCollection:
|
|
259
275
|
layer_idx = layer_idx[var_idxs_join]
|
260
276
|
return layer_idx
|
261
277
|
|
262
|
-
def
|
278
|
+
def _get_label_idx(
|
263
279
|
self,
|
264
280
|
storage: StorageType,
|
265
281
|
idx: int,
|
@@ -280,7 +296,7 @@ class MappedCollection:
|
|
280
296
|
if categories is not None:
|
281
297
|
cats = categories
|
282
298
|
else:
|
283
|
-
cats = self.
|
299
|
+
cats = self._get_categories(storage, label_key)
|
284
300
|
if cats is not None and len(cats) > 0:
|
285
301
|
label = cats[label]
|
286
302
|
if isinstance(label, bytes):
|
@@ -305,17 +321,17 @@ class MappedCollection:
|
|
305
321
|
return weights
|
306
322
|
|
307
323
|
def get_merged_labels(self, label_key: str):
|
308
|
-
"""Get merged labels
|
324
|
+
"""Get merged labels for `label_key` from all `.obs`."""
|
309
325
|
labels_merge = []
|
310
326
|
decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
|
311
327
|
for i, storage in enumerate(self.storages):
|
312
328
|
with _Connect(storage) as store:
|
313
|
-
codes = self.
|
329
|
+
codes = self._get_codes(store, label_key)
|
314
330
|
labels = decode(codes) if isinstance(codes[0], bytes) else codes
|
315
331
|
if label_key in self._cache_cats:
|
316
332
|
cats = self._cache_cats[label_key][i]
|
317
333
|
else:
|
318
|
-
cats = self.
|
334
|
+
cats = self._get_categories(store, label_key)
|
319
335
|
if cats is not None:
|
320
336
|
cats = decode(cats) if isinstance(cats[0], bytes) else cats
|
321
337
|
labels = cats[labels]
|
@@ -323,7 +339,7 @@ class MappedCollection:
|
|
323
339
|
return np.hstack(labels_merge)
|
324
340
|
|
325
341
|
def get_merged_categories(self, label_key: str):
|
326
|
-
"""Get merged categories
|
342
|
+
"""Get merged categories for `label_key` from all `.obs`."""
|
327
343
|
cats_merge = set()
|
328
344
|
decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
|
329
345
|
for i, storage in enumerate(self.storages):
|
@@ -331,17 +347,17 @@ class MappedCollection:
|
|
331
347
|
if label_key in self._cache_cats:
|
332
348
|
cats = self._cache_cats[label_key][i]
|
333
349
|
else:
|
334
|
-
cats = self.
|
350
|
+
cats = self._get_categories(store, label_key)
|
335
351
|
if cats is not None:
|
336
352
|
cats = decode(cats) if isinstance(cats[0], bytes) else cats
|
337
353
|
cats_merge.update(cats)
|
338
354
|
else:
|
339
|
-
codes = self.
|
355
|
+
codes = self._get_codes(store, label_key)
|
340
356
|
codes = decode(codes) if isinstance(codes[0], bytes) else codes
|
341
357
|
cats_merge.update(codes)
|
342
358
|
return cats_merge
|
343
359
|
|
344
|
-
def
|
360
|
+
def _get_categories(self, storage: StorageType, label_key: str): # type: ignore
|
345
361
|
"""Get categories."""
|
346
362
|
obs = storage["obs"] # type: ignore
|
347
363
|
if isinstance(obs, ArrayTypes): # type: ignore
|
@@ -370,7 +386,7 @@ class MappedCollection:
|
|
370
386
|
return None
|
371
387
|
return None
|
372
388
|
|
373
|
-
def
|
389
|
+
def _get_codes(self, storage: StorageType, label_key: str): # type: ignore
|
374
390
|
"""Get codes."""
|
375
391
|
obs = storage["obs"] # type: ignore
|
376
392
|
if isinstance(obs, ArrayTypes): # type: ignore
|
@@ -383,7 +399,10 @@ class MappedCollection:
|
|
383
399
|
return label["codes"][...]
|
384
400
|
|
385
401
|
def close(self):
|
386
|
-
"""Close
|
402
|
+
"""Close connections to array streaming backend.
|
403
|
+
|
404
|
+
No effect if `parallel=True`.
|
405
|
+
"""
|
387
406
|
for storage in self.storages:
|
388
407
|
if hasattr(storage, "close"):
|
389
408
|
storage.close()
|
@@ -394,6 +413,10 @@ class MappedCollection:
|
|
394
413
|
|
395
414
|
@property
|
396
415
|
def closed(self):
|
416
|
+
"""Check if connections to array streaming backend are closed.
|
417
|
+
|
418
|
+
Does not matter if `parallel=True`.
|
419
|
+
"""
|
397
420
|
return self._closed
|
398
421
|
|
399
422
|
def __enter__(self):
|
@@ -404,6 +427,10 @@ class MappedCollection:
|
|
404
427
|
|
405
428
|
@staticmethod
|
406
429
|
def torch_worker_init_fn(worker_id):
|
430
|
+
"""`worker_init_fn` for `torch.utils.data.DataLoader`.
|
431
|
+
|
432
|
+
Improves performance for `num_workers > 1`.
|
433
|
+
"""
|
407
434
|
from torch.utils.data import get_worker_info
|
408
435
|
|
409
436
|
mapped = get_worker_info().dataset
|
lamindb/core/_run_context.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|
9
9
|
|
10
10
|
from lamin_utils import logger
|
11
11
|
from lamindb_setup import settings
|
12
|
-
from lamindb_setup.
|
12
|
+
from lamindb_setup.core import InstanceSettings
|
13
13
|
from lnschema_core import Run, Transform, ids
|
14
14
|
from lnschema_core.types import TransformType
|
15
15
|
from lnschema_core.users import current_user_id
|
@@ -25,12 +25,6 @@ msg_path_failed = (
|
|
25
25
|
)
|
26
26
|
|
27
27
|
|
28
|
-
# we don't want a real error here, as this is so frequent
|
29
|
-
# in VSCode
|
30
|
-
class UpdateNbWithNonInteractiveEditor(SystemExit):
|
31
|
-
pass
|
32
|
-
|
33
|
-
|
34
28
|
class NotebookNotSavedError(Exception):
|
35
29
|
pass
|
36
30
|
|
@@ -48,19 +42,6 @@ def get_uid_ext(version: str) -> str:
|
|
48
42
|
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
|
49
43
|
|
50
44
|
|
51
|
-
def get_transform_kwargs_from_stem_uid(
|
52
|
-
stem_uid: str,
|
53
|
-
version: str,
|
54
|
-
) -> Tuple[Optional[Transform], str, str]:
|
55
|
-
uid_ext = get_uid_ext(version)
|
56
|
-
new_uid = stem_uid + uid_ext
|
57
|
-
assert len(new_uid) == 16
|
58
|
-
transform = Transform.filter(
|
59
|
-
uid__startswith=stem_uid, version=version
|
60
|
-
).one_or_none()
|
61
|
-
return transform, new_uid, version
|
62
|
-
|
63
|
-
|
64
45
|
def get_stem_uid_and_version_from_file(file_path: str) -> Tuple[str, str]:
|
65
46
|
# line-by-line matching might be faster, but let's go with this for now
|
66
47
|
with open(file_path) as file:
|
@@ -90,33 +71,26 @@ def get_stem_uid_and_version_from_file(file_path: str) -> Tuple[str, str]:
|
|
90
71
|
version = version_match.group(1) if version_match else None
|
91
72
|
|
92
73
|
if stem_uid is None or version is None:
|
93
|
-
|
74
|
+
raise SystemExit(
|
94
75
|
f"ln.transform.stem_uid and ln.transform.version aren't set in {file_path}\n"
|
95
76
|
"Call ln.track() and copy/paste the output into the notebook"
|
96
77
|
)
|
97
|
-
# we're not using `raise SystemExit` here to have the right return code on the CLI
|
98
|
-
sys.exit(1)
|
99
78
|
return stem_uid, version
|
100
79
|
|
101
80
|
|
102
|
-
|
103
|
-
|
104
|
-
|
81
|
+
def update_stem_uid_or_version(
|
82
|
+
stem_uid: str,
|
83
|
+
version: str,
|
105
84
|
bump_version: bool = False,
|
106
85
|
) -> (bool, str, str): # type:ignore
|
107
|
-
|
108
|
-
|
109
|
-
uid_ext = get_uid_ext(version)
|
86
|
+
get_uid_ext(version)
|
110
87
|
updated = False
|
111
|
-
|
112
|
-
|
113
|
-
|
88
|
+
if bump_version:
|
89
|
+
response = "bump"
|
90
|
+
else:
|
91
|
+
# ask for generating a new stem uid
|
114
92
|
# it simply looks better here to not use the logger because we won't have an
|
115
93
|
# emoji also for the subsequent input question
|
116
|
-
print(
|
117
|
-
f"Transform is tracked with stem_uid='{stem_uid}' & version='{version}'"
|
118
|
-
f" (uid='{stem_uid}{uid_ext}')"
|
119
|
-
)
|
120
94
|
if os.getenv("LAMIN_TESTING") is None:
|
121
95
|
response = input(
|
122
96
|
"To create a new stem uid, type 'new'. To bump the version, type 'bump'"
|
@@ -291,16 +265,15 @@ class run_context:
|
|
291
265
|
and transform_settings.version is not None
|
292
266
|
)
|
293
267
|
if transform_settings_are_set:
|
294
|
-
(
|
295
|
-
transform,
|
296
|
-
uid,
|
297
|
-
version,
|
298
|
-
) = get_transform_kwargs_from_stem_uid(
|
268
|
+
stem_uid, version = (
|
299
269
|
transform_settings.stem_uid,
|
300
270
|
transform_settings.version,
|
301
271
|
)
|
272
|
+
transform = Transform.filter(
|
273
|
+
uid__startswith=stem_uid, version=version
|
274
|
+
).one_or_none()
|
302
275
|
if is_run_from_ipython:
|
303
|
-
short_name, name,
|
276
|
+
short_name, name, _ = cls._track_notebook(path=path)
|
304
277
|
else:
|
305
278
|
import inspect
|
306
279
|
|
@@ -308,19 +281,22 @@ class run_context:
|
|
308
281
|
module = inspect.getmodule(frame[0])
|
309
282
|
name = Path(module.__file__).name # type: ignore
|
310
283
|
short_name = name
|
311
|
-
|
312
|
-
|
313
|
-
|
284
|
+
transform_type = (
|
285
|
+
TransformType.notebook
|
286
|
+
if is_run_from_ipython
|
287
|
+
else TransformType.script
|
288
|
+
)
|
289
|
+
cls._create_or_load_transform(
|
290
|
+
stem_uid=stem_uid,
|
314
291
|
version=version,
|
315
292
|
name=name,
|
316
293
|
reference=reference,
|
317
|
-
transform_type=
|
318
|
-
if is_run_from_ipython
|
319
|
-
else TransformType.pipeline,
|
294
|
+
transform_type=transform_type,
|
320
295
|
short_name=short_name,
|
321
|
-
filepath=filepath,
|
322
296
|
transform=transform,
|
323
297
|
)
|
298
|
+
# if no error is raised, the transform is tracked
|
299
|
+
is_tracked = True
|
324
300
|
if not is_tracked:
|
325
301
|
raise_transform_settings_error()
|
326
302
|
else:
|
@@ -437,17 +413,17 @@ class run_context:
|
|
437
413
|
def _create_or_load_transform(
|
438
414
|
cls,
|
439
415
|
*,
|
440
|
-
|
416
|
+
stem_uid: str,
|
441
417
|
version: Optional[str],
|
442
418
|
name: str,
|
443
|
-
reference: Optional[str],
|
444
|
-
short_name: Optional[str],
|
445
|
-
transform_type: TransformType,
|
446
|
-
filepath: str,
|
419
|
+
reference: Optional[str] = None,
|
420
|
+
short_name: Optional[str] = None,
|
421
|
+
transform_type: TransformType = None,
|
447
422
|
transform: Optional[Transform] = None,
|
448
|
-
)
|
423
|
+
):
|
449
424
|
# make a new transform record
|
450
425
|
if transform is None:
|
426
|
+
uid = f"{stem_uid}{get_uid_ext(version)}"
|
451
427
|
transform = Transform(
|
452
428
|
uid=uid,
|
453
429
|
version=version,
|
@@ -459,7 +435,25 @@ class run_context:
|
|
459
435
|
transform.save()
|
460
436
|
logger.important(f"saved: {transform}")
|
461
437
|
else:
|
462
|
-
# check whether there was an update
|
438
|
+
# check whether there was an update to the transform, like
|
439
|
+
# renaming the file or updating the title
|
440
|
+
if transform.name != name or transform.short_name != short_name:
|
441
|
+
if os.getenv("LAMIN_TESTING") is None:
|
442
|
+
response = input(
|
443
|
+
"Updated transform filename and/or title: Do you want to assign a"
|
444
|
+
" new stem_uid or version? (y/n)"
|
445
|
+
)
|
446
|
+
else:
|
447
|
+
response = "y"
|
448
|
+
if response == "y":
|
449
|
+
# will raise SystemExit
|
450
|
+
update_stem_uid_or_version(stem_uid, version)
|
451
|
+
else:
|
452
|
+
transform.name = name
|
453
|
+
transform.short_name = short_name
|
454
|
+
transform.save()
|
455
|
+
logger.important(f"updated: {transform}")
|
456
|
+
# check whether the transform artifacts were already saved
|
463
457
|
if (
|
464
458
|
transform.source_code_id is not None
|
465
459
|
or transform.latest_report_id is not None
|
@@ -472,28 +466,10 @@ class run_context:
|
|
472
466
|
else:
|
473
467
|
response = "y"
|
474
468
|
if response == "y":
|
475
|
-
|
469
|
+
update_stem_uid_or_version(stem_uid, version, bump_version=True)
|
476
470
|
else:
|
477
|
-
|
478
|
-
|
479
|
-
" the saved transform.source_code and transform.latest_report"
|
480
|
-
)
|
481
|
-
return False
|
482
|
-
if transform.name != name or transform.short_name != short_name:
|
483
|
-
response = input(
|
484
|
-
"Updated notebook name and/or title: Do you want to assign a"
|
485
|
-
" new uid prefix or version? (y/n)"
|
486
|
-
)
|
487
|
-
if response == "y":
|
488
|
-
update_transform_source(filepath)
|
489
|
-
transform.name = name
|
490
|
-
transform.short_name = short_name
|
491
|
-
transform.save()
|
492
|
-
if response == "y":
|
493
|
-
logger.important(f"saved: {transform}")
|
494
|
-
else:
|
495
|
-
logger.important(f"updated: {transform}")
|
471
|
+
# we want a new stem_uid in this case, hence raise the error
|
472
|
+
raise_transform_settings_error()
|
496
473
|
else:
|
497
474
|
logger.important(f"loaded: {transform}")
|
498
475
|
cls.transform = transform
|
499
|
-
return True
|
lamindb/core/_settings.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, Literal, Mapping, Optional, Tuple, Union
|
|
3
3
|
|
4
4
|
import lamindb_setup as ln_setup
|
5
5
|
from lamin_utils import logger
|
6
|
+
from lamindb_setup._add_remote_storage import switch_default_storage
|
6
7
|
from upath import UPath
|
7
8
|
|
8
9
|
VERBOSITY_TO_INT = {
|
@@ -112,11 +113,14 @@ class Settings:
|
|
112
113
|
def storage(
|
113
114
|
self, path_kwargs: Union[str, Path, UPath, Tuple[Union[str, UPath], Mapping]]
|
114
115
|
):
|
116
|
+
logger.warning(
|
117
|
+
"you'll no longer be able to set arbitrary storage locations soon"
|
118
|
+
)
|
115
119
|
if isinstance(path_kwargs, tuple):
|
116
120
|
path, kwargs = path_kwargs
|
117
121
|
else:
|
118
122
|
path, kwargs = path_kwargs, {}
|
119
|
-
|
123
|
+
switch_default_storage(path, **kwargs)
|
120
124
|
|
121
125
|
@property
|
122
126
|
def verbosity(self) -> str:
|
lamindb/core/storage/__init__.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
AnnDataAccessor
|
7
7
|
BackedAccessor
|
8
8
|
"""
|
9
|
-
from lamindb_setup.
|
9
|
+
from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
|
10
10
|
|
11
11
|
from ._anndata_sizes import size_adata
|
12
12
|
from ._backed_access import AnnDataAccessor, BackedAccessor
|
@@ -18,7 +18,7 @@ from anndata.compat import _read_attr
|
|
18
18
|
from fsspec.core import OpenFile
|
19
19
|
from fsspec.implementations.local import LocalFileSystem
|
20
20
|
from lamin_utils import logger
|
21
|
-
from lamindb_setup.
|
21
|
+
from lamindb_setup.core.upath import UPath, create_mapper, infer_filesystem
|
22
22
|
from lnschema_core import Artifact
|
23
23
|
from packaging import version
|
24
24
|
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -6,7 +6,7 @@ import zarr
|
|
6
6
|
from anndata import AnnData
|
7
7
|
from anndata._io import read_zarr
|
8
8
|
from anndata._io.specs import write_elem
|
9
|
-
from lamindb_setup.
|
9
|
+
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
10
10
|
|
11
11
|
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
12
12
|
|
lamindb/core/storage/file.py
CHANGED
@@ -8,8 +8,9 @@ import anndata as ad
|
|
8
8
|
import pandas as pd
|
9
9
|
from lamin_utils import logger
|
10
10
|
from lamindb_setup import settings as setup_settings
|
11
|
-
from lamindb_setup.
|
12
|
-
from lamindb_setup.
|
11
|
+
from lamindb_setup.core import StorageSettings
|
12
|
+
from lamindb_setup.core.types import UPathStr
|
13
|
+
from lamindb_setup.core.upath import (
|
13
14
|
LocalPathClasses,
|
14
15
|
UPath,
|
15
16
|
create_path,
|
@@ -103,7 +104,7 @@ def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
|
|
103
104
|
return adata
|
104
105
|
|
105
106
|
|
106
|
-
def store_artifact(localpath:
|
107
|
+
def store_artifact(localpath: UPathStr, storagepath: UPath) -> None:
|
107
108
|
"""Store directory or file to configured storage location.
|
108
109
|
|
109
110
|
Returns size in bytes.
|
@@ -132,8 +133,12 @@ def delete_storage_using_key(
|
|
132
133
|
delete_storage(filepath)
|
133
134
|
|
134
135
|
|
135
|
-
def delete_storage(storagepath:
|
136
|
+
def delete_storage(storagepath: Path):
|
136
137
|
"""Delete arbitrary artifact."""
|
138
|
+
if not storagepath.is_relative_to(settings.storage):
|
139
|
+
logger.warning("couldn't delete files outside of default storage")
|
140
|
+
return "did-not-delete"
|
141
|
+
# only delete files in the default storage
|
137
142
|
if storagepath.is_file():
|
138
143
|
storagepath.unlink()
|
139
144
|
elif storagepath.is_dir():
|
@@ -156,12 +161,12 @@ def read_fcs(*args, **kwargs):
|
|
156
161
|
return readfcs.read(*args, **kwargs)
|
157
162
|
|
158
163
|
|
159
|
-
def read_tsv(path:
|
164
|
+
def read_tsv(path: UPathStr) -> pd.DataFrame:
|
160
165
|
path_sanitized = Path(path)
|
161
166
|
return pd.read_csv(path_sanitized, sep="\t")
|
162
167
|
|
163
168
|
|
164
|
-
def load_html(path:
|
169
|
+
def load_html(path: UPathStr):
|
165
170
|
if is_run_from_ipython:
|
166
171
|
with open(path, encoding="utf-8") as f:
|
167
172
|
html_content = f.read()
|
@@ -180,7 +185,7 @@ def load_html(path: Union[str, Path, UPath]):
|
|
180
185
|
return path
|
181
186
|
|
182
187
|
|
183
|
-
def load_json(path:
|
188
|
+
def load_json(path: UPathStr):
|
184
189
|
import json
|
185
190
|
|
186
191
|
with open(path) as f:
|
@@ -188,7 +193,7 @@ def load_json(path: Union[str, Path, UPath]):
|
|
188
193
|
return data
|
189
194
|
|
190
195
|
|
191
|
-
def load_to_memory(filepath:
|
196
|
+
def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
|
192
197
|
"""Load a file into memory.
|
193
198
|
|
194
199
|
Returns the filepath if no in-memory form is found.
|
lamindb/core/storage/object.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from typing import Optional, Union
|
1
|
+
from typing import Optional
|
3
2
|
|
4
3
|
from anndata import AnnData
|
5
|
-
from lamindb_setup.
|
4
|
+
from lamindb_setup.core.types import UPathStr
|
6
5
|
from pandas import DataFrame
|
7
6
|
|
8
7
|
|
@@ -25,7 +24,7 @@ def infer_suffix(dmem, adata_format: Optional[str] = None):
|
|
25
24
|
raise NotImplementedError
|
26
25
|
|
27
26
|
|
28
|
-
def write_to_file(dmem, filepath:
|
27
|
+
def write_to_file(dmem, filepath: UPathStr):
|
29
28
|
if isinstance(dmem, AnnData):
|
30
29
|
dmem.write(filepath)
|
31
30
|
elif isinstance(dmem, DataFrame):
|
lamindb/core/types.py
CHANGED
@@ -3,16 +3,16 @@
|
|
3
3
|
.. autosummary::
|
4
4
|
:toctree: .
|
5
5
|
|
6
|
-
|
6
|
+
UPathStr
|
7
7
|
DataLike
|
8
8
|
StrField
|
9
9
|
ListLike
|
10
10
|
TransformType
|
11
11
|
"""
|
12
|
+
from lamindb_setup.core.types import UPathStr
|
12
13
|
from lnschema_core.types import (
|
13
14
|
DataLike,
|
14
15
|
ListLike,
|
15
|
-
PathLike,
|
16
16
|
StrField,
|
17
17
|
TransformType,
|
18
18
|
)
|
lamindb/core/versioning.py
CHANGED