lamindb 0.76.3__py3-none-any.whl → 0.76.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +6 -9
- lamindb/_artifact.py +43 -30
- lamindb/_can_validate.py +20 -4
- lamindb/_curate.py +23 -6
- lamindb/_finish.py +117 -106
- lamindb/_query_set.py +41 -33
- lamindb/_record.py +3 -2
- lamindb/_transform.py +5 -3
- lamindb/core/__init__.py +0 -2
- lamindb/core/_context.py +71 -37
- lamindb/core/_label_manager.py +1 -3
- lamindb/core/storage/_anndata_accessor.py +6 -1
- lamindb/core/storage/_tiledbsoma.py +24 -20
- lamindb/core/versioning.py +4 -0
- lamindb/integrations/_vitessce.py +9 -15
- {lamindb-0.76.3.dist-info → lamindb-0.76.5.dist-info}/METADATA +7 -7
- {lamindb-0.76.3.dist-info → lamindb-0.76.5.dist-info}/RECORD +19 -19
- {lamindb-0.76.3.dist-info → lamindb-0.76.5.dist-info}/LICENSE +0 -0
- {lamindb-0.76.3.dist-info → lamindb-0.76.5.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""A data framework for biology.
|
2
2
|
|
3
|
-
|
4
|
-
=======
|
3
|
+
Core registries.
|
5
4
|
|
6
5
|
.. autosummary::
|
7
6
|
:toctree: .
|
@@ -17,8 +16,7 @@ Records
|
|
17
16
|
FeatureSet
|
18
17
|
Param
|
19
18
|
|
20
|
-
Key functionality
|
21
|
-
=================
|
19
|
+
Key functionality.
|
22
20
|
|
23
21
|
.. autosummary::
|
24
22
|
:toctree: .
|
@@ -29,8 +27,7 @@ Key functionality
|
|
29
27
|
view
|
30
28
|
save
|
31
29
|
|
32
|
-
Modules
|
33
|
-
==================
|
30
|
+
Modules and settings.
|
34
31
|
|
35
32
|
.. autosummary::
|
36
33
|
:toctree: .
|
@@ -44,7 +41,7 @@ Modules & settings
|
|
44
41
|
"""
|
45
42
|
|
46
43
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
47
|
-
__version__ = "0.76.
|
44
|
+
__version__ = "0.76.5"
|
48
45
|
|
49
46
|
import os as _os
|
50
47
|
|
@@ -111,6 +108,6 @@ if _check_instance_setup(from_lamindb=True):
|
|
111
108
|
track = context.track # backward compat
|
112
109
|
finish = context.finish # backward compat
|
113
110
|
Curate = Curator # backward compat
|
114
|
-
settings.__doc__ = """Global :class:`~lamindb.core.Settings
|
115
|
-
context.__doc__ = """Global :class:`~lamindb.core.Context
|
111
|
+
settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
|
112
|
+
context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
|
116
113
|
from django.db.models import Q
|
lamindb/_artifact.py
CHANGED
@@ -366,11 +366,6 @@ def get_artifact_kwargs_from_data(
|
|
366
366
|
else:
|
367
367
|
storage = default_storage
|
368
368
|
|
369
|
-
# for now comment out this error to allow creating new versions of stores
|
370
|
-
# in the default folder (.lamindb)
|
371
|
-
# if key is not None and key.startswith(AUTO_KEY_PREFIX):
|
372
|
-
# raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
|
373
|
-
|
374
369
|
log_storage_hint(
|
375
370
|
check_path_in_storage=check_path_in_storage,
|
376
371
|
storage=storage,
|
@@ -542,10 +537,10 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
542
537
|
else VisibilityChoice.default.value
|
543
538
|
)
|
544
539
|
format = kwargs.pop("format") if "format" in kwargs else None
|
540
|
+
_is_internal_call = kwargs.pop("_is_internal_call", False)
|
545
541
|
skip_check_exists = (
|
546
542
|
kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
|
547
543
|
)
|
548
|
-
_uid = kwargs.pop("_uid", None)
|
549
544
|
if "default_storage" in kwargs:
|
550
545
|
default_storage = kwargs.pop("default_storage")
|
551
546
|
else:
|
@@ -561,9 +556,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
561
556
|
if "is_new_version_of" in kwargs:
|
562
557
|
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
563
558
|
revises = kwargs.pop("is_new_version_of")
|
564
|
-
assert not ( # noqa: S101
|
565
|
-
revises is not None and _uid is not None
|
566
|
-
), "Can not init with both `revises` and `_uid`"
|
567
559
|
if not len(kwargs) == 0:
|
568
560
|
raise ValueError(
|
569
561
|
"Only data, key, run, description, version, revises, visibility"
|
@@ -579,15 +571,29 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
579
571
|
raise ValueError(
|
580
572
|
f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
|
581
573
|
)
|
582
|
-
if _uid is not None:
|
583
|
-
provisional_uid, revises = _uid, None
|
584
|
-
else:
|
585
|
-
provisional_uid, revises = create_uid(revises=revises, version=version)
|
586
574
|
if revises is not None:
|
587
575
|
if not isinstance(revises, Artifact):
|
588
576
|
raise TypeError("`revises` has to be of type `Artifact`")
|
589
577
|
if description is None:
|
590
578
|
description = revises.description
|
579
|
+
if key is not None and AUTO_KEY_PREFIX in key:
|
580
|
+
raise ValueError(
|
581
|
+
f"Do not pass key that contains a managed storage path in `{AUTO_KEY_PREFIX}`"
|
582
|
+
)
|
583
|
+
# below is for internal calls that require defining the storage location
|
584
|
+
# ahead of constructing the Artifact
|
585
|
+
if isinstance(data, (str, Path)) and AUTO_KEY_PREFIX in str(data):
|
586
|
+
if _is_internal_call:
|
587
|
+
is_automanaged_path = True
|
588
|
+
user_provided_key = key
|
589
|
+
key = None
|
590
|
+
else:
|
591
|
+
raise ValueError(
|
592
|
+
f"Do not pass path inside the `{AUTO_KEY_PREFIX}` directory."
|
593
|
+
)
|
594
|
+
else:
|
595
|
+
is_automanaged_path = False
|
596
|
+
provisional_uid, revises = create_uid(revises=revises, version=version)
|
591
597
|
kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
|
592
598
|
data=data,
|
593
599
|
key=key,
|
@@ -615,16 +621,29 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
615
621
|
else:
|
616
622
|
kwargs = kwargs_or_artifact
|
617
623
|
|
624
|
+
if data is not None:
|
625
|
+
artifact._local_filepath = privates["local_filepath"]
|
626
|
+
artifact._cloud_filepath = privates["cloud_filepath"]
|
627
|
+
artifact._memory_rep = privates["memory_rep"]
|
628
|
+
artifact._to_store = not privates["check_path_in_storage"]
|
629
|
+
|
630
|
+
if is_automanaged_path and _is_internal_call:
|
631
|
+
kwargs["_key_is_virtual"] = True
|
632
|
+
assert AUTO_KEY_PREFIX in kwargs["key"] # noqa: S101
|
633
|
+
uid = kwargs["key"].replace(AUTO_KEY_PREFIX, "").replace(kwargs["suffix"], "")
|
634
|
+
kwargs["key"] = user_provided_key
|
635
|
+
if revises is not None:
|
636
|
+
assert uid.startswith(revises.stem_uid) # noqa: S101
|
637
|
+
if len(uid) == 16:
|
638
|
+
if revises is None:
|
639
|
+
uid += "0000"
|
640
|
+
else:
|
641
|
+
uid, revises = create_uid(revises=revises, version=version)
|
642
|
+
kwargs["uid"] = uid
|
643
|
+
|
618
644
|
# only set key now so that we don't do a look-up on it in case revises is passed
|
619
645
|
if revises is not None:
|
620
646
|
kwargs["key"] = revises.key
|
621
|
-
# in case we have a new version of a folder with a different hash, print a
|
622
|
-
# warning that the old version can't be recovered
|
623
|
-
if revises is not None and revises.n_objects is not None and revises.n_objects > 1:
|
624
|
-
logger.warning(
|
625
|
-
f"artifact version {version} will _update_ the state of folder {revises.path} - "
|
626
|
-
"to _retain_ the old state by duplicating the entire folder, do _not_ pass `revises`"
|
627
|
-
)
|
628
647
|
|
629
648
|
kwargs["type"] = type
|
630
649
|
kwargs["version"] = version
|
@@ -643,12 +662,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
643
662
|
|
644
663
|
add_transform_to_kwargs(kwargs, kwargs["run"])
|
645
664
|
|
646
|
-
if data is not None:
|
647
|
-
artifact._local_filepath = privates["local_filepath"]
|
648
|
-
artifact._cloud_filepath = privates["cloud_filepath"]
|
649
|
-
artifact._memory_rep = privates["memory_rep"]
|
650
|
-
artifact._to_store = not privates["check_path_in_storage"]
|
651
|
-
|
652
665
|
super(Artifact, artifact).__init__(**kwargs)
|
653
666
|
|
654
667
|
|
@@ -943,10 +956,9 @@ def open(
|
|
943
956
|
if self.hash != hash:
|
944
957
|
from ._record import init_self_from_db
|
945
958
|
|
946
|
-
|
947
|
-
|
948
|
-
)
|
949
|
-
new_version = Artifact(filepath, revises=self).save()
|
959
|
+
new_version = Artifact(
|
960
|
+
filepath, revises=self, _is_internal_call=True
|
961
|
+
).save()
|
950
962
|
init_self_from_db(self, new_version)
|
951
963
|
|
952
964
|
if localpath != filepath and localpath.exists():
|
@@ -1174,3 +1186,4 @@ Artifact._delete_skip_storage = _delete_skip_storage
|
|
1174
1186
|
Artifact._save_skip_storage = _save_skip_storage
|
1175
1187
|
Artifact.path = path
|
1176
1188
|
Artifact.backed = backed
|
1189
|
+
Artifact.view_lineage = HasFeatures.view_lineage
|
lamindb/_can_validate.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING,
|
3
|
+
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
5
|
import lamindb_setup as ln_setup
|
6
6
|
import numpy as np
|
@@ -79,6 +79,19 @@ def _check_organism_db(organism: Record, using_key: str | None):
|
|
79
79
|
)
|
80
80
|
|
81
81
|
|
82
|
+
def _concat_lists(values: ListLike) -> list[str]:
|
83
|
+
"""Concatenate a list of lists of strings into a single list."""
|
84
|
+
if len(values) > 0 and isinstance(values, (list, pd.Series)):
|
85
|
+
try:
|
86
|
+
if isinstance(values[0], list):
|
87
|
+
if isinstance(values, pd.Series):
|
88
|
+
values = values.tolist()
|
89
|
+
values = sum([v for v in values if isinstance(v, list)], [])
|
90
|
+
except KeyError:
|
91
|
+
pass
|
92
|
+
return values
|
93
|
+
|
94
|
+
|
82
95
|
def _inspect(
|
83
96
|
cls,
|
84
97
|
values: ListLike,
|
@@ -94,6 +107,7 @@ def _inspect(
|
|
94
107
|
|
95
108
|
if isinstance(values, str):
|
96
109
|
values = [values]
|
110
|
+
values = _concat_lists(values)
|
97
111
|
|
98
112
|
field = get_name_field(cls, field=field)
|
99
113
|
queryset = _queryset(cls, using_key)
|
@@ -184,6 +198,7 @@ def _validate(
|
|
184
198
|
return_str = True if isinstance(values, str) else False
|
185
199
|
if isinstance(values, str):
|
186
200
|
values = [values]
|
201
|
+
values = _concat_lists(values)
|
187
202
|
|
188
203
|
field = get_name_field(cls, field=field)
|
189
204
|
|
@@ -229,7 +244,7 @@ def _validate(
|
|
229
244
|
@doc_args(CanValidate.standardize.__doc__)
|
230
245
|
def standardize(
|
231
246
|
cls,
|
232
|
-
values:
|
247
|
+
values: ListLike,
|
233
248
|
field: str | StrField | None = None,
|
234
249
|
*,
|
235
250
|
return_field: str = None,
|
@@ -295,7 +310,7 @@ def remove_synonym(self, synonym: str | ListLike):
|
|
295
310
|
|
296
311
|
def _standardize(
|
297
312
|
cls,
|
298
|
-
values:
|
313
|
+
values: ListLike,
|
299
314
|
field: str | StrField | None = None,
|
300
315
|
*,
|
301
316
|
return_field: str = None,
|
@@ -315,6 +330,7 @@ def _standardize(
|
|
315
330
|
return_str = True if isinstance(values, str) else False
|
316
331
|
if isinstance(values, str):
|
317
332
|
values = [values]
|
333
|
+
values = _concat_lists(values)
|
318
334
|
|
319
335
|
field = get_name_field(cls, field=field)
|
320
336
|
return_field = get_name_field(
|
@@ -416,7 +432,7 @@ def _standardize(
|
|
416
432
|
|
417
433
|
|
418
434
|
def _add_or_remove_synonyms(
|
419
|
-
synonym: str |
|
435
|
+
synonym: str | ListLike,
|
420
436
|
record: Record,
|
421
437
|
action: Literal["add", "remove"],
|
422
438
|
force: bool = False,
|
lamindb/_curate.py
CHANGED
@@ -144,6 +144,7 @@ class DataFrameCurator(BaseCurator):
|
|
144
144
|
organism: str | None = None,
|
145
145
|
sources: dict[str, Record] | None = None,
|
146
146
|
exclude: dict | None = None,
|
147
|
+
check_valid_keys: bool = True,
|
147
148
|
) -> None:
|
148
149
|
from lamindb.core._settings import settings
|
149
150
|
|
@@ -163,6 +164,8 @@ class DataFrameCurator(BaseCurator):
|
|
163
164
|
exclude = {}
|
164
165
|
self._exclude = exclude
|
165
166
|
self._non_validated = None
|
167
|
+
if check_valid_keys:
|
168
|
+
self._check_valid_keys()
|
166
169
|
self._save_columns()
|
167
170
|
|
168
171
|
@property
|
@@ -191,14 +194,25 @@ class DataFrameCurator(BaseCurator):
|
|
191
194
|
using_key=using_key or self._using_key,
|
192
195
|
)
|
193
196
|
|
197
|
+
def _check_valid_keys(self, extra: set = None) -> None:
|
198
|
+
if extra is None:
|
199
|
+
extra = set()
|
200
|
+
for name, d in {
|
201
|
+
"categoricals": self._fields,
|
202
|
+
"sources": self._sources,
|
203
|
+
"exclude": self._exclude,
|
204
|
+
}.items():
|
205
|
+
if not isinstance(d, dict):
|
206
|
+
raise TypeError(f"{name} must be a dictionary!")
|
207
|
+
valid_keys = set(self._df.columns) | {"columns"} | extra
|
208
|
+
nonval_keys = [key for key in d.keys() if key not in valid_keys]
|
209
|
+
if len(nonval_keys) > 0:
|
210
|
+
raise ValueError(
|
211
|
+
f"the following keys passed to {name} are not allowed: {nonval_keys}"
|
212
|
+
)
|
213
|
+
|
194
214
|
def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
|
195
215
|
"""Save column name records."""
|
196
|
-
missing_columns = set(self.fields.keys()) - set(self._df.columns)
|
197
|
-
if missing_columns:
|
198
|
-
raise ValueError(
|
199
|
-
f"Columns {missing_columns} are not found in the data object!"
|
200
|
-
)
|
201
|
-
|
202
216
|
# Always save features specified as the fields keys
|
203
217
|
update_registry(
|
204
218
|
values=list(self.fields.keys()),
|
@@ -426,8 +440,10 @@ class AnnDataCurator(DataFrameCurator):
|
|
426
440
|
organism=organism,
|
427
441
|
sources=sources,
|
428
442
|
exclude=exclude,
|
443
|
+
check_valid_keys=False,
|
429
444
|
)
|
430
445
|
self._obs_fields = categoricals
|
446
|
+
self._check_valid_keys(extra={"var_index"})
|
431
447
|
|
432
448
|
@property
|
433
449
|
def var_index(self) -> FieldAttr:
|
@@ -633,6 +649,7 @@ class MuDataCurator:
|
|
633
649
|
verbosity=verbosity,
|
634
650
|
sources=self._sources.get(modality),
|
635
651
|
exclude=self._exclude.get(modality),
|
652
|
+
check_valid_keys=False,
|
636
653
|
**self._kwargs,
|
637
654
|
)
|
638
655
|
for modality in self._modalities
|
lamindb/_finish.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
+
import re
|
4
5
|
import shutil
|
5
|
-
import subprocess
|
6
6
|
from datetime import datetime, timezone
|
7
7
|
from typing import TYPE_CHECKING
|
8
8
|
|
@@ -18,13 +18,83 @@ if TYPE_CHECKING:
|
|
18
18
|
from ._query_set import QuerySet
|
19
19
|
|
20
20
|
|
21
|
+
# this is from the get_title function in nbproject
|
22
|
+
# should be moved into lamindb sooner or later
|
23
|
+
def prepare_notebook(
|
24
|
+
nb,
|
25
|
+
strip_title: bool = False,
|
26
|
+
) -> str | None:
|
27
|
+
"""Strip title from the notebook if requested."""
|
28
|
+
title_found = False
|
29
|
+
for cell in nb.cells:
|
30
|
+
cell.metadata.clear() # strip cell metadata
|
31
|
+
if not title_found and cell["cell_type"] == "markdown":
|
32
|
+
lines = cell["source"].split("\n")
|
33
|
+
for i, line in enumerate(lines):
|
34
|
+
if line.startswith("# "):
|
35
|
+
line.lstrip("#").strip(" .").strip()
|
36
|
+
title_found = True
|
37
|
+
if strip_title:
|
38
|
+
lines.pop(i)
|
39
|
+
cell["source"] = "\n".join(lines)
|
40
|
+
return None
|
41
|
+
|
42
|
+
|
43
|
+
def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
|
44
|
+
import nbformat
|
45
|
+
import traitlets.config as config
|
46
|
+
from nbconvert import HTMLExporter
|
47
|
+
|
48
|
+
with open(notebook_path, encoding="utf-8") as f:
|
49
|
+
notebook = nbformat.read(f, as_version=4)
|
50
|
+
prepare_notebook(notebook, strip_title=True)
|
51
|
+
notebook.metadata.clear() # strip notebook metadata
|
52
|
+
# if we were to export as ipynb, the following two lines would do it
|
53
|
+
# with open(output_path, "w", encoding="utf-8") as f:
|
54
|
+
# nbformat.write(notebook, f)
|
55
|
+
# instead we need all this code
|
56
|
+
c = config.Config()
|
57
|
+
c.HTMLExporter.preprocessors = []
|
58
|
+
c.HTMLExporter.exclude_input_prompt = True
|
59
|
+
c.HTMLExporter.exclude_output_prompt = True
|
60
|
+
c.HTMLExporter.anchor_link_text = " "
|
61
|
+
html_exporter = HTMLExporter(config=c)
|
62
|
+
html, _ = html_exporter.from_notebook_node(notebook)
|
63
|
+
output_path.write_text(html, encoding="utf-8")
|
64
|
+
|
65
|
+
|
66
|
+
def notebook_to_script(
|
67
|
+
transform: Transform, notebook_path: Path, script_path: Path
|
68
|
+
) -> None:
|
69
|
+
import jupytext
|
70
|
+
|
71
|
+
notebook = jupytext.read(notebook_path)
|
72
|
+
py_content = jupytext.writes(notebook, fmt="py:percent")
|
73
|
+
# remove global metadata header
|
74
|
+
py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
|
75
|
+
# replace title
|
76
|
+
py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
|
77
|
+
script_path.write_text(py_content)
|
78
|
+
|
79
|
+
|
80
|
+
def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
|
81
|
+
import jupytext
|
82
|
+
|
83
|
+
# get title back
|
84
|
+
py_content = transform.source_code.replace(
|
85
|
+
"# # transform.name", f"# # {transform.name}"
|
86
|
+
)
|
87
|
+
notebook = jupytext.reads(py_content, fmt="py:percent")
|
88
|
+
jupytext.write(notebook, notebook_path)
|
89
|
+
|
90
|
+
|
21
91
|
def save_context_core(
|
22
92
|
*,
|
23
93
|
run: Run,
|
24
94
|
transform: Transform,
|
25
95
|
filepath: Path,
|
26
|
-
transform_family: QuerySet | None = None,
|
27
96
|
finished_at: bool = False,
|
97
|
+
ignore_non_consecutive: bool | None = None,
|
28
98
|
from_cli: bool = False,
|
29
99
|
) -> str | None:
|
30
100
|
import lamindb as ln
|
@@ -36,106 +106,63 @@ def save_context_core(
|
|
36
106
|
# for scripts, things are easy
|
37
107
|
is_consecutive = True
|
38
108
|
is_notebook = transform.type == "notebook"
|
39
|
-
|
109
|
+
source_code_path = filepath
|
40
110
|
# for notebooks, we need more work
|
41
111
|
if is_notebook:
|
42
112
|
try:
|
43
|
-
import
|
113
|
+
import jupytext
|
44
114
|
from nbproject.dev import (
|
45
115
|
check_consecutiveness,
|
46
116
|
read_notebook,
|
47
117
|
)
|
48
118
|
except ImportError:
|
49
|
-
logger.error(
|
50
|
-
"install nbproject & nbstripout: pip install nbproject nbstripout"
|
51
|
-
)
|
119
|
+
logger.error("install nbproject & jupytext: pip install nbproject jupytext")
|
52
120
|
return None
|
53
121
|
notebook_content = read_notebook(filepath) # type: ignore
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
"jupyter",
|
70
|
-
"nbconvert",
|
71
|
-
"--to",
|
72
|
-
"html",
|
73
|
-
filepath.as_posix(),
|
74
|
-
"--Application.log_level=40",
|
75
|
-
],
|
76
|
-
check=True,
|
77
|
-
)
|
78
|
-
# move the temporary file into the cache dir in case it's accidentally
|
79
|
-
# in an existing storage location -> we want to move associated
|
80
|
-
# artifacts into default storage and not register them in an existing
|
81
|
-
# location
|
82
|
-
report_path_orig = filepath.with_suffix(".html") # current location
|
83
|
-
report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
|
84
|
-
# don't use Path.rename here because of cross-device link error
|
85
|
-
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
86
|
-
shutil.move(
|
87
|
-
report_path_orig, # type: ignore
|
88
|
-
report_path,
|
122
|
+
if not ignore_non_consecutive: # ignore_non_consecutive is None or False
|
123
|
+
is_consecutive = check_consecutiveness(
|
124
|
+
notebook_content, calling_statement=".finish()"
|
125
|
+
)
|
126
|
+
if not is_consecutive:
|
127
|
+
response = "n" # ignore_non_consecutive == False
|
128
|
+
if ignore_non_consecutive is None:
|
129
|
+
response = input(
|
130
|
+
" Do you still want to proceed with finishing? (y/n) "
|
131
|
+
)
|
132
|
+
if response != "y":
|
133
|
+
return "aborted-non-consecutive"
|
134
|
+
# write the report
|
135
|
+
report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
|
136
|
+
".ipynb", ".html"
|
89
137
|
)
|
90
|
-
|
91
|
-
#
|
92
|
-
|
93
|
-
|
94
|
-
subprocess.run(
|
95
|
-
[
|
96
|
-
"nbstripout",
|
97
|
-
_source_code_artifact_path,
|
98
|
-
"--extra-keys",
|
99
|
-
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
|
100
|
-
],
|
101
|
-
check=True,
|
138
|
+
notebook_to_report(filepath, report_path)
|
139
|
+
# write the source code
|
140
|
+
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
|
141
|
+
".ipynb", ".py"
|
102
142
|
)
|
103
|
-
|
104
|
-
prev_report = None
|
105
|
-
prev_source = None
|
106
|
-
if transform_family is None:
|
107
|
-
transform_family = transform.versions
|
108
|
-
if len(transform_family) > 0:
|
109
|
-
for prev_transform in transform_family.order_by("-created_at"):
|
110
|
-
if (
|
111
|
-
prev_transform.latest_run is not None
|
112
|
-
and prev_transform.latest_run.report_id is not None
|
113
|
-
):
|
114
|
-
prev_report = prev_transform.latest_run.report
|
115
|
-
if prev_transform._source_code_artifact_id is not None:
|
116
|
-
prev_source = prev_transform._source_code_artifact
|
143
|
+
notebook_to_script(transform, filepath, source_code_path)
|
117
144
|
ln.settings.creation.artifact_silence_missing_run_warning = True
|
118
|
-
|
119
145
|
# track source code
|
120
|
-
|
146
|
+
hash, _ = hash_file(source_code_path) # ignore hash_type for now
|
147
|
+
if (
|
148
|
+
transform._source_code_artifact_id is not None
|
149
|
+
or transform.source_code is not None # equivalent to transform.hash is not None
|
150
|
+
):
|
121
151
|
# check if the hash of the transform source code matches
|
122
152
|
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
|
123
|
-
|
124
|
-
|
125
|
-
if
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
)
|
131
|
-
|
132
|
-
|
153
|
+
ref_hash = (
|
154
|
+
transform.hash
|
155
|
+
if transform.hash is not None
|
156
|
+
else transform._source_code_artifact.hash
|
157
|
+
)
|
158
|
+
if hash != ref_hash:
|
159
|
+
response = input(
|
160
|
+
f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
|
161
|
+
f"Proceed? (y/n)"
|
162
|
+
)
|
133
163
|
if response == "y":
|
134
|
-
transform.
|
135
|
-
transform.
|
136
|
-
logger.success(
|
137
|
-
f"replaced transform._source_code_artifact: {transform._source_code_artifact}"
|
138
|
-
)
|
164
|
+
transform.source_code = source_code_path.read_text()
|
165
|
+
transform.hash = hash
|
139
166
|
else:
|
140
167
|
logger.warning(
|
141
168
|
"Please re-run `ln.context.track()` to make a new version"
|
@@ -144,19 +171,8 @@ def save_context_core(
|
|
144
171
|
else:
|
145
172
|
logger.important("source code is already saved")
|
146
173
|
else:
|
147
|
-
|
148
|
-
|
149
|
-
description=f"Source of transform {transform.uid}",
|
150
|
-
version=transform.version,
|
151
|
-
revises=prev_source,
|
152
|
-
visibility=0, # hidden file
|
153
|
-
run=False,
|
154
|
-
)
|
155
|
-
_source_code_artifact.save(upload=True, print_progress=False)
|
156
|
-
transform._source_code_artifact = _source_code_artifact
|
157
|
-
logger.debug(
|
158
|
-
f"saved transform._source_code_artifact: {transform._source_code_artifact}"
|
159
|
-
)
|
174
|
+
transform.source_code = source_code_path.read_text()
|
175
|
+
transform.hash = hash
|
160
176
|
|
161
177
|
# track environment
|
162
178
|
env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
@@ -193,13 +209,9 @@ def save_context_core(
|
|
193
209
|
if run.report_id is not None:
|
194
210
|
hash, _ = hash_file(report_path) # ignore hash_type for now
|
195
211
|
if hash != run.report.hash:
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
|
200
|
-
)
|
201
|
-
else:
|
202
|
-
response = "y"
|
212
|
+
response = input(
|
213
|
+
f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
|
214
|
+
)
|
203
215
|
if response == "y":
|
204
216
|
run.report.replace(report_path)
|
205
217
|
run.report.save(upload=True)
|
@@ -211,7 +223,6 @@ def save_context_core(
|
|
211
223
|
report_file = ln.Artifact(
|
212
224
|
report_path,
|
213
225
|
description=f"Report of run {run.uid}",
|
214
|
-
revises=prev_report,
|
215
226
|
visibility=0, # hidden file
|
216
227
|
run=False,
|
217
228
|
)
|
lamindb/_query_set.py
CHANGED
@@ -132,7 +132,7 @@ class RecordsList(UserList):
|
|
132
132
|
return one_helper(self)
|
133
133
|
|
134
134
|
|
135
|
-
class QuerySet(models.QuerySet
|
135
|
+
class QuerySet(models.QuerySet):
|
136
136
|
"""Sets of records returned by queries.
|
137
137
|
|
138
138
|
See Also:
|
@@ -301,42 +301,50 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
301
301
|
else:
|
302
302
|
raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
|
303
303
|
|
304
|
-
@doc_args(Record.search.__doc__)
|
305
|
-
def search(self, string: str, **kwargs):
|
306
|
-
"""{}""" # noqa: D415
|
307
|
-
from ._record import _search
|
308
304
|
|
309
|
-
|
305
|
+
# -------------------------------------------------------------------------------------
|
306
|
+
# CanValidate
|
307
|
+
# -------------------------------------------------------------------------------------
|
310
308
|
|
311
|
-
@doc_args(Record.lookup.__doc__)
|
312
|
-
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
313
|
-
"""{}""" # noqa: D415
|
314
|
-
from ._record import _lookup
|
315
309
|
|
316
|
-
|
310
|
+
@doc_args(Record.search.__doc__)
|
311
|
+
def search(self, string: str, **kwargs):
|
312
|
+
"""{}""" # noqa: D415
|
313
|
+
from ._record import _search
|
317
314
|
|
318
|
-
|
319
|
-
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
320
|
-
"""{}""" # noqa: D415
|
321
|
-
from ._can_validate import _validate
|
315
|
+
return _search(cls=self, string=string, **kwargs)
|
322
316
|
|
323
|
-
return _validate(cls=self, values=values, field=field, **kwargs)
|
324
317
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
318
|
+
@doc_args(Record.lookup.__doc__)
|
319
|
+
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
320
|
+
"""{}""" # noqa: D415
|
321
|
+
from ._record import _lookup
|
329
322
|
|
330
|
-
|
323
|
+
return _lookup(cls=self, field=field, **kwargs)
|
331
324
|
|
332
|
-
@doc_args(CanValidate.standardize.__doc__)
|
333
|
-
def standardize(
|
334
|
-
self, values: Iterable, field: str | StrField | None = None, **kwargs
|
335
|
-
):
|
336
|
-
"""{}""" # noqa: D415
|
337
|
-
from ._can_validate import _standardize
|
338
325
|
|
339
|
-
|
326
|
+
@doc_args(CanValidate.validate.__doc__)
|
327
|
+
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
328
|
+
"""{}""" # noqa: D415
|
329
|
+
from ._can_validate import _validate
|
330
|
+
|
331
|
+
return _validate(cls=self, values=values, field=field, **kwargs)
|
332
|
+
|
333
|
+
|
334
|
+
@doc_args(CanValidate.inspect.__doc__)
|
335
|
+
def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
336
|
+
"""{}""" # noqa: D415
|
337
|
+
from ._can_validate import _inspect
|
338
|
+
|
339
|
+
return _inspect(cls=self, values=values, field=field, **kwargs)
|
340
|
+
|
341
|
+
|
342
|
+
@doc_args(CanValidate.standardize.__doc__)
|
343
|
+
def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
|
344
|
+
"""{}""" # noqa: D415
|
345
|
+
from ._can_validate import _standardize
|
346
|
+
|
347
|
+
return _standardize(cls=self, values=values, field=field, **kwargs)
|
340
348
|
|
341
349
|
|
342
350
|
models.QuerySet.df = QuerySet.df
|
@@ -345,10 +353,10 @@ models.QuerySet.first = QuerySet.first
|
|
345
353
|
models.QuerySet.one = QuerySet.one
|
346
354
|
models.QuerySet.one_or_none = QuerySet.one_or_none
|
347
355
|
models.QuerySet.latest_version = QuerySet.latest_version
|
348
|
-
models.QuerySet.search =
|
349
|
-
models.QuerySet.lookup =
|
350
|
-
models.QuerySet.validate =
|
351
|
-
models.QuerySet.inspect =
|
352
|
-
models.QuerySet.standardize =
|
356
|
+
models.QuerySet.search = search
|
357
|
+
models.QuerySet.lookup = lookup
|
358
|
+
models.QuerySet.validate = validate
|
359
|
+
models.QuerySet.inspect = inspect
|
360
|
+
models.QuerySet.standardize = standardize
|
353
361
|
models.QuerySet._delete_base_class = models.QuerySet.delete
|
354
362
|
models.QuerySet.delete = QuerySet.delete
|
lamindb/_record.py
CHANGED
@@ -57,7 +57,7 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
|
|
57
57
|
if kwargs.get("name") is None:
|
58
58
|
return False
|
59
59
|
queryset = _search(
|
60
|
-
record.__class__, kwargs["name"], field="name", truncate_words=True, limit=
|
60
|
+
record.__class__, kwargs["name"], field="name", truncate_words=True, limit=3
|
61
61
|
)
|
62
62
|
if not queryset.exists(): # empty queryset
|
63
63
|
return False
|
@@ -586,7 +586,8 @@ def delete(self) -> None:
|
|
586
586
|
# but that's for another time
|
587
587
|
if isinstance(self, IsVersioned) and self.is_latest:
|
588
588
|
new_latest = (
|
589
|
-
self.__class__.
|
589
|
+
self.__class__.objects.using(self._state.db)
|
590
|
+
.filter(is_latest=False, uid__startswith=self.stem_uid)
|
590
591
|
.order_by("-created_at")
|
591
592
|
.first()
|
592
593
|
)
|
lamindb/_transform.py
CHANGED
@@ -37,9 +37,11 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
37
37
|
"Only name, key, version, type, revises, reference, "
|
38
38
|
f"reference_type can be passed, but you passed: {kwargs}"
|
39
39
|
)
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
if revises is None:
|
41
|
+
if key is not None:
|
42
|
+
revises = Transform.filter(key=key).order_by("-created_at").first()
|
43
|
+
elif uid is not None and not uid.endswith("0000"):
|
44
|
+
revises = Transform.filter(uid__startswith=uid[:-4]).one_or_none()
|
43
45
|
if revises is not None and key is not None and revises.key != key:
|
44
46
|
note = message_update_key_in_version_family(
|
45
47
|
suid=revises.stem_uid,
|
lamindb/core/__init__.py
CHANGED
lamindb/core/_context.py
CHANGED
@@ -7,6 +7,7 @@ from datetime import datetime, timezone
|
|
7
7
|
from pathlib import Path, PurePath
|
8
8
|
from typing import TYPE_CHECKING
|
9
9
|
|
10
|
+
import lamindb_setup as ln_setup
|
10
11
|
from lamin_utils import logger
|
11
12
|
from lamindb_setup.core.hashing import hash_file
|
12
13
|
from lnschema_core import Run, Transform, ids
|
@@ -111,7 +112,18 @@ def pretty_pypackages(dependencies: dict) -> str:
|
|
111
112
|
class Context:
|
112
113
|
"""Run context.
|
113
114
|
|
114
|
-
|
115
|
+
Enables convenient data lineage tracking by managing a transform & run
|
116
|
+
upon :meth:`~lamindb.core.Context.track` & :meth:`~lamindb.core.Context.finish`.
|
117
|
+
|
118
|
+
Examples:
|
119
|
+
|
120
|
+
Is typically used via :class:`~lamindb.context`:
|
121
|
+
|
122
|
+
>>> import lamindb as ln
|
123
|
+
>>> ln.context.track()
|
124
|
+
>>> # do things while tracking data lineage
|
125
|
+
>>> ln.context.finish()
|
126
|
+
|
115
127
|
"""
|
116
128
|
|
117
129
|
def __init__(self):
|
@@ -165,42 +177,35 @@ class Context:
|
|
165
177
|
self,
|
166
178
|
*,
|
167
179
|
params: dict | None = None,
|
168
|
-
transform: Transform | None = None,
|
169
180
|
new_run: bool | None = None,
|
170
181
|
path: str | None = None,
|
182
|
+
transform: Transform | None = None,
|
171
183
|
) -> None:
|
172
|
-
"""
|
173
|
-
|
174
|
-
Creates or loads a global :class:`~lamindb.Run` that enables data
|
175
|
-
lineage tracking.
|
184
|
+
"""Starts data lineage tracking for a run.
|
176
185
|
|
177
|
-
|
186
|
+
- sets :attr:`~lamindb.core.Context.transform` &
|
187
|
+
:attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
|
188
|
+
`Run` records
|
189
|
+
- saves compute environment as a `requirements.txt` file: `run.environment`
|
178
190
|
|
179
|
-
If :attr:`~lamindb.core.Settings.sync_git_repo` is set,
|
180
|
-
|
191
|
+
If :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a
|
192
|
+
script-like transform exists in a git repository and links it.
|
181
193
|
|
182
194
|
Args:
|
183
195
|
params: A dictionary of parameters to track for the run.
|
184
|
-
transform: Can be of type `"pipeline"` or `"notebook"`
|
185
|
-
(:class:`~lamindb.core.types.TransformType`).
|
186
196
|
new_run: If `False`, loads latest run of transform
|
187
197
|
(default notebook), if `True`, creates new run (default pipeline).
|
188
198
|
path: Filepath of notebook or script. Only needed if it can't be
|
189
199
|
automatically detected.
|
200
|
+
transform: Useful to track an abstract pipeline.
|
190
201
|
|
191
202
|
Examples:
|
192
203
|
|
193
|
-
To track a notebook or script, call:
|
204
|
+
To track the run of a notebook or script, call:
|
194
205
|
|
195
206
|
>>> import lamindb as ln
|
196
207
|
>>> ln.context.track()
|
197
208
|
|
198
|
-
If you'd like to track an abstract pipeline run, pass a
|
199
|
-
:class:`~lamindb.Transform` object of ``type`` ``"pipeline"``:
|
200
|
-
|
201
|
-
>>> ln.Transform(name="Cell Ranger", version="2", type="pipeline").save()
|
202
|
-
>>> transform = ln.Transform.get(name="Cell Ranger", version="2")
|
203
|
-
>>> ln.context.track(transform=transform)
|
204
209
|
"""
|
205
210
|
self._path = None
|
206
211
|
if transform is None:
|
@@ -414,6 +419,10 @@ class Context:
|
|
414
419
|
if transform is None:
|
415
420
|
if uid is None:
|
416
421
|
uid = f"{stem_uid}{get_uid_ext(version)}"
|
422
|
+
# note that here we're not passing revises because we're not querying it
|
423
|
+
# hence, we need to do a revision family lookup based on key
|
424
|
+
# hence, we need key to be not None
|
425
|
+
assert key is not None # noqa: S101
|
417
426
|
transform = Transform(
|
418
427
|
uid=uid,
|
419
428
|
version=version,
|
@@ -422,8 +431,7 @@ class Context:
|
|
422
431
|
reference=transform_ref,
|
423
432
|
reference_type=transform_ref_type,
|
424
433
|
type=transform_type,
|
425
|
-
)
|
426
|
-
transform.save()
|
434
|
+
).save()
|
427
435
|
self._logging_message += f"created Transform('{transform.uid}')"
|
428
436
|
else:
|
429
437
|
uid = transform.uid
|
@@ -449,39 +457,67 @@ class Context:
|
|
449
457
|
"updated transform name, " # white space on purpose
|
450
458
|
)
|
451
459
|
# check whether transform source code was already saved
|
452
|
-
if
|
453
|
-
|
460
|
+
if (
|
461
|
+
transform._source_code_artifact_id is not None
|
462
|
+
or transform.source_code is not None
|
463
|
+
):
|
464
|
+
bump_revision = False
|
454
465
|
if is_run_from_ipython:
|
455
|
-
|
466
|
+
bump_revision = True
|
456
467
|
else:
|
457
468
|
hash, _ = hash_file(self._path) # ignore hash_type for now
|
458
|
-
if hash
|
459
|
-
|
469
|
+
if transform.hash is not None:
|
470
|
+
condition = hash != transform.hash
|
471
|
+
else:
|
472
|
+
condition = hash != transform._source_code_artifact.hash
|
473
|
+
if condition:
|
474
|
+
bump_revision = True
|
460
475
|
else:
|
461
476
|
self._logging_message += f"loaded Transform('{transform.uid}')"
|
462
|
-
if
|
477
|
+
if bump_revision:
|
463
478
|
change_type = (
|
464
479
|
"Re-running saved notebook"
|
465
480
|
if is_run_from_ipython
|
466
481
|
else "Source code changed"
|
467
482
|
)
|
468
483
|
suid, vuid = (
|
469
|
-
uid[
|
470
|
-
uid[
|
484
|
+
uid[:-4],
|
485
|
+
uid[-4:],
|
471
486
|
)
|
472
487
|
new_vuid = increment_base62(vuid)
|
473
488
|
raise UpdateContext(
|
474
|
-
f"{change_type}, bump
|
489
|
+
f"{change_type}, bump revision by setting:\n\n"
|
475
490
|
f'ln.context.uid = "{suid}{new_vuid}"'
|
476
491
|
)
|
477
492
|
else:
|
478
493
|
self._logging_message += f"loaded Transform('{transform.uid}')"
|
479
494
|
self._transform = transform
|
480
495
|
|
481
|
-
def finish(self) -> None:
|
482
|
-
"""Mark
|
496
|
+
def finish(self, ignore_non_consecutive: None | bool = None) -> None:
|
497
|
+
"""Mark the run context as finished.
|
498
|
+
|
499
|
+
- writes a timestamp: `run.finished_at`
|
500
|
+
- saves the source code: `transform.source_code`
|
501
|
+
|
502
|
+
When called in the last cell of a notebook:
|
503
|
+
|
504
|
+
- prompts for user input if not consecutively executed
|
505
|
+
- requires you to save the notebook in your editor
|
506
|
+
- saves a run report: `run.report`
|
507
|
+
|
508
|
+
Args:
|
509
|
+
ignore_non_consecutive: Whether to ignore if a notebook was non-consecutively executed.
|
510
|
+
|
511
|
+
Examples:
|
512
|
+
|
513
|
+
>>> import lamindb as ln
|
514
|
+
>>> ln.context.track()
|
515
|
+
>>> # do things while tracking data lineage
|
516
|
+
>>> ln.context.finish()
|
517
|
+
|
518
|
+
See Also:
|
519
|
+
`lamin save script.py` or `lamin save notebook.ipynb` → `docs </cli#lamin-save>`__
|
483
520
|
|
484
|
-
Saves source code and, for notebooks, a run report to your default storage location.
|
485
521
|
"""
|
486
522
|
from lamindb._finish import save_context_core
|
487
523
|
|
@@ -500,18 +536,16 @@ class Context:
|
|
500
536
|
# nothing else to do
|
501
537
|
return None
|
502
538
|
if is_run_from_ipython: # notebooks
|
503
|
-
if (
|
504
|
-
get_seconds_since_modified(context._path) > 3
|
505
|
-
and os.getenv("LAMIN_TESTING") is None
|
506
|
-
):
|
539
|
+
if get_seconds_since_modified(context._path) > 2 and not ln_setup._TESTING:
|
507
540
|
raise NotebookFileNotSavedToDisk(
|
508
|
-
"Please save the notebook manually in your editor right before running `ln.finish()`"
|
541
|
+
"Please save the notebook manually in your editor right before running `ln.context.finish()`"
|
509
542
|
)
|
510
543
|
save_context_core(
|
511
544
|
run=context.run,
|
512
545
|
transform=context.run.transform,
|
513
546
|
filepath=context._path,
|
514
547
|
finished_at=True,
|
548
|
+
ignore_non_consecutive=ignore_non_consecutive,
|
515
549
|
)
|
516
550
|
|
517
551
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -118,13 +118,11 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
118
118
|
|
119
119
|
|
120
120
|
class LabelManager:
|
121
|
-
"""Label manager
|
121
|
+
"""Label manager.
|
122
122
|
|
123
123
|
This allows you to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
|
124
124
|
typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
|
125
125
|
with features.
|
126
|
-
|
127
|
-
See :class:`~lamindb.core.HasFeatures` for more information.
|
128
126
|
"""
|
129
127
|
|
130
128
|
def __init__(self, host: Artifact | Collection):
|
@@ -10,7 +10,7 @@ import numpy as np
|
|
10
10
|
import pandas as pd
|
11
11
|
from anndata import AnnData
|
12
12
|
from anndata import __version__ as anndata_version
|
13
|
-
from anndata._core.index import
|
13
|
+
from anndata._core.index import _normalize_indices
|
14
14
|
from anndata._core.views import _resolve_idx
|
15
15
|
from anndata._io.h5ad import read_dataframe_legacy as read_dataframe_legacy_h5
|
16
16
|
from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
|
@@ -29,6 +29,11 @@ if TYPE_CHECKING:
|
|
29
29
|
|
30
30
|
anndata_version_parse = version.parse(anndata_version)
|
31
31
|
|
32
|
+
if anndata_version_parse < version.parse("0.9.0"):
|
33
|
+
from anndata._core.index import Index
|
34
|
+
else:
|
35
|
+
from anndata.compat import Index
|
36
|
+
|
32
37
|
if anndata_version_parse < version.parse("0.10.0"):
|
33
38
|
if anndata_version_parse < version.parse("0.9.1"):
|
34
39
|
logger.warning(
|
@@ -2,11 +2,11 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
|
-
from anndata import AnnData
|
5
|
+
from anndata import AnnData, read_h5ad
|
6
6
|
from lamin_utils import logger
|
7
7
|
from lamindb_setup import settings as setup_settings
|
8
8
|
from lamindb_setup.core._settings_storage import get_storage_region
|
9
|
-
from lamindb_setup.core.upath import create_path
|
9
|
+
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
10
10
|
from lnschema_core import Artifact, Run
|
11
11
|
|
12
12
|
if TYPE_CHECKING:
|
@@ -23,7 +23,12 @@ def _read_adata_h5ad_zarr(objpath: UPath):
|
|
23
23
|
if objpath.is_dir():
|
24
24
|
adata = read_adata_zarr(objpath)
|
25
25
|
else:
|
26
|
-
|
26
|
+
# for now, only local files are read in backed mode
|
27
|
+
# in principle, remote files could be read in backed mode as well
|
28
|
+
if isinstance(objpath, LocalPathClasses):
|
29
|
+
adata = read_h5ad(objpath.as_posix(), backed="r")
|
30
|
+
else:
|
31
|
+
adata = read_adata_h5ad(objpath)
|
27
32
|
return adata
|
28
33
|
|
29
34
|
|
@@ -85,27 +90,24 @@ def save_tiledbsoma_experiment(
|
|
85
90
|
) -> Artifact:
|
86
91
|
"""Write `AnnData` to `tiledbsoma.Experiment`.
|
87
92
|
|
88
|
-
Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates
|
89
|
-
|
90
|
-
|
91
|
-
when it writes to a new store or appends to a store that has this column in `obs`.
|
93
|
+
Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates & saves an :class:`~lamindb.Artifact`.
|
94
|
+
|
95
|
+
Populates a `lamin_run_uid` column in `obs` with the current `run.uid`.
|
92
96
|
|
93
|
-
|
97
|
+
Is based on `tiledbsoma.io.from_anndata
|
94
98
|
<https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_anndata.html>`__.
|
95
99
|
|
96
100
|
Args:
|
97
101
|
adatas: `AnnData` objects to write, in-memory or on-disk.
|
98
|
-
key:
|
102
|
+
key: An optional key to reference the artifact.
|
99
103
|
description: A description.
|
100
104
|
run: The run that creates the artifact.
|
101
105
|
revises: `lamindb.Artifact` with `tiledbsoma.Experiment` to append to.
|
102
|
-
Triggers a revision (a new untagged version).
|
103
106
|
measurement_name: The name of the measurement to store data in `tiledbsoma.Experiment`.
|
104
107
|
obs_id_name: Which `AnnData` `obs` column to use for append mode.
|
105
108
|
var_id_name: Which `AnnData` `var` column to use for append mode.
|
106
109
|
append_obsm_varm: Whether to append `obsm` and `varm` in append mode.
|
107
|
-
**kwargs:
|
108
|
-
writes `adatas`.
|
110
|
+
**kwargs: Keyword arguments passed to `tiledbsoma.io.from_anndata`.
|
109
111
|
"""
|
110
112
|
try:
|
111
113
|
import tiledbsoma as soma
|
@@ -120,14 +122,12 @@ def save_tiledbsoma_experiment(
|
|
120
122
|
run = get_run(run)
|
121
123
|
|
122
124
|
appending = revises is not None
|
123
|
-
|
124
125
|
if appending:
|
125
|
-
_uid = None
|
126
126
|
storepath = revises.path
|
127
127
|
else:
|
128
|
-
|
128
|
+
uid, _ = create_uid(n_full_id=20)
|
129
129
|
storage_key = auto_storage_key_from_artifact_uid(
|
130
|
-
|
130
|
+
uid, ".tiledbsoma", is_dir=True
|
131
131
|
)
|
132
132
|
storepath = setup_settings.storage.root / storage_key
|
133
133
|
|
@@ -162,7 +162,8 @@ def save_tiledbsoma_experiment(
|
|
162
162
|
adata.obs["lamin_run_uid"] = run.uid
|
163
163
|
adata_objects.append(adata)
|
164
164
|
|
165
|
-
|
165
|
+
registration_mapping = kwargs.get("registration_mapping", None)
|
166
|
+
if registration_mapping is None and (appending or len(adata_objects) > 1):
|
166
167
|
registration_mapping = soma_io.register_anndatas(
|
167
168
|
experiment_uri=storepath if appending else None,
|
168
169
|
adatas=adata_objects,
|
@@ -172,8 +173,6 @@ def save_tiledbsoma_experiment(
|
|
172
173
|
append_obsm_varm=append_obsm_varm,
|
173
174
|
context=ctx,
|
174
175
|
)
|
175
|
-
else:
|
176
|
-
registration_mapping = None
|
177
176
|
|
178
177
|
for adata_obj in adata_objects:
|
179
178
|
soma_io.from_anndata(
|
@@ -188,5 +187,10 @@ def save_tiledbsoma_experiment(
|
|
188
187
|
)
|
189
188
|
|
190
189
|
return Artifact(
|
191
|
-
storepath,
|
190
|
+
storepath,
|
191
|
+
key=key,
|
192
|
+
description=description,
|
193
|
+
run=run,
|
194
|
+
revises=revises,
|
195
|
+
_is_internal_call=True,
|
192
196
|
).save()
|
lamindb/core/versioning.py
CHANGED
@@ -99,6 +99,10 @@ def create_uid(
|
|
99
99
|
n_full_id: int = 20,
|
100
100
|
revises: IsVersioned | None = None,
|
101
101
|
) -> tuple[str, IsVersioned | None]:
|
102
|
+
"""This also updates revises in case it's not the latest version.
|
103
|
+
|
104
|
+
This is why it returns revises.
|
105
|
+
"""
|
102
106
|
if revises is not None:
|
103
107
|
if not revises.is_latest:
|
104
108
|
# need one more request
|
@@ -56,22 +56,12 @@ def save_vitessce_config(
|
|
56
56
|
raise ValueError("Each file must have a 'url' key.")
|
57
57
|
s3_path = file["url"]
|
58
58
|
s3_path_last_element = s3_path.split("/")[-1]
|
59
|
-
# note 1: the following parses the stem uid of the artifact from the S3 path
|
60
|
-
# there might be a better way of doing this in case the vitessce config
|
61
|
-
# gets updated in the future; but given these paths are set in stone
|
62
|
-
# this should be more robust than it looks
|
63
|
-
#
|
64
|
-
# note 2: what's not great is the fact that people might use composite suffixes we don't recognize
|
65
|
-
# I don't know what to do about it other than documenting it clearly
|
66
|
-
# https://github.com/laminlabs/lamindb/blob/main/lamindb/core/storage/_valid_suffixes.py
|
67
|
-
# https://docs.lamin.ai/lamindb.core.storage.valid_suffixes
|
68
|
-
#
|
69
59
|
# now start with attempting to strip the composite suffix candidates
|
70
60
|
for suffix in valid_composite_zarr_suffixes:
|
71
61
|
s3_path_last_element = s3_path_last_element.replace(suffix, "")
|
72
62
|
# in case there was no hit, strip plain ".zarr"
|
73
63
|
artifact_stem_uid = s3_path_last_element.replace(".zarr", "")
|
74
|
-
# if there is still a "." in string,
|
64
|
+
# if there is still a "." in string, raise an error
|
75
65
|
if "." in artifact_stem_uid:
|
76
66
|
raise ValueError(
|
77
67
|
f"Suffix should be '.zarr' or one of {valid_composite_zarr_suffixes}. Inspect your path {s3_path}"
|
@@ -83,12 +73,16 @@ def save_vitessce_config(
|
|
83
73
|
)
|
84
74
|
else:
|
85
75
|
dataset_artifacts.append(artifact)
|
86
|
-
#
|
76
|
+
# the below will be replaced with a `ln.tracked()` decorator soon
|
87
77
|
with logger.mute():
|
88
|
-
transform = Transform(
|
78
|
+
transform = Transform(
|
79
|
+
uid="kup03MJBsIVa0001",
|
80
|
+
name="save_vitessce_config",
|
81
|
+
type="function",
|
82
|
+
version="2",
|
83
|
+
)
|
89
84
|
transform.save()
|
90
|
-
run = Run(transform=transform)
|
91
|
-
run.save()
|
85
|
+
run = Run(transform=transform).save()
|
92
86
|
if len(dataset_artifacts) > 1:
|
93
87
|
# if we have more datasets, we should create a collection
|
94
88
|
# and attach an action to the collection
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.76.
|
3
|
+
Version: 0.76.5
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,22 +9,22 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.73.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.73.5
|
13
|
+
Requires-Dist: lamindb_setup==0.77.1
|
14
14
|
Requires-Dist: lamin_utils==0.13.4
|
15
|
-
Requires-Dist: lamin_cli==0.
|
15
|
+
Requires-Dist: lamin_cli==0.17.1
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
19
19
|
Requires-Dist: python-dateutil
|
20
|
-
Requires-Dist: anndata>=0.8.0,<=0.10.
|
20
|
+
Requires-Dist: anndata>=0.8.0,<=0.10.9
|
21
21
|
Requires-Dist: scipy<1.13.0rc1
|
22
22
|
Requires-Dist: fsspec
|
23
23
|
Requires-Dist: pandas
|
24
24
|
Requires-Dist: graphviz
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
26
|
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
27
|
-
Requires-Dist: bionty==0.49.
|
27
|
+
Requires-Dist: bionty==0.49.1 ; extra == "bionty"
|
28
28
|
Requires-Dist: pre-commit ; extra == "dev"
|
29
29
|
Requires-Dist: nox ; extra == "dev"
|
30
30
|
Requires-Dist: laminci>=0.3 ; extra == "dev"
|
@@ -38,7 +38,7 @@ Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
|
38
38
|
Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
|
39
39
|
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
40
40
|
Requires-Dist: nbproject==0.10.4 ; extra == "jupyter"
|
41
|
-
Requires-Dist:
|
41
|
+
Requires-Dist: jupytext ; extra == "jupyter"
|
42
42
|
Requires-Dist: nbconvert ; extra == "jupyter"
|
43
43
|
Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
|
44
44
|
Project-URL: Home, https://github.com/laminlabs/lamindb
|
@@ -1,30 +1,30 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_artifact.py,sha256=
|
3
|
-
lamindb/_can_validate.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=jllkTTXtX6UHCCnv7BvPAywnywCNWlWm0l5UbCnK9nM,2344
|
2
|
+
lamindb/_artifact.py,sha256=QjCge5kaAcfhGv84s299OT99LmHTSYDxgzw5kN-x3-8,44416
|
3
|
+
lamindb/_can_validate.py,sha256=9di9FLmC2m3dpT42sceF34UEFzQITi2e_hjVMa8DIc4,18261
|
4
4
|
lamindb/_collection.py,sha256=F_VgpLBprrzUQ-tPngWvO9vFd7jX66MVwIi031JOris,14871
|
5
|
-
lamindb/_curate.py,sha256=
|
5
|
+
lamindb/_curate.py,sha256=gCbDiqhsJzVZZ6BuEoFXUpsNOffpUNDlrX1dJiOqJJo,58753
|
6
6
|
lamindb/_feature.py,sha256=nZhtrH0ssoNls-hV-dkwfK9sKypg2El59R9qfarxfUE,5340
|
7
7
|
lamindb/_feature_set.py,sha256=DmAy96V_RyV0yiyvWOCHgustXPsCaMwn4TrWwh2qDd8,8104
|
8
8
|
lamindb/_filter.py,sha256=9QHa9J-_6QeYPQATZpTun2VGiFofwzB0Km-KnKajHcM,663
|
9
|
-
lamindb/_finish.py,sha256=
|
9
|
+
lamindb/_finish.py,sha256=riwm7mA-RXej_L0iz_svt6j5Z6faQb3NmQGKjAwhx8g,9282
|
10
10
|
lamindb/_from_values.py,sha256=8kYpR8Q85EOaTcsPGjVHeZh29fGVgum5OEQf4Hsz_80,13533
|
11
11
|
lamindb/_is_versioned.py,sha256=5lAnhTboltFkZCKVRV1uxkm0OCjJz_HKi3yQq_vEuMs,1306
|
12
12
|
lamindb/_parents.py,sha256=eMavdd6IO6STOVJSlR2TzdRtx6sKYDKsMOtlR3DZlgQ,15599
|
13
13
|
lamindb/_query_manager.py,sha256=Ipe85HL31DDwMbC8CN_1Svbwk48a_DUh_INGQdZL08I,4222
|
14
|
-
lamindb/_query_set.py,sha256=
|
15
|
-
lamindb/_record.py,sha256=
|
14
|
+
lamindb/_query_set.py,sha256=BiGvEiaBSd9aV28EAy83Q8h6RLsYMDjfxLOljAcyMaM,12692
|
15
|
+
lamindb/_record.py,sha256=53_0oU6v45V5gIDJgkAUSX7iIV5Si_4cuOWUHJa8JVo,21241
|
16
16
|
lamindb/_run.py,sha256=5M_r1zGDv9HlqbqRKTWCYCOtENovJ-8mQ4kY7XqcLaU,1888
|
17
17
|
lamindb/_save.py,sha256=Fu7Z84btKOXfTfpunKLni21s5ER2zIllqg5e3nPq-0A,10910
|
18
18
|
lamindb/_storage.py,sha256=GBVChv-DHVMNEBJL5l_JT6B4RDhZ6NnwgzmUICphYKk,413
|
19
|
-
lamindb/_transform.py,sha256=
|
19
|
+
lamindb/_transform.py,sha256=ekwHQc4fv8PV1cffCYtTPfxL1RJtENd9_Y3v9CwxqYc,4213
|
20
20
|
lamindb/_ulabel.py,sha256=XDSdZBXX_ki5s1vOths3MjF2x5DPggBR_PV_KF4SGyg,1611
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=4Ln2ItTb3857PAI-70O8eJYqoTJ_NNFc7E_wds6OGns,2412
|
23
|
-
lamindb/core/__init__.py,sha256=
|
24
|
-
lamindb/core/_context.py,sha256=
|
23
|
+
lamindb/core/__init__.py,sha256=hxPWM_Jnrllx0G_6itEGU2meXwptkkgiL9zsBvlhHM4,1495
|
24
|
+
lamindb/core/_context.py,sha256=zt4aJz_IxPu3ujENyPjwJCebh_3w5Vu6QkDh0dspZFA,20719
|
25
25
|
lamindb/core/_data.py,sha256=eocOXsZGu62LPtz6yIlvHhPSJTf3yF2ITZTffyflWYI,16269
|
26
26
|
lamindb/core/_feature_manager.py,sha256=94tX6gq_Rx7fkDARQBxB2z92qUDpHocFSAdKv5izMT4,32490
|
27
|
-
lamindb/core/_label_manager.py,sha256=
|
27
|
+
lamindb/core/_label_manager.py,sha256=zCE-PS1Y5ALpzoSOx1P6ZTFVPgFNRAmmyTQF0e8QBXA,9131
|
28
28
|
lamindb/core/_mapped_collection.py,sha256=1XzratL2IvRleqioNhWo26Lsuqkev8-HEImmHQxw9Kw,23266
|
29
29
|
lamindb/core/_settings.py,sha256=GGEB8BU5GinIfD4ktr1Smp6GPHGaInu46MhP4EecZDY,5950
|
30
30
|
lamindb/core/_sync_git.py,sha256=qc0yfPyKeG4uuNT_3qsv-mkIMqhLFqfXNeNVO49vV00,4547
|
@@ -33,15 +33,15 @@ lamindb/core/exceptions.py,sha256=TKyt1JOUwWIHbkCQjir_LQadf8960eQ95RWhSpz5_Bk,12
|
|
33
33
|
lamindb/core/fields.py,sha256=47Jmh3efUr5ZscgimR_yckY-I3cNf8ScLutbwKCK3j4,162
|
34
34
|
lamindb/core/schema.py,sha256=KiYQn_8fokSMztTNDe6qUocZzKXWxU32H-YChNJv51A,1877
|
35
35
|
lamindb/core/types.py,sha256=uVBqSVLoQaTkqP9nqsJhwU6yYnx8H5e6-ZxrB6vpOOw,265
|
36
|
-
lamindb/core/versioning.py,sha256=
|
36
|
+
lamindb/core/versioning.py,sha256=GYhgSA6IOlWMMNfctZu7U_jIvmQP2gdvsZxn4bTanOc,5277
|
37
37
|
lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
|
38
38
|
lamindb/core/datasets/_core.py,sha256=CgVF_pXuBXLElzubDMsl1DbpYOnXCY0HleITVvBKih4,19873
|
39
39
|
lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
|
40
40
|
lamindb/core/storage/__init__.py,sha256=x-Bpxv1rx6uGog-chI0fdpt0UhkXQkwoQqye0TNk0WQ,514
|
41
|
-
lamindb/core/storage/_anndata_accessor.py,sha256=
|
41
|
+
lamindb/core/storage/_anndata_accessor.py,sha256=F3ze8ICG7K4BKueg-766olnoEA8Eh8gVrvDSSE2FX-M,24160
|
42
42
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
43
43
|
lamindb/core/storage/_backed_access.py,sha256=YcWCeT2eligJGsBdjJS_-4el_eC9J088jxUWG9lsleM,3231
|
44
|
-
lamindb/core/storage/_tiledbsoma.py,sha256=
|
44
|
+
lamindb/core/storage/_tiledbsoma.py,sha256=OVDjvu4lRovRtWcn9Tu3N5tA-zbF8snsjkR4DOXOTTc,7001
|
45
45
|
lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
|
46
46
|
lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
|
47
47
|
lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
|
@@ -50,10 +50,10 @@ lamindb/core/subsettings/__init__.py,sha256=KFHPzIE7f7Bj4RgMjGQF4CjTdHVG_VNFBrCn
|
|
50
50
|
lamindb/core/subsettings/_creation_settings.py,sha256=54mfMH_osC753hpxcl7Dq1rwBD2LHnWveXtQpkLBITE,1194
|
51
51
|
lamindb/core/subsettings/_transform_settings.py,sha256=4YbCuZtJo6zdytl6UQR4GvdDkTtT6SRBqVzofGzNOt8,583
|
52
52
|
lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23OItw,215
|
53
|
-
lamindb/integrations/_vitessce.py,sha256=
|
53
|
+
lamindb/integrations/_vitessce.py,sha256=671jHIF8LgUjcOgRvJNhP0aK1Xty9pHkQ8ukx1U2gLY,4578
|
54
54
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
55
55
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
56
|
-
lamindb-0.76.
|
57
|
-
lamindb-0.76.
|
58
|
-
lamindb-0.76.
|
59
|
-
lamindb-0.76.
|
56
|
+
lamindb-0.76.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
57
|
+
lamindb-0.76.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
58
|
+
lamindb-0.76.5.dist-info/METADATA,sha256=lmB6jhR6ZnklsigeYZFeCiNWMyGHqStcg_teeo1JmNk,2372
|
59
|
+
lamindb-0.76.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|