lamindb 0.69.9__py3-none-any.whl → 0.70.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +461 -126
- lamindb/_artifact.py +69 -20
- lamindb/_can_validate.py +13 -18
- lamindb/_collection.py +48 -44
- lamindb/_feature_set.py +20 -8
- lamindb/_finish.py +28 -42
- lamindb/_from_values.py +23 -17
- lamindb/_registry.py +7 -2
- lamindb/core/__init__.py +16 -4
- lamindb/core/_data.py +22 -16
- lamindb/core/_feature_manager.py +80 -25
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +106 -52
- lamindb/core/_run_context.py +0 -1
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/_core.py +42 -2
- lamindb/core/storage/_backed_access.py +8 -4
- lamindb/core/storage/file.py +9 -0
- lamindb/core/storage/object.py +19 -0
- lamindb/integrations/_vitessce.py +18 -9
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/METADATA +7 -8
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/RECORD +25 -25
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/LICENSE +0 -0
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/WHEEL +0 -0
lamindb/_finish.py
CHANGED
@@ -43,39 +43,35 @@ def finish(i_saved_the_notebook: bool = False):
|
|
43
43
|
"Please pass `i_saved_the_notebook=True` to `ln.finish()`, save the notebook, and re-run this cell."
|
44
44
|
)
|
45
45
|
return None
|
46
|
-
notebook_content = read_notebook(run_context.path) # type: ignore
|
47
|
-
if not check_last_cell(notebook_content, "i_saved_the_notebook"):
|
48
|
-
raise CallFinishInLastCell(
|
49
|
-
"Can only run `ln.finish(i_saved_the_notebook=True)` from the last code cell of the notebook."
|
50
|
-
)
|
51
46
|
save_run_context_core(
|
52
47
|
run=run_context.run,
|
53
48
|
transform=run_context.transform,
|
54
49
|
filepath=run_context.path,
|
55
50
|
finished_at=True,
|
56
|
-
notebook_content=notebook_content,
|
57
51
|
)
|
58
52
|
else:
|
59
53
|
# scripts
|
54
|
+
# save_run_context_core was already called during ln.track()
|
60
55
|
run_context.run.finished_at = datetime.now(timezone.utc) # update run time
|
61
56
|
run_context.run.save()
|
62
57
|
|
63
58
|
|
64
|
-
# do not type because we need to be aware of lnschema_core import order
|
65
59
|
def save_run_context_core(
|
66
60
|
*,
|
67
61
|
run: Run,
|
68
62
|
transform: Transform,
|
69
63
|
filepath: Path,
|
70
64
|
transform_family: QuerySet | None = None,
|
71
|
-
is_consecutive: bool = True,
|
72
65
|
finished_at: bool = False,
|
73
|
-
notebook_content=None, # nbproject.Notebook
|
74
66
|
) -> str | None:
|
75
67
|
import lamindb as ln
|
76
68
|
|
77
69
|
ln.settings.verbosity = "success"
|
78
70
|
|
71
|
+
# for scripts, things are easy
|
72
|
+
is_consecutive = True
|
73
|
+
source_code_path = filepath
|
74
|
+
# for notebooks, we need more work
|
79
75
|
if transform.type == TransformType.notebook:
|
80
76
|
try:
|
81
77
|
import nbstripout
|
@@ -88,62 +84,52 @@ def save_run_context_core(
|
|
88
84
|
"install nbproject & nbstripout: pip install nbproject nbstripout"
|
89
85
|
)
|
90
86
|
return None
|
91
|
-
|
92
|
-
notebook_content = read_notebook(filepath) # type: ignore
|
87
|
+
notebook_content = read_notebook(filepath) # type: ignore
|
93
88
|
is_consecutive = check_consecutiveness(notebook_content)
|
94
89
|
if not is_consecutive:
|
90
|
+
msg = " Do you still want to proceed with finishing? (y/n) "
|
95
91
|
if os.getenv("LAMIN_TESTING") is None:
|
96
|
-
|
97
|
-
" Do you still want to proceed with publishing? (y/n) "
|
98
|
-
)
|
92
|
+
response = input(msg)
|
99
93
|
else:
|
100
|
-
|
101
|
-
if
|
102
|
-
logger.error("Aborted (non-consecutive)!")
|
94
|
+
response = "n"
|
95
|
+
if response != "y":
|
103
96
|
return "aborted-non-consecutive"
|
104
|
-
|
105
97
|
# convert the notebook file to html
|
106
98
|
# log_level is set to 40 to silence the nbconvert logging
|
107
|
-
|
99
|
+
subprocess.run(
|
108
100
|
"jupyter nbconvert --to html"
|
109
101
|
f" {filepath.as_posix()} --Application.log_level=40",
|
110
102
|
shell=True,
|
103
|
+
check=True,
|
111
104
|
)
|
112
105
|
# move the temporary file into the cache dir in case it's accidentally
|
113
106
|
# in an existing storage location -> we want to move associated
|
114
107
|
# artifacts into default storage and not register them in an existing
|
115
108
|
# location
|
116
|
-
|
109
|
+
filepath_html_orig = filepath.with_suffix(".html") # current location
|
110
|
+
filepath_html = ln_setup.settings.storage.cache_dir / filepath_html_orig.name
|
111
|
+
# don't use Path.rename here because of cross-device link error
|
112
|
+
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
117
113
|
shutil.move(
|
118
|
-
|
119
|
-
|
120
|
-
)
|
121
|
-
#
|
122
|
-
|
123
|
-
ln_setup.settings.storage.cache_dir / filepath_html.name
|
124
|
-
) # adjust location
|
125
|
-
assert result.returncode == 0
|
126
|
-
# copy the notebook file to a temporary file
|
114
|
+
filepath_html_orig, # type: ignore
|
115
|
+
filepath_html,
|
116
|
+
)
|
117
|
+
# strip the output from the notebook to create the source code file
|
118
|
+
# first, copy the notebook file to a temporary file in the cache
|
127
119
|
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
|
128
120
|
shutil.copy2(filepath, source_code_path) # copy
|
129
|
-
|
130
|
-
assert result.returncode == 0
|
131
|
-
else:
|
132
|
-
source_code_path = filepath
|
121
|
+
subprocess.run(f"nbstripout {source_code_path}", shell=True, check=True)
|
133
122
|
# find initial versions of source codes and html reports
|
134
|
-
|
135
|
-
|
123
|
+
prev_report = None
|
124
|
+
prev_source = None
|
136
125
|
if transform_family is None:
|
137
126
|
transform_family = transform.versions
|
138
127
|
if len(transform_family) > 0:
|
139
128
|
for prev_transform in transform_family.order_by("-created_at"):
|
140
|
-
# check for id to avoid query
|
141
129
|
if prev_transform.latest_report_id is not None:
|
142
|
-
|
143
|
-
initial_report = prev_transform.latest_report
|
130
|
+
prev_report = prev_transform.latest_report
|
144
131
|
if prev_transform.source_code_id is not None:
|
145
|
-
|
146
|
-
initial_source = prev_transform.source_code
|
132
|
+
prev_source = prev_transform.source_code
|
147
133
|
ln.settings.silence_file_run_transform_warning = True
|
148
134
|
# register the source code
|
149
135
|
if transform.source_code is not None:
|
@@ -173,7 +159,7 @@ def save_run_context_core(
|
|
173
159
|
source_code_path,
|
174
160
|
description=f"Source of transform {transform.uid}",
|
175
161
|
version=transform.version,
|
176
|
-
is_new_version_of=
|
162
|
+
is_new_version_of=prev_source,
|
177
163
|
visibility=0, # hidden file
|
178
164
|
run=False,
|
179
165
|
)
|
@@ -207,7 +193,7 @@ def save_run_context_core(
|
|
207
193
|
report_file = ln.Artifact(
|
208
194
|
filepath_html,
|
209
195
|
description=f"Report of run {run.uid}",
|
210
|
-
is_new_version_of=
|
196
|
+
is_new_version_of=prev_report,
|
211
197
|
visibility=0, # hidden file
|
212
198
|
run=False,
|
213
199
|
)
|
lamindb/_from_values.py
CHANGED
@@ -19,19 +19,26 @@ def get_or_create_records(
|
|
19
19
|
field: StrField,
|
20
20
|
*,
|
21
21
|
from_public: bool = False,
|
22
|
-
|
22
|
+
organism: Registry | str | None = None,
|
23
|
+
public_source: Registry | None = None,
|
23
24
|
) -> list[Registry]:
|
24
25
|
"""Get or create records from iterables."""
|
25
26
|
upon_create_search_names = settings.upon_create_search_names
|
26
|
-
settings.upon_create_search_names = False
|
27
27
|
feature: Feature = None
|
28
|
+
organism = _get_organism_record(field, organism)
|
29
|
+
kwargs: dict = {}
|
30
|
+
if organism is not None:
|
31
|
+
kwargs["organism"] = organism
|
32
|
+
if public_source is not None:
|
33
|
+
kwargs["public_source"] = public_source
|
34
|
+
settings.upon_create_search_names = False
|
28
35
|
try:
|
29
36
|
Registry = field.field.model
|
30
37
|
iterable_idx = index_iterable(iterable)
|
31
38
|
|
32
39
|
# returns existing records & non-existing values
|
33
40
|
records, nonexist_values, msg = get_existing_records(
|
34
|
-
iterable_idx=iterable_idx, field=field, kwargs
|
41
|
+
iterable_idx=iterable_idx, field=field, **kwargs
|
35
42
|
)
|
36
43
|
|
37
44
|
# new records to be created based on new values
|
@@ -78,26 +85,14 @@ def get_or_create_records(
|
|
78
85
|
def get_existing_records(
|
79
86
|
iterable_idx: pd.Index,
|
80
87
|
field: StrField,
|
81
|
-
kwargs
|
88
|
+
**kwargs,
|
82
89
|
):
|
83
|
-
if kwargs is None:
|
84
|
-
kwargs = {}
|
85
90
|
model = field.field.model
|
86
91
|
condition: dict = {} if len(kwargs) == 0 else kwargs.copy()
|
87
92
|
# existing records matching is agnostic to the bionty source
|
88
93
|
if "public_source" in condition:
|
89
94
|
condition.pop("public_source")
|
90
95
|
|
91
|
-
if _has_organism_field(model):
|
92
|
-
from lnschema_bionty._bionty import create_or_get_organism_record
|
93
|
-
|
94
|
-
organism_record = create_or_get_organism_record(
|
95
|
-
organism=kwargs.get("organism"), orm=model
|
96
|
-
)
|
97
|
-
if organism_record is not None:
|
98
|
-
kwargs.update({"organism": organism_record})
|
99
|
-
condition.update({"organism": organism_record})
|
100
|
-
|
101
96
|
# standardize based on the DB reference
|
102
97
|
# log synonyms mapped terms
|
103
98
|
result = model.inspect(
|
@@ -252,7 +247,8 @@ def index_iterable(iterable: Iterable) -> pd.Index:
|
|
252
247
|
|
253
248
|
|
254
249
|
def _print_values(names: list, n: int = 20) -> str:
|
255
|
-
|
250
|
+
names = list(set(names))
|
251
|
+
print_values = ", ".join([f"'{name}'" for name in names[:n] if name != "None"])
|
256
252
|
if len(names) > n:
|
257
253
|
print_values += ", ..."
|
258
254
|
return print_values
|
@@ -322,3 +318,13 @@ def _has_organism_field(orm: Registry) -> bool:
|
|
322
318
|
return True
|
323
319
|
except FieldDoesNotExist:
|
324
320
|
return False
|
321
|
+
|
322
|
+
|
323
|
+
def _get_organism_record(field: StrField, organism: str | Registry) -> Registry:
|
324
|
+
model = field.field.model
|
325
|
+
if _has_organism_field(model):
|
326
|
+
from lnschema_bionty._bionty import create_or_get_organism_record
|
327
|
+
|
328
|
+
organism_record = create_or_get_organism_record(organism=organism, orm=model)
|
329
|
+
if organism_record is not None:
|
330
|
+
return organism_record
|
lamindb/_registry.py
CHANGED
@@ -129,7 +129,11 @@ def __init__(orm: Registry, *args, **kwargs):
|
|
129
129
|
@classmethod # type:ignore
|
130
130
|
@doc_args(Registry.from_values.__doc__)
|
131
131
|
def from_values(
|
132
|
-
cls,
|
132
|
+
cls,
|
133
|
+
values: ListLike,
|
134
|
+
field: StrField | None = None,
|
135
|
+
organism: Registry | str | None = None,
|
136
|
+
public_source: Registry | None = None,
|
133
137
|
) -> list[Registry]:
|
134
138
|
"""{}."""
|
135
139
|
from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
|
@@ -138,7 +142,8 @@ def from_values(
|
|
138
142
|
iterable=values,
|
139
143
|
field=getattr(cls, field_str),
|
140
144
|
from_public=from_public,
|
141
|
-
|
145
|
+
organism=organism,
|
146
|
+
public_source=public_source,
|
142
147
|
)
|
143
148
|
|
144
149
|
|
lamindb/core/__init__.py
CHANGED
@@ -14,14 +14,21 @@ Registries:
|
|
14
14
|
LabelManager
|
15
15
|
IsTree
|
16
16
|
IsVersioned
|
17
|
-
DataFrameAnnotator
|
18
|
-
AnnDataAnnotator
|
19
|
-
AnnotateLookup
|
20
17
|
CanValidate
|
21
18
|
HasParents
|
22
19
|
InspectResult
|
23
20
|
fields
|
24
21
|
|
22
|
+
Annotators:
|
23
|
+
|
24
|
+
.. autosummary::
|
25
|
+
:toctree: .
|
26
|
+
|
27
|
+
DataFrameAnnotator
|
28
|
+
AnnDataAnnotator
|
29
|
+
MuDataAnnotator
|
30
|
+
AnnotateLookup
|
31
|
+
|
25
32
|
Classes:
|
26
33
|
|
27
34
|
.. autosummary::
|
@@ -53,7 +60,12 @@ from lnschema_core.models import (
|
|
53
60
|
Registry,
|
54
61
|
)
|
55
62
|
|
56
|
-
from lamindb._annotate import
|
63
|
+
from lamindb._annotate import (
|
64
|
+
AnnDataAnnotator,
|
65
|
+
AnnotateLookup,
|
66
|
+
DataFrameAnnotator,
|
67
|
+
MuDataAnnotator,
|
68
|
+
)
|
57
69
|
from lamindb._query_manager import QueryManager
|
58
70
|
from lamindb._query_set import QuerySet, RecordsList
|
59
71
|
from lamindb.core._feature_manager import FeatureManager
|
lamindb/core/_data.py
CHANGED
@@ -94,6 +94,23 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
94
94
|
bulk_create(links, ignore_conflicts=True)
|
95
95
|
|
96
96
|
|
97
|
+
def format_repr(value: Registry, exclude: list[str] | str | None = None) -> str:
|
98
|
+
if isinstance(exclude, str):
|
99
|
+
exclude = [exclude]
|
100
|
+
exclude_fields = set() if exclude is None else set(exclude)
|
101
|
+
exclude_fields.update(["created_at", "updated_at"])
|
102
|
+
|
103
|
+
fields = [
|
104
|
+
f
|
105
|
+
for f in value.__repr__(include_foreign_keys=False).split(", ")
|
106
|
+
if not any(f"{excluded_field}=" in f for excluded_field in exclude_fields)
|
107
|
+
]
|
108
|
+
repr = ", ".join(fields)
|
109
|
+
if not repr.endswith(")"):
|
110
|
+
repr += ")"
|
111
|
+
return repr
|
112
|
+
|
113
|
+
|
97
114
|
@doc_args(Data.describe.__doc__)
|
98
115
|
def describe(self: Data):
|
99
116
|
"""{}."""
|
@@ -109,17 +126,7 @@ def describe(self: Data):
|
|
109
126
|
else:
|
110
127
|
direct_fields.append(f.name)
|
111
128
|
|
112
|
-
#
|
113
|
-
# display line by line the foreign key fields
|
114
|
-
from lamindb._parents import _transform_emoji
|
115
|
-
|
116
|
-
emojis = {
|
117
|
-
"storage": "🗃️",
|
118
|
-
"created_by": "👤",
|
119
|
-
"transform": _transform_emoji(self.transform),
|
120
|
-
"run": "👣",
|
121
|
-
"artifact": "📄",
|
122
|
-
}
|
129
|
+
# provenance
|
123
130
|
if len(foreign_key_fields) > 0: # always True for Artifact and Collection
|
124
131
|
record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
|
125
132
|
msg += f"{record_msg}\n\n"
|
@@ -127,17 +134,16 @@ def describe(self: Data):
|
|
127
134
|
msg += f"{colors.green('Provenance')}:\n "
|
128
135
|
related_msg = "".join(
|
129
136
|
[
|
130
|
-
f"
|
131
|
-
for
|
132
|
-
if self.__getattribute__(
|
137
|
+
f"📎 {field}: {format_repr(self.__getattribute__(field))}\n "
|
138
|
+
for field in foreign_key_fields
|
139
|
+
if self.__getattribute__(field) is not None
|
133
140
|
]
|
134
141
|
)
|
135
142
|
msg += related_msg
|
136
143
|
# input of
|
137
|
-
# can only access many-to-many once record is saved
|
138
144
|
if self.id is not None and self.input_of.exists():
|
139
145
|
values = [format_field_value(i.started_at) for i in self.input_of.all()]
|
140
|
-
msg += f"
|
146
|
+
msg += f"📎 input_of ({colors.italic('core.Run')}): {values}\n "
|
141
147
|
msg = msg.rstrip(" ") # do not use removesuffix as we need to remove 2 or 4 spaces
|
142
148
|
msg += print_features(self)
|
143
149
|
msg += print_labels(self)
|
lamindb/core/_feature_manager.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from itertools import compress
|
4
|
-
from typing import TYPE_CHECKING, Iterable
|
4
|
+
from typing import TYPE_CHECKING, Iterable, Optional
|
5
5
|
|
6
6
|
import anndata as ad
|
7
7
|
from anndata import AnnData
|
@@ -91,6 +91,8 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
91
91
|
def print_features(self: Data) -> str:
|
92
92
|
from lamindb._from_values import _print_values
|
93
93
|
|
94
|
+
from ._data import format_repr
|
95
|
+
|
94
96
|
msg = ""
|
95
97
|
features_lookup = Feature.objects.using(self._state.db).lookup().dict()
|
96
98
|
for slot, feature_set in self.features._feature_set_by_slot.items():
|
@@ -98,12 +100,16 @@ def print_features(self: Data) -> str:
|
|
98
100
|
features = feature_set.members
|
99
101
|
name_field = get_default_str_field(features[0])
|
100
102
|
feature_names = [getattr(feature, name_field) for feature in features]
|
101
|
-
msg +=
|
103
|
+
msg += (
|
104
|
+
f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
|
105
|
+
)
|
102
106
|
print_values = _print_values(feature_names, n=20)
|
103
107
|
msg += f" {print_values}\n"
|
104
108
|
else:
|
105
109
|
df_slot = feature_set.features.df()
|
106
|
-
msg +=
|
110
|
+
msg += (
|
111
|
+
f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
|
112
|
+
)
|
107
113
|
for _, row in df_slot.iterrows():
|
108
114
|
if row["type"] == "category" and row["registries"] is not None:
|
109
115
|
labels = self.labels.get(
|
@@ -133,9 +139,10 @@ def print_features(self: Data) -> str:
|
|
133
139
|
|
134
140
|
def parse_feature_sets_from_anndata(
|
135
141
|
adata: AnnData,
|
136
|
-
var_field: FieldAttr,
|
142
|
+
var_field: FieldAttr | None = None,
|
137
143
|
obs_field: FieldAttr = Feature.name,
|
138
|
-
|
144
|
+
mute: bool = False,
|
145
|
+
organism: str | Registry | None = None,
|
139
146
|
) -> dict:
|
140
147
|
data_parse = adata
|
141
148
|
if not isinstance(adata, AnnData): # is a path
|
@@ -149,29 +156,36 @@ def parse_feature_sets_from_anndata(
|
|
149
156
|
data_parse = ad.read(filepath, backed="r")
|
150
157
|
type = "float"
|
151
158
|
else:
|
152
|
-
type =
|
159
|
+
type = (
|
160
|
+
"float"
|
161
|
+
if adata.X is None
|
162
|
+
else convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
|
163
|
+
)
|
153
164
|
feature_sets = {}
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
logger.
|
165
|
+
if var_field is not None:
|
166
|
+
logger.info("parsing feature names of X stored in slot 'var'")
|
167
|
+
logger.indent = " "
|
168
|
+
feature_set_var = FeatureSet.from_values(
|
169
|
+
data_parse.var.index,
|
170
|
+
var_field,
|
171
|
+
type=type,
|
172
|
+
mute=mute,
|
173
|
+
organism=organism,
|
174
|
+
)
|
175
|
+
if feature_set_var is not None:
|
176
|
+
feature_sets["var"] = feature_set_var
|
177
|
+
logger.save(f"linked: {feature_set_var}")
|
178
|
+
logger.indent = ""
|
179
|
+
if feature_set_var is None:
|
180
|
+
logger.warning("skip linking features to artifact in slot 'var'")
|
168
181
|
if len(data_parse.obs.columns) > 0:
|
169
182
|
logger.info("parsing feature names of slot 'obs'")
|
170
183
|
logger.indent = " "
|
171
184
|
feature_set_obs = FeatureSet.from_df(
|
172
185
|
df=data_parse.obs,
|
173
186
|
field=obs_field,
|
174
|
-
|
187
|
+
mute=mute,
|
188
|
+
organism=organism,
|
175
189
|
)
|
176
190
|
if feature_set_obs is not None:
|
177
191
|
feature_sets["obs"] = feature_set_obs
|
@@ -224,7 +238,7 @@ class FeatureManager:
|
|
224
238
|
slot = "columns" if slot is None else slot
|
225
239
|
self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
|
226
240
|
|
227
|
-
def add_from_df(self, field: FieldAttr = Feature.name,
|
241
|
+
def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
|
228
242
|
"""Add features from DataFrame."""
|
229
243
|
if isinstance(self._host, Artifact):
|
230
244
|
assert self._host.accessor == "DataFrame"
|
@@ -235,7 +249,7 @@ class FeatureManager:
|
|
235
249
|
# parse and register features
|
236
250
|
registry = field.field.model
|
237
251
|
df = self._host.load()
|
238
|
-
features = registry.from_values(df.columns, field=field,
|
252
|
+
features = registry.from_values(df.columns, field=field, organism=organism)
|
239
253
|
if len(features) == 0:
|
240
254
|
logger.error(
|
241
255
|
"no validated features found in DataFrame! please register features first!"
|
@@ -252,7 +266,8 @@ class FeatureManager:
|
|
252
266
|
self,
|
253
267
|
var_field: FieldAttr,
|
254
268
|
obs_field: FieldAttr | None = Feature.name,
|
255
|
-
|
269
|
+
mute: bool = False,
|
270
|
+
organism: str | Registry | None = None,
|
256
271
|
):
|
257
272
|
"""Add features from AnnData."""
|
258
273
|
if isinstance(self._host, Artifact):
|
@@ -263,13 +278,53 @@ class FeatureManager:
|
|
263
278
|
# parse and register features
|
264
279
|
adata = self._host.load()
|
265
280
|
feature_sets = parse_feature_sets_from_anndata(
|
266
|
-
adata,
|
281
|
+
adata,
|
282
|
+
var_field=var_field,
|
283
|
+
obs_field=obs_field,
|
284
|
+
mute=mute,
|
285
|
+
organism=organism,
|
267
286
|
)
|
268
287
|
|
269
288
|
# link feature sets
|
270
289
|
self._host._feature_sets = feature_sets
|
271
290
|
self._host.save()
|
272
291
|
|
292
|
+
def add_from_mudata(
|
293
|
+
self,
|
294
|
+
var_fields: dict[str, FieldAttr],
|
295
|
+
obs_fields: dict[str, FieldAttr] = None,
|
296
|
+
mute: bool = False,
|
297
|
+
organism: str | Registry | None = None,
|
298
|
+
):
|
299
|
+
"""Add features from MuData."""
|
300
|
+
if obs_fields is None:
|
301
|
+
obs_fields = {}
|
302
|
+
if isinstance(self._host, Artifact):
|
303
|
+
assert self._host.accessor == "MuData"
|
304
|
+
else:
|
305
|
+
raise NotImplementedError()
|
306
|
+
|
307
|
+
# parse and register features
|
308
|
+
mdata = self._host.load()
|
309
|
+
feature_sets = {}
|
310
|
+
obs_features = features = Feature.from_values(mdata.obs.columns)
|
311
|
+
if len(obs_features) > 0:
|
312
|
+
feature_sets["obs"] = FeatureSet(features=features)
|
313
|
+
for modality, field in var_fields.items():
|
314
|
+
modality_fs = parse_feature_sets_from_anndata(
|
315
|
+
mdata[modality],
|
316
|
+
var_field=field,
|
317
|
+
obs_field=obs_fields.get(modality, Feature.name),
|
318
|
+
mute=mute,
|
319
|
+
organism=organism,
|
320
|
+
)
|
321
|
+
for k, v in modality_fs.items():
|
322
|
+
feature_sets[f"['{modality}'].{k}"] = v
|
323
|
+
|
324
|
+
# link feature sets
|
325
|
+
self._host._feature_sets = feature_sets
|
326
|
+
self._host.save()
|
327
|
+
|
273
328
|
def _add_feature_set(self, feature_set: FeatureSet, slot: str):
|
274
329
|
"""Add new feature set to a slot.
|
275
330
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -49,7 +49,7 @@ def print_labels(self: Data):
|
|
49
49
|
n = labels.count()
|
50
50
|
field = get_default_str_field(labels)
|
51
51
|
print_values = _print_values(labels.list(field), n=10)
|
52
|
-
labels_msg += f"
|
52
|
+
labels_msg += f" 📎 {related_name} ({n}, {colors.italic(related_model)}): {print_values}\n"
|
53
53
|
if len(labels_msg) > 0:
|
54
54
|
return f"{colors.green('Labels')}:\n{labels_msg}"
|
55
55
|
else:
|