lamindb 0.77.0__py3-none-any.whl → 0.77.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +6 -3
- lamindb/_can_curate.py +3 -1
- lamindb/_collection.py +1 -1
- lamindb/_curate.py +387 -318
- lamindb/_feature.py +84 -58
- lamindb/_feature_set.py +6 -4
- lamindb/_finish.py +68 -13
- lamindb/_from_values.py +10 -6
- lamindb/_query_set.py +321 -102
- lamindb/_record.py +5 -3
- lamindb/_save.py +1 -0
- lamindb/_view.py +105 -9
- lamindb/core/__init__.py +2 -2
- lamindb/core/_context.py +9 -13
- lamindb/core/_data.py +58 -88
- lamindb/core/_describe.py +139 -0
- lamindb/core/_django.py +5 -6
- lamindb/core/_feature_manager.py +408 -198
- lamindb/core/_label_manager.py +147 -109
- lamindb/core/datasets/__init__.py +31 -2
- lamindb/core/datasets/_core.py +0 -27
- lamindb/core/datasets/_small.py +100 -0
- lamindb/core/exceptions.py +1 -1
- lamindb/core/storage/paths.py +9 -4
- lamindb/core/types.py +12 -2
- {lamindb-0.77.0.dist-info → lamindb-0.77.2.dist-info}/METADATA +7 -8
- {lamindb-0.77.0.dist-info → lamindb-0.77.2.dist-info}/RECORD +30 -28
- {lamindb-0.77.0.dist-info → lamindb-0.77.2.dist-info}/LICENSE +0 -0
- {lamindb-0.77.0.dist-info → lamindb-0.77.2.dist-info}/WHEEL +0 -0
lamindb/core/_label_manager.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import warnings
|
3
4
|
from collections import defaultdict
|
4
5
|
from typing import TYPE_CHECKING
|
5
6
|
|
6
|
-
import numpy as np
|
7
7
|
from django.db import connections
|
8
8
|
from lamin_utils import colors, logger
|
9
9
|
from lnschema_core.models import CanCurate, Feature
|
10
|
+
from rich.table import Column, Table
|
11
|
+
from rich.text import Text
|
10
12
|
|
11
13
|
from lamindb._from_values import _print_values
|
12
14
|
from lamindb._record import (
|
@@ -17,123 +19,164 @@ from lamindb._record import (
|
|
17
19
|
)
|
18
20
|
from lamindb._save import save
|
19
21
|
|
22
|
+
from ._describe import (
|
23
|
+
NAME_WIDTH,
|
24
|
+
TYPE_WIDTH,
|
25
|
+
VALUES_WIDTH,
|
26
|
+
describe_header,
|
27
|
+
print_rich_tree,
|
28
|
+
)
|
20
29
|
from ._django import get_artifact_with_related, get_related_model
|
21
30
|
from ._settings import settings
|
22
31
|
from .schema import dict_related_model_to_related_name
|
23
32
|
|
24
33
|
if TYPE_CHECKING:
|
25
34
|
from lnschema_core.models import Artifact, Collection, Record
|
35
|
+
from rich.tree import Tree
|
26
36
|
|
27
37
|
from lamindb._query_set import QuerySet
|
28
38
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
def
|
33
|
-
|
34
|
-
) -> dict:
|
35
|
-
labels
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
39
|
+
EXCLUDE_LABELS = {"feature_sets"}
|
40
|
+
|
41
|
+
|
42
|
+
def _get_labels(
|
43
|
+
obj, links: bool = False, instance: str | None = None
|
44
|
+
) -> dict[str, QuerySet]:
|
45
|
+
"""Get all labels associated with an object as a dictionary.
|
46
|
+
|
47
|
+
This is a generic approach that uses django orm.
|
48
|
+
"""
|
49
|
+
if obj.id is None:
|
50
|
+
return {}
|
51
|
+
|
52
|
+
labels = {}
|
53
|
+
related_models = dict_related_model_to_related_name(
|
54
|
+
obj.__class__, links=links, instance=instance
|
55
|
+
)
|
56
|
+
|
57
|
+
for _, related_name in related_models.items():
|
58
|
+
if related_name not in EXCLUDE_LABELS and not related_name.startswith("_"):
|
59
|
+
labels[related_name] = getattr(obj, related_name).all()
|
46
60
|
return labels
|
47
61
|
|
48
62
|
|
49
|
-
def
|
50
|
-
self: Artifact | Collection, m2m_data: dict | None = None
|
51
|
-
) -> str:
|
52
|
-
|
53
|
-
|
63
|
+
def _get_labels_postgres(
|
64
|
+
self: Artifact | Collection, m2m_data: dict | None = None
|
65
|
+
) -> dict[str, dict[int, str]]:
|
66
|
+
"""Get all labels associated with an artifact or collection as a dictionary.
|
67
|
+
|
68
|
+
This is a postgres-specific approach that uses django Subquery.
|
69
|
+
"""
|
70
|
+
if m2m_data is None:
|
54
71
|
artifact_meta = get_artifact_with_related(self, include_m2m=True)
|
55
72
|
m2m_data = artifact_meta.get("related_data", {}).get("m2m", {})
|
56
|
-
|
57
|
-
for related_name, labels in m2m_data.items():
|
58
|
-
if not labels or related_name == "feature_sets":
|
59
|
-
continue
|
60
|
-
related_model = get_related_model(self, related_name)
|
61
|
-
print_values = _print_values(labels.values(), n=10)
|
62
|
-
type_str = f": {related_model}" if print_types else ""
|
63
|
-
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
64
|
-
return labels_msg
|
73
|
+
return m2m_data
|
65
74
|
|
66
75
|
|
67
|
-
def
|
76
|
+
def describe_labels(
|
68
77
|
self: Artifact | Collection,
|
69
|
-
|
70
|
-
print_types: bool = False,
|
78
|
+
labels_data: dict | None = None,
|
79
|
+
print_types: bool = False, # deprecated
|
80
|
+
tree: Tree | None = None,
|
81
|
+
as_subtree: bool = False,
|
71
82
|
):
|
83
|
+
"""Describe labels associated with an artifact or collection."""
|
84
|
+
if print_types:
|
85
|
+
warnings.warn(
|
86
|
+
"`print_types` parameter is deprecated and will be removed in a future version. Types are now always printed.",
|
87
|
+
DeprecationWarning,
|
88
|
+
stacklevel=2,
|
89
|
+
)
|
72
90
|
if not self._state.adding and connections[self._state.db].vendor == "postgresql":
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
91
|
+
labels_data = _get_labels_postgres(self, labels_data)
|
92
|
+
if not labels_data:
|
93
|
+
labels_data = _get_labels(self, instance=self._state.db)
|
94
|
+
|
95
|
+
# initialize tree
|
96
|
+
if tree is None:
|
97
|
+
tree = describe_header(self)
|
98
|
+
if not labels_data:
|
99
|
+
return tree
|
100
|
+
|
101
|
+
labels_table = Table(
|
102
|
+
Column(
|
103
|
+
Text.assemble(("Labels", "green_yellow")),
|
104
|
+
style="",
|
105
|
+
no_wrap=True,
|
106
|
+
width=NAME_WIDTH,
|
107
|
+
),
|
108
|
+
Column("", style="dim", no_wrap=True, width=TYPE_WIDTH),
|
109
|
+
Column("", width=VALUES_WIDTH, no_wrap=True),
|
110
|
+
# show_header=True,
|
111
|
+
box=None,
|
112
|
+
pad_edge=False,
|
113
|
+
)
|
114
|
+
for related_name, labels in labels_data.items():
|
115
|
+
if not labels or related_name == "feature_sets":
|
116
|
+
continue
|
117
|
+
if isinstance(labels, dict): # postgres, labels are a dict[id, name]
|
118
|
+
print_values = _print_values(labels.values(), n=10)
|
119
|
+
else: # labels are a QuerySet
|
79
120
|
field = get_name_field(labels)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
msg = ""
|
87
|
-
if labels_msg:
|
88
|
-
msg += f" {colors.italic('Labels')}\n"
|
89
|
-
msg += labels_msg
|
90
|
-
return msg
|
91
|
-
|
92
|
-
|
93
|
-
# Alex: is this a label transfer function?
|
94
|
-
def validate_labels(labels: QuerySet | list | dict):
|
95
|
-
def validate_labels_registry(
|
96
|
-
labels: QuerySet | list | dict,
|
97
|
-
) -> tuple[list[str], list[str]]:
|
98
|
-
if len(labels) == 0:
|
99
|
-
return [], []
|
100
|
-
registry = labels[0].__class__
|
101
|
-
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
102
|
-
if hasattr(registry, "_ontology_id_field"):
|
103
|
-
field = registry._ontology_id_field
|
104
|
-
# if the field value is None, use uid field
|
105
|
-
label_uids = np.array(
|
106
|
-
[getattr(label, field) for label in labels if label is not None]
|
107
|
-
)
|
108
|
-
# save labels from ontology_ids
|
109
|
-
if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
|
110
|
-
try:
|
111
|
-
labels_records = registry.from_values(label_uids, field=field)
|
112
|
-
save([r for r in labels_records if r._state.adding])
|
113
|
-
except Exception: # noqa S110
|
114
|
-
pass
|
115
|
-
field = "uid"
|
116
|
-
label_uids = np.array(
|
117
|
-
[getattr(label, field) for label in labels if label is not None]
|
121
|
+
print_values = _print_values(labels.values_list(field, flat=True), n=10)
|
122
|
+
if print_values:
|
123
|
+
related_model = get_related_model(self, related_name)
|
124
|
+
type_str = related_model.__get_name_with_schema__()
|
125
|
+
labels_table.add_row(
|
126
|
+
f".{related_name}", Text(type_str, style="dim"), print_values
|
118
127
|
)
|
119
|
-
if issubclass(registry, CanCurate):
|
120
|
-
validated = registry.validate(label_uids, field=field, mute=True)
|
121
|
-
validated_uids = label_uids[validated]
|
122
|
-
validated_labels = registry.filter(
|
123
|
-
**{f"{field}__in": validated_uids}
|
124
|
-
).list()
|
125
|
-
new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
|
126
|
-
else:
|
127
|
-
validated_labels = []
|
128
|
-
new_labels = list(labels)
|
129
|
-
return validated_labels, new_labels
|
130
128
|
|
131
|
-
if
|
132
|
-
|
133
|
-
|
134
|
-
result[registry] = validate_labels_registry(labels_registry)
|
129
|
+
if as_subtree:
|
130
|
+
if labels_table.rows:
|
131
|
+
return labels_table
|
135
132
|
else:
|
136
|
-
|
133
|
+
if labels_table.rows:
|
134
|
+
tree.add(labels_table)
|
135
|
+
return tree
|
136
|
+
|
137
|
+
|
138
|
+
def _save_validated_records(
|
139
|
+
labels: QuerySet | list | dict,
|
140
|
+
) -> list[str]:
|
141
|
+
if not labels:
|
142
|
+
return []
|
143
|
+
registry = labels[0].__class__
|
144
|
+
field = (
|
145
|
+
REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
146
|
+
if not hasattr(registry, "_ontology_id_field")
|
147
|
+
else registry._ontology_id_field
|
148
|
+
)
|
149
|
+
# if the field value is None, use uid field
|
150
|
+
label_uids = [getattr(label, field) for label in labels if label is not None]
|
151
|
+
# save labels from ontology_ids
|
152
|
+
if hasattr(registry, "_ontology_id_field") and label_uids:
|
153
|
+
try:
|
154
|
+
records = registry.from_values(label_uids, field=field)
|
155
|
+
save([r for r in records if r._state.adding])
|
156
|
+
except Exception: # noqa: S110
|
157
|
+
pass
|
158
|
+
field = "uid"
|
159
|
+
label_uids = [label.uid for label in labels if label is not None]
|
160
|
+
|
161
|
+
if issubclass(registry, CanCurate):
|
162
|
+
validated = registry.validate(label_uids, field=field, mute=True)
|
163
|
+
new_labels = [
|
164
|
+
label for label, is_valid in zip(labels, validated) if not is_valid
|
165
|
+
]
|
166
|
+
return new_labels
|
167
|
+
return list(labels)
|
168
|
+
|
169
|
+
|
170
|
+
def save_validated_records(
|
171
|
+
labels: QuerySet | list | dict,
|
172
|
+
) -> list[str] | dict[str, list[str]]:
|
173
|
+
"""Save validated labels from public based on ontology_id_fields."""
|
174
|
+
if isinstance(labels, dict):
|
175
|
+
return {
|
176
|
+
registry: _save_validated_records(registry_labels)
|
177
|
+
for registry, registry_labels in labels.items()
|
178
|
+
}
|
179
|
+
return _save_validated_records(labels)
|
137
180
|
|
138
181
|
|
139
182
|
class LabelManager:
|
@@ -144,15 +187,12 @@ class LabelManager:
|
|
144
187
|
with features.
|
145
188
|
"""
|
146
189
|
|
147
|
-
def __init__(self, host: Artifact | Collection):
|
190
|
+
def __init__(self, host: Artifact | Collection) -> None:
|
148
191
|
self._host = host
|
149
192
|
|
150
193
|
def __repr__(self) -> str:
|
151
|
-
|
152
|
-
|
153
|
-
return msg
|
154
|
-
else:
|
155
|
-
return "no linked labels"
|
194
|
+
tree = describe_labels(self._host)
|
195
|
+
return print_rich_tree(tree, fallback="no linked labels")
|
156
196
|
|
157
197
|
def add(
|
158
198
|
self,
|
@@ -201,9 +241,7 @@ class LabelManager:
|
|
201
241
|
if transfer_logs is None:
|
202
242
|
transfer_logs = {"mapped": [], "transferred": [], "run": None}
|
203
243
|
using_key = settings._using_key
|
204
|
-
for related_name,
|
205
|
-
data, instance=data._state.db
|
206
|
-
).items():
|
244
|
+
for related_name, labels in _get_labels(data, instance=data._state.db).items():
|
207
245
|
labels = labels.all()
|
208
246
|
if not labels.exists():
|
209
247
|
continue
|
@@ -211,7 +249,7 @@ class LabelManager:
|
|
211
249
|
data_name_lower = data.__class__.__name__.lower()
|
212
250
|
labels_by_features = defaultdict(list)
|
213
251
|
features = set()
|
214
|
-
|
252
|
+
new_labels = save_validated_records(labels)
|
215
253
|
if len(new_labels) > 0:
|
216
254
|
transfer_fk_to_default_db_bulk(
|
217
255
|
new_labels, using_key, transfer_logs=transfer_logs
|
@@ -241,7 +279,7 @@ class LabelManager:
|
|
241
279
|
label = label_returned
|
242
280
|
labels_by_features[key].append(label)
|
243
281
|
# treat features
|
244
|
-
|
282
|
+
new_features = save_validated_records(list(features))
|
245
283
|
if len(new_features) > 0:
|
246
284
|
transfer_fk_to_default_db_bulk(
|
247
285
|
new_features, using_key, transfer_logs=transfer_logs
|
@@ -255,16 +293,16 @@ class LabelManager:
|
|
255
293
|
)
|
256
294
|
save(new_features)
|
257
295
|
if hasattr(self._host, related_name):
|
258
|
-
for feature_name,
|
296
|
+
for feature_name, feature_labels in labels_by_features.items():
|
259
297
|
if feature_name is not None:
|
260
298
|
feature_id = Feature.get(name=feature_name).id
|
261
299
|
else:
|
262
300
|
feature_id = None
|
263
301
|
getattr(self._host, related_name).add(
|
264
|
-
*
|
302
|
+
*feature_labels, through_defaults={"feature_id": feature_id}
|
265
303
|
)
|
266
304
|
|
267
|
-
def make_external(self, label: Record):
|
305
|
+
def make_external(self, label: Record) -> None:
|
268
306
|
"""Make a label external, aka dissociate label from internal features.
|
269
307
|
|
270
308
|
Args:
|
lamindb/core/datasets/__init__.py
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
"""Test
|
1
|
+
"""Test datasets.
|
2
|
+
|
3
|
+
Small in-memory datasets.
|
4
|
+
|
5
|
+
.. autosummary::
|
6
|
+
:toctree: .
|
7
|
+
|
8
|
+
small_dataset1
|
9
|
+
small_dataset2
|
10
|
+
anndata_with_obs
|
11
|
+
|
12
|
+
Files.
|
2
13
|
|
3
14
|
.. autosummary::
|
4
15
|
:toctree: .
|
@@ -11,8 +22,20 @@
|
|
11
22
|
file_fastq
|
12
23
|
file_bam
|
13
24
|
file_mini_csv
|
25
|
+
|
26
|
+
Directories.
|
27
|
+
|
28
|
+
.. autosummary::
|
29
|
+
:toctree: .
|
30
|
+
|
14
31
|
dir_scrnaseq_cellranger
|
15
32
|
dir_iris_images
|
33
|
+
|
34
|
+
Dataframe, AnnData, MuData.
|
35
|
+
|
36
|
+
.. autosummary::
|
37
|
+
:toctree: .
|
38
|
+
|
16
39
|
df_iris
|
17
40
|
df_iris_in_meter
|
18
41
|
df_iris_in_meter_study1
|
@@ -27,6 +50,12 @@
|
|
27
50
|
mudata_papalexi21_subset
|
28
51
|
schmidt22_crispra_gws_IFNG
|
29
52
|
schmidt22_perturbseq
|
53
|
+
|
54
|
+
Other.
|
55
|
+
|
56
|
+
.. autosummary::
|
57
|
+
:toctree: .
|
58
|
+
|
30
59
|
fake_bio_notebook_titles
|
31
60
|
"""
|
32
61
|
|
@@ -37,7 +66,6 @@ from ._core import (
|
|
37
66
|
anndata_pbmc3k_processed,
|
38
67
|
anndata_pbmc68k_reduced,
|
39
68
|
anndata_suo22_Visium10X,
|
40
|
-
anndata_with_obs,
|
41
69
|
df_iris,
|
42
70
|
df_iris_in_meter,
|
43
71
|
df_iris_in_meter_study1,
|
@@ -57,3 +85,4 @@ from ._core import (
|
|
57
85
|
schmidt22_perturbseq,
|
58
86
|
)
|
59
87
|
from ._fake import fake_bio_notebook_titles
|
88
|
+
from ._small import anndata_with_obs, small_dataset1, small_dataset2
|
lamindb/core/datasets/_core.py
CHANGED
@@ -342,33 +342,6 @@ def anndata_human_immune_cells(
|
|
342
342
|
return adata
|
343
343
|
|
344
344
|
|
345
|
-
def anndata_with_obs() -> ad.AnnData:
|
346
|
-
"""Create a mini anndata with cell_type, disease and tissue."""
|
347
|
-
import anndata as ad
|
348
|
-
import bionty.base as bionty_base
|
349
|
-
|
350
|
-
celltypes = ["T cell", "hematopoietic stem cell", "hepatocyte", "my new cell type"]
|
351
|
-
celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""]
|
352
|
-
diseases = [
|
353
|
-
"chronic kidney disease",
|
354
|
-
"liver lymphoma",
|
355
|
-
"cardiac ventricle disorder",
|
356
|
-
"Alzheimer disease",
|
357
|
-
]
|
358
|
-
tissues = ["kidney", "liver", "heart", "brain"]
|
359
|
-
df = pd.DataFrame()
|
360
|
-
df["cell_type"] = celltypes * 10
|
361
|
-
df["cell_type_id"] = celltype_ids * 10
|
362
|
-
df["tissue"] = tissues * 10
|
363
|
-
df["disease"] = diseases * 10
|
364
|
-
df.index = "obs" + df.index.astype(str)
|
365
|
-
|
366
|
-
adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df)
|
367
|
-
adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values
|
368
|
-
|
369
|
-
return adata
|
370
|
-
|
371
|
-
|
372
345
|
def anndata_suo22_Visium10X(): # pragma: no cover
|
373
346
|
"""AnnData from Suo22 generated by 10x Visium."""
|
374
347
|
import anndata as ad
|
lamindb/core/datasets/_small.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any, Literal
|
4
|
+
|
5
|
+
import anndata as ad
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
|
10
|
+
def small_dataset1(
|
11
|
+
format: Literal["df", "anndata"],
|
12
|
+
with_typo: bool = False,
|
13
|
+
) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
|
14
|
+
# define the data in the dataset
|
15
|
+
# it's a mix of numerical measurements and observation-level metadata
|
16
|
+
ifng = "IFNJ" if with_typo else "IFNG"
|
17
|
+
dataset_dict = {
|
18
|
+
"CD8A": [1, 2, 3],
|
19
|
+
"CD4": [3, 4, 5],
|
20
|
+
"CD14": [5, 6, 7],
|
21
|
+
"cell_medium": ["DMSO", ifng, "DMSO"],
|
22
|
+
"sample_note": ["was ok", "looks naah", "pretty! 🤩"],
|
23
|
+
"cell_type_by_expert": ["B cell", "T cell", "T cell"],
|
24
|
+
"cell_type_by_model": ["B cell", "T cell", "T cell"],
|
25
|
+
}
|
26
|
+
# define the dataset-level metadata
|
27
|
+
metadata = {
|
28
|
+
"temperature": 21.6,
|
29
|
+
"study": "Candidate marker study 1",
|
30
|
+
"date_of_study": "2024-12-01",
|
31
|
+
"study_note": "We had a great time performing this study and the results look compelling.",
|
32
|
+
}
|
33
|
+
# the dataset as DataFrame
|
34
|
+
dataset_df = pd.DataFrame(dataset_dict, index=["sample1", "sample2", "sample3"])
|
35
|
+
if format == "df":
|
36
|
+
return dataset_df, metadata
|
37
|
+
else:
|
38
|
+
dataset_ad = ad.AnnData(
|
39
|
+
dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:], uns=metadata
|
40
|
+
)
|
41
|
+
return dataset_ad
|
42
|
+
|
43
|
+
|
44
|
+
def small_dataset2(
|
45
|
+
format: Literal["df", "anndata"],
|
46
|
+
) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
|
47
|
+
dataset_dict = {
|
48
|
+
"CD8A": [2, 3, 3],
|
49
|
+
"CD4": [3, 4, 5],
|
50
|
+
"CD38": [4, 2, 3],
|
51
|
+
"cell_medium": ["DMSO", "IFNG", "IFNG"],
|
52
|
+
"cell_type_by_model": ["B cell", "T cell", "T cell"],
|
53
|
+
}
|
54
|
+
metadata = {
|
55
|
+
"temperature": 22.6,
|
56
|
+
"study": "Candidate marker study 2",
|
57
|
+
"date_of_study": "2025-02-13",
|
58
|
+
}
|
59
|
+
dataset_df = pd.DataFrame(
|
60
|
+
dataset_dict,
|
61
|
+
index=["sample4", "sample5", "sample6"],
|
62
|
+
)
|
63
|
+
ad.AnnData(
|
64
|
+
dataset_df[["CD8A", "CD4", "CD38"]],
|
65
|
+
obs=dataset_df[["cell_medium", "cell_type_by_model"]],
|
66
|
+
)
|
67
|
+
if format == "df":
|
68
|
+
return dataset_df, metadata
|
69
|
+
else:
|
70
|
+
dataset_ad = ad.AnnData(
|
71
|
+
dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:], uns=metadata
|
72
|
+
)
|
73
|
+
return dataset_ad
|
74
|
+
|
75
|
+
|
76
|
+
def anndata_with_obs() -> ad.AnnData:
|
77
|
+
"""Create a mini anndata with cell_type, disease and tissue."""
|
78
|
+
import anndata as ad
|
79
|
+
import bionty.base as bionty_base
|
80
|
+
|
81
|
+
celltypes = ["T cell", "hematopoietic stem cell", "hepatocyte", "my new cell type"]
|
82
|
+
celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""]
|
83
|
+
diseases = [
|
84
|
+
"chronic kidney disease",
|
85
|
+
"liver lymphoma",
|
86
|
+
"cardiac ventricle disorder",
|
87
|
+
"Alzheimer disease",
|
88
|
+
]
|
89
|
+
tissues = ["kidney", "liver", "heart", "brain"]
|
90
|
+
df = pd.DataFrame()
|
91
|
+
df["cell_type"] = celltypes * 10
|
92
|
+
df["cell_type_id"] = celltype_ids * 10
|
93
|
+
df["tissue"] = tissues * 10
|
94
|
+
df["disease"] = diseases * 10
|
95
|
+
df.index = "obs" + df.index.astype(str)
|
96
|
+
|
97
|
+
adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df)
|
98
|
+
adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values
|
99
|
+
|
100
|
+
return adata
|
lamindb/core/exceptions.py
CHANGED
lamindb/core/storage/paths.py
CHANGED
@@ -4,6 +4,7 @@ import shutil
|
|
4
4
|
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import anndata as ad
|
7
|
+
import fsspec
|
7
8
|
import pandas as pd
|
8
9
|
from lamin_utils import logger
|
9
10
|
from lamindb_setup.core import StorageSettings
|
@@ -45,12 +46,16 @@ def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> s
|
|
45
46
|
return storage_key
|
46
47
|
|
47
48
|
|
48
|
-
def check_path_is_child_of_root(path:
|
49
|
+
def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
|
49
50
|
# str is needed to eliminate UPath storage_options
|
50
51
|
# from the equality checks below
|
51
|
-
|
52
|
-
|
53
|
-
|
52
|
+
# and for fsspec.utils.get_protocol
|
53
|
+
path_str = str(path)
|
54
|
+
root_str = str(root)
|
55
|
+
# check that the protocols are the same first
|
56
|
+
if fsspec.utils.get_protocol(path_str) != fsspec.utils.get_protocol(root_str):
|
57
|
+
return False
|
58
|
+
return UPath(root_str).resolve() in UPath(path_str).resolve().parents
|
54
59
|
|
55
60
|
|
56
61
|
# returns filepath and root of the storage
|
lamindb/core/types.py
CHANGED
@@ -1,18 +1,28 @@
|
|
1
1
|
"""Types.
|
2
2
|
|
3
|
+
Central object types.
|
4
|
+
|
5
|
+
.. autosummary::
|
6
|
+
:toctree: .
|
7
|
+
|
8
|
+
ArtifactType
|
9
|
+
TransformType
|
10
|
+
FeatureDtype
|
11
|
+
|
12
|
+
Basic types.
|
13
|
+
|
3
14
|
.. autosummary::
|
4
15
|
:toctree: .
|
5
16
|
|
6
17
|
UPathStr
|
7
18
|
StrField
|
8
19
|
ListLike
|
9
|
-
TransformType
|
10
|
-
ArtifactType
|
11
20
|
"""
|
12
21
|
|
13
22
|
from lamindb_setup.core.types import UPathStr
|
14
23
|
from lnschema_core.types import (
|
15
24
|
ArtifactType,
|
25
|
+
FeatureDtype,
|
16
26
|
FieldAttr,
|
17
27
|
ListLike,
|
18
28
|
StrField,
|
{lamindb-0.77.0.dist-info → lamindb-0.77.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.77.0
|
3
|
+
Version: 0.77.2
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.9,<3.13
|
@@ -9,11 +9,10 @@ Classifier: Programming Language :: Python :: 3.9
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.10
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
12
|
-
Requires-Dist: lnschema_core==0.77.
|
13
|
-
Requires-Dist: lamin_utils==0.13.
|
14
|
-
Requires-Dist: lamin_cli==0.
|
15
|
-
Requires-Dist: lamindb_setup
|
16
|
-
Requires-Dist: rapidfuzz
|
12
|
+
Requires-Dist: lnschema_core==0.77.1
|
13
|
+
Requires-Dist: lamin_utils==0.13.9
|
14
|
+
Requires-Dist: lamin_cli==0.22.0
|
15
|
+
Requires-Dist: lamindb_setup==0.81.2
|
17
16
|
Requires-Dist: pyarrow
|
18
17
|
Requires-Dist: typing_extensions!=4.6.0
|
19
18
|
Requires-Dist: python-dateutil
|
@@ -22,7 +21,7 @@ Requires-Dist: fsspec
|
|
22
21
|
Requires-Dist: graphviz
|
23
22
|
Requires-Dist: psycopg2-binary
|
24
23
|
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
25
|
-
Requires-Dist: bionty==0.53.
|
24
|
+
Requires-Dist: bionty==0.53.2 ; extra == "bionty"
|
26
25
|
Requires-Dist: cellregistry ; extra == "cellregistry"
|
27
26
|
Requires-Dist: clinicore ; extra == "clinicore"
|
28
27
|
Requires-Dist: line_profiler ; extra == "dev"
|
@@ -41,7 +40,7 @@ Requires-Dist: findrefs ; extra == "findrefs"
|
|
41
40
|
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
42
41
|
Requires-Dist: nbproject==0.10.5 ; extra == "jupyter"
|
43
42
|
Requires-Dist: jupytext ; extra == "jupyter"
|
44
|
-
Requires-Dist: nbconvert ; extra == "jupyter"
|
43
|
+
Requires-Dist: nbconvert>=7.2.1 ; extra == "jupyter"
|
45
44
|
Requires-Dist: omop ; extra == "omop"
|
46
45
|
Requires-Dist: ourprojects ; extra == "ourprojects"
|
47
46
|
Requires-Dist: wetlab ; extra == "wetlab"
|