lamindb 0.65.1__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_collection.py +21 -4
- lamindb/_feature.py +3 -2
- lamindb/_parents.py +6 -1
- lamindb/_query_set.py +37 -19
- lamindb/dev/__init__.py +2 -1
- lamindb/dev/_mapped_collection.py +11 -4
- {lamindb-0.65.1.dist-info → lamindb-0.66.0.dist-info}/METADATA +10 -10
- {lamindb-0.65.1.dist-info → lamindb-0.66.0.dist-info}/RECORD +11 -11
- {lamindb-0.65.1.dist-info → lamindb-0.66.0.dist-info}/LICENSE +0 -0
- {lamindb-0.65.1.dist-info → lamindb-0.66.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_collection.py
CHANGED
@@ -5,7 +5,7 @@ import anndata as ad
|
|
5
5
|
import pandas as pd
|
6
6
|
from lamin_utils import logger
|
7
7
|
from lamindb_setup.dev._docs import doc_args
|
8
|
-
from lnschema_core.models import Collection, Feature, FeatureSet
|
8
|
+
from lnschema_core.models import Collection, CollectionArtifact, Feature, FeatureSet
|
9
9
|
from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
|
10
10
|
|
11
11
|
from lamindb._utils import attach_func_to_class_method
|
@@ -15,6 +15,7 @@ from lamindb.dev.versioning import get_uid_from_old_version, init_uid
|
|
15
15
|
|
16
16
|
from . import _TESTING, Artifact, Run
|
17
17
|
from ._artifact import parse_feature_sets_from_anndata
|
18
|
+
from ._query_set import QuerySet
|
18
19
|
from ._registry import init_self_from_db
|
19
20
|
from .dev._data import (
|
20
21
|
add_transform_to_kwargs,
|
@@ -312,7 +313,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
|
|
312
313
|
def mapped(
|
313
314
|
self,
|
314
315
|
label_keys: Optional[Union[str, List[str]]] = None,
|
315
|
-
|
316
|
+
join: Optional[Literal["inner", "outer"]] = "inner",
|
316
317
|
encode_labels: bool = True,
|
317
318
|
cache_categories: bool = True,
|
318
319
|
parallel: bool = False,
|
@@ -333,7 +334,7 @@ def mapped(
|
|
333
334
|
return MappedCollection(
|
334
335
|
path_list,
|
335
336
|
label_keys,
|
336
|
-
|
337
|
+
join,
|
337
338
|
encode_labels,
|
338
339
|
cache_categories,
|
339
340
|
parallel,
|
@@ -426,7 +427,14 @@ def save(self, *args, **kwargs) -> None:
|
|
426
427
|
super(Collection, self).save()
|
427
428
|
if hasattr(self, "_artifacts"):
|
428
429
|
if self._artifacts is not None and len(self._artifacts) > 0:
|
429
|
-
|
430
|
+
links = [
|
431
|
+
CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
|
432
|
+
for artifact in self._artifacts
|
433
|
+
]
|
434
|
+
# the below seems to preserve the order of the list in the
|
435
|
+
# auto-incrementing integer primary
|
436
|
+
# merely using .unordered_artifacts.set(*...) doesn't achieve this
|
437
|
+
CollectionArtifact.objects.bulk_create(links)
|
430
438
|
save_feature_set_links(self)
|
431
439
|
|
432
440
|
|
@@ -439,6 +447,14 @@ def restore(self) -> None:
|
|
439
447
|
self.artifact.save()
|
440
448
|
|
441
449
|
|
450
|
+
@property # type: ignore
|
451
|
+
@doc_args(Collection.artifacts.__doc__)
|
452
|
+
def artifacts(self) -> QuerySet:
|
453
|
+
"""{}."""
|
454
|
+
_track_run_input(self)
|
455
|
+
return self.unordered_artifacts.order_by("collectionartifact__id")
|
456
|
+
|
457
|
+
|
442
458
|
METHOD_NAMES = [
|
443
459
|
"__init__",
|
444
460
|
"from_anndata",
|
@@ -465,3 +481,4 @@ for name in METHOD_NAMES:
|
|
465
481
|
|
466
482
|
# this seems a Django-generated function
|
467
483
|
delattr(Collection, "get_visibility_display")
|
484
|
+
Collection.artifacts = artifacts
|
lamindb/_feature.py
CHANGED
@@ -9,6 +9,7 @@ from lamindb._utils import attach_func_to_class_method
|
|
9
9
|
from lamindb.dev._settings import settings
|
10
10
|
|
11
11
|
from . import _TESTING
|
12
|
+
from ._query_set import RecordsList
|
12
13
|
|
13
14
|
FEATURE_TYPES = {
|
14
15
|
"int": "number",
|
@@ -86,7 +87,7 @@ def categoricals_from_df(df: "pd.DataFrame") -> Dict:
|
|
86
87
|
|
87
88
|
@classmethod # type:ignore
|
88
89
|
@doc_args(Feature.from_df.__doc__)
|
89
|
-
def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
|
90
|
+
def from_df(cls, df: "pd.DataFrame") -> "RecordsList":
|
90
91
|
"""{}."""
|
91
92
|
categoricals = categoricals_from_df(df)
|
92
93
|
|
@@ -141,7 +142,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
|
|
141
142
|
# f" {colors.yellow('unmapped categories')}:\n "
|
142
143
|
# f" {categoricals_with_unmapped_categories_formatted}"
|
143
144
|
# )
|
144
|
-
return features
|
145
|
+
return RecordsList(features)
|
145
146
|
|
146
147
|
|
147
148
|
@doc_args(Feature.save.__doc__)
|
lamindb/_parents.py
CHANGED
@@ -275,10 +275,15 @@ def _record_label(record: Registry, field: Optional[str] = None):
|
|
275
275
|
)
|
276
276
|
elif isinstance(record, Run):
|
277
277
|
name = f'{record.transform.name.replace("&", "&amp;")}'
|
278
|
+
user_display = (
|
279
|
+
record.created_by.handle
|
280
|
+
if record.created_by.name is None
|
281
|
+
else record.created_by.name
|
282
|
+
)
|
278
283
|
return (
|
279
284
|
rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
|
280
285
|
rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
|
281
|
-
rf" user={
|
286
|
+
rf" user={user_display}<BR/>run={format_field_value(record.run_at)}</FONT>>"
|
282
287
|
)
|
283
288
|
elif isinstance(record, Transform):
|
284
289
|
name = f'{record.name.replace("&", "&amp;")}'
|
lamindb/_query_set.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
from
|
1
|
+
from collections import UserList
|
2
|
+
from typing import Dict, Iterable, List, NamedTuple, Optional, Union
|
2
3
|
|
3
4
|
import pandas as pd
|
4
5
|
from django.db import models
|
@@ -21,6 +22,40 @@ class MultipleResultsFound(Exception):
|
|
21
22
|
# return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
|
22
23
|
|
23
24
|
|
25
|
+
def get_keys_from_df(data: List, registry: Registry) -> List[str]:
|
26
|
+
if len(data) > 0:
|
27
|
+
if isinstance(data[0], dict):
|
28
|
+
keys = list(data[0].keys())
|
29
|
+
else:
|
30
|
+
keys = list(data[0].__dict__.keys())
|
31
|
+
if "_state" in keys:
|
32
|
+
keys.remove("_state")
|
33
|
+
else:
|
34
|
+
keys = [
|
35
|
+
field.name
|
36
|
+
for field in registry._meta.fields
|
37
|
+
if not isinstance(field, models.ForeignKey)
|
38
|
+
]
|
39
|
+
keys += [
|
40
|
+
f"{field.name}_id"
|
41
|
+
for field in registry._meta.fields
|
42
|
+
if isinstance(field, models.ForeignKey)
|
43
|
+
]
|
44
|
+
return keys
|
45
|
+
|
46
|
+
|
47
|
+
class RecordsList(UserList):
|
48
|
+
"""Is ordered, can't be queried, but has `.df()`."""
|
49
|
+
|
50
|
+
def __init__(self, records: List[Registry]):
|
51
|
+
super().__init__(record for record in records)
|
52
|
+
|
53
|
+
def df(self) -> pd.DataFrame:
|
54
|
+
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
55
|
+
values = [record.__dict__ for record in self.data]
|
56
|
+
return pd.DataFrame(values, columns=keys)
|
57
|
+
|
58
|
+
|
24
59
|
class QuerySet(models.QuerySet, CanValidate, IsTree):
|
25
60
|
"""Lazily loaded queried records returned by queries.
|
26
61
|
|
@@ -59,24 +94,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
59
94
|
>>> ln.ULabel.filter().df(include=["labels__name", "labels__created_by_id"])
|
60
95
|
"""
|
61
96
|
data = self.values()
|
62
|
-
|
63
|
-
keys = list(data[0].keys())
|
64
|
-
if "created_at" in keys:
|
65
|
-
keys.remove("created_at")
|
66
|
-
else:
|
67
|
-
keys = [
|
68
|
-
field.name
|
69
|
-
for field in self.model._meta.fields
|
70
|
-
if (
|
71
|
-
not isinstance(field, models.ForeignKey)
|
72
|
-
and field.name != "created_at"
|
73
|
-
)
|
74
|
-
]
|
75
|
-
keys += [
|
76
|
-
f"{field.name}_id"
|
77
|
-
for field in self.model._meta.fields
|
78
|
-
if isinstance(field, models.ForeignKey)
|
79
|
-
]
|
97
|
+
keys = get_keys_from_df(data, self.model)
|
80
98
|
df = pd.DataFrame(self.values(), columns=keys)
|
81
99
|
# if len(df) > 0 and "updated_at" in df:
|
82
100
|
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
lamindb/dev/__init__.py
CHANGED
@@ -14,6 +14,7 @@ Queries of registries:
|
|
14
14
|
|
15
15
|
QuerySet
|
16
16
|
QueryManager
|
17
|
+
RecordsList
|
17
18
|
|
18
19
|
Functionality of data registries:
|
19
20
|
|
@@ -62,7 +63,7 @@ from lnschema_core.models import (
|
|
62
63
|
)
|
63
64
|
|
64
65
|
from lamindb._query_manager import QueryManager
|
65
|
-
from lamindb._query_set import QuerySet
|
66
|
+
from lamindb._query_set import QuerySet, RecordsList
|
66
67
|
from lamindb.dev._feature_manager import FeatureManager
|
67
68
|
from lamindb.dev._label_manager import LabelManager
|
68
69
|
|
@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
|
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
import pandas as pd
|
8
|
+
from lamin_utils import logger
|
8
9
|
from lamindb_setup.dev.upath import UPath
|
9
10
|
|
10
11
|
from .storage._backed_access import (
|
@@ -57,13 +58,13 @@ class MappedCollection:
|
|
57
58
|
self,
|
58
59
|
path_list: List[Union[str, PathLike]],
|
59
60
|
label_keys: Optional[Union[str, List[str]]] = None,
|
60
|
-
|
61
|
+
join: Optional[Literal["inner", "outer"]] = "outer",
|
61
62
|
encode_labels: bool = True,
|
62
63
|
cache_categories: bool = True,
|
63
64
|
parallel: bool = False,
|
64
65
|
dtype: Optional[str] = None,
|
65
66
|
):
|
66
|
-
assert
|
67
|
+
assert join in {None, "inner", "outer"}
|
67
68
|
|
68
69
|
self.storages = [] # type: ignore
|
69
70
|
self.conns = [] # type: ignore
|
@@ -83,7 +84,7 @@ class MappedCollection:
|
|
83
84
|
self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
|
84
85
|
self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
|
85
86
|
|
86
|
-
self.join_vars =
|
87
|
+
self.join_vars = join if len(path_list) > 1 else None
|
87
88
|
self.var_indices = None
|
88
89
|
if self.join_vars is not None:
|
89
90
|
self._make_join_vars()
|
@@ -99,7 +100,6 @@ class MappedCollection:
|
|
99
100
|
self._make_encoders(self.label_keys)
|
100
101
|
|
101
102
|
self._dtype = dtype
|
102
|
-
|
103
103
|
self._closed = False
|
104
104
|
|
105
105
|
def _make_connections(self, path_list: list, parallel: bool):
|
@@ -144,13 +144,20 @@ class MappedCollection:
|
|
144
144
|
vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
|
145
145
|
if vars_eq:
|
146
146
|
self.join_vars = None
|
147
|
+
logger.info("The variables are same, no virtual join is performed.")
|
147
148
|
return
|
148
149
|
else:
|
149
150
|
self.var_joint = reduce(pd.Index.intersection, var_list)
|
150
151
|
if len(self.var_joint) > 0:
|
151
152
|
self.join_vars = "inner"
|
153
|
+
logger.info(
|
154
|
+
"The intersection of variables is not empty, using virtual inner join."
|
155
|
+
)
|
152
156
|
else:
|
153
157
|
self.join_vars = "outer"
|
158
|
+
logger.info(
|
159
|
+
"The intersection of variables is empty, using virtual outer join."
|
160
|
+
)
|
154
161
|
|
155
162
|
if self.join_vars == "inner":
|
156
163
|
if self.var_joint is None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.65.1
|
3
|
+
Version: 0.66.0
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.61.0
|
13
|
+
Requires-Dist: lamindb_setup==0.64.0
|
14
14
|
Requires-Dist: lamin_utils==0.13.0
|
15
|
-
Requires-Dist: lamin_cli==0.
|
15
|
+
Requires-Dist: lamin_cli==0.6.0
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -62,14 +62,14 @@ Provides-Extra: zarr
|
|
62
62
|
|
63
63
|
# LaminDB - A data framework for biology
|
64
64
|
|
65
|
-
LaminDB is an open-source Python framework to manage biological data & analyses
|
65
|
+
LaminDB is an open-source Python framework to manage biological data & analyses:
|
66
66
|
|
67
|
-
- Access data & metadata across storage
|
68
|
-
- Track data
|
69
|
-
- Manage registries for experimental metadata & in-house ontologies
|
70
|
-
- Validate, standardize & annotate data
|
67
|
+
- Access data & metadata across storage & databases.
|
68
|
+
- Track data lineage across notebooks & pipelines.
|
69
|
+
- Manage registries for experimental metadata & in-house ontologies.
|
70
|
+
- Validate, standardize & annotate data.
|
71
71
|
- Organize and share data across a mesh of LaminDB instances.
|
72
|
-
- Manage data access
|
72
|
+
- Manage data access, leverage an auditable system of record.
|
73
73
|
|
74
74
|
## Documentation
|
75
75
|
|
@@ -1,14 +1,14 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=rgCY0tETrHKyB7V5f2Y3BhY4BAJicGUUYzSRwIdRlmI,2691
|
2
2
|
lamindb/_artifact.py,sha256=eWsLj8x6Cqy8MR7LxKyScxozM52MaqOTCK8gplloP2c,38087
|
3
|
-
lamindb/_collection.py,sha256=
|
3
|
+
lamindb/_collection.py,sha256=gVcs3A200JZilfdYd0zrX29UrAmhP9Eovu6r_SIxXQ4,17634
|
4
4
|
lamindb/_delete.py,sha256=jO6kcIoxY6EFgqiVF2vlbXaCaqlI25AvBo7nre3JXkQ,1968
|
5
|
-
lamindb/_feature.py,sha256=
|
5
|
+
lamindb/_feature.py,sha256=tEcqFoEj5yp4LSJfMGyiVvxDUuLoZaik6lo05ZKcCtE,6036
|
6
6
|
lamindb/_feature_set.py,sha256=KYgdmMdXb21pfpir1J1O21in3nJvUeznECOB38qfTvk,8654
|
7
7
|
lamindb/_filter.py,sha256=YwWqviJ34kHTMJ8NYlrEw-vsrXkKrVIPsEZSBVvMcrI,1163
|
8
8
|
lamindb/_from_values.py,sha256=dKz4cTUBRkXOOzFX2Ix2cKhK2Lw9PyTgi7d0PI-kh3c,11869
|
9
|
-
lamindb/_parents.py,sha256=
|
9
|
+
lamindb/_parents.py,sha256=hyoN92YnfJFmRWmQMLLUjTKKwnIOJci5z6csMjsdYDE,14165
|
10
10
|
lamindb/_query_manager.py,sha256=m4WUScviuNlMHeNEPZ8H8y0YsMXSbwWyfIgS4L00wBY,4332
|
11
|
-
lamindb/_query_set.py,sha256=
|
11
|
+
lamindb/_query_set.py,sha256=tItL2YNdycpbXklYd8aW4jJX6Z-kGcNclscg0v3l8t4,10495
|
12
12
|
lamindb/_registry.py,sha256=MxYpJUKD6Qu5eO2jO6JOcQBBGxfQpiEGPJrFaXau_jw,17421
|
13
13
|
lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
|
14
14
|
lamindb/_save.py,sha256=UlRHJGUiHGOXv90wmawZVsOqhJIqk8f1wj8MW3Rlq_c,10535
|
@@ -18,11 +18,11 @@ lamindb/_ulabel.py,sha256=HALoy6HerRnehR-u8zPH-qmiFQHWxeAwkZ31jxjrfgI,1893
|
|
18
18
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
19
19
|
lamindb/_validate.py,sha256=fS2685MYX9h6iAWymEorJJmDYA2CGNOSmJpesbG6faU,14400
|
20
20
|
lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
|
21
|
-
lamindb/dev/__init__.py,sha256=
|
21
|
+
lamindb/dev/__init__.py,sha256=LLqivujL8c-oKWC15SJepAYyrTlLNvql5Vdwunc0qvE,1174
|
22
22
|
lamindb/dev/_data.py,sha256=YPZ664qGKMl34LbZCMCEFIxQ-E81iAt_b3lvMiTe-oc,17066
|
23
23
|
lamindb/dev/_feature_manager.py,sha256=jn8x_JbrtLFelmaFh4noOXqGSCfqVuVX0quoa7gTJtM,9366
|
24
24
|
lamindb/dev/_label_manager.py,sha256=6E_pSQicqfTWDGEGe4WPn_3GZl_CCIMTZ6xJDh4EkC0,8740
|
25
|
-
lamindb/dev/_mapped_collection.py,sha256=
|
25
|
+
lamindb/dev/_mapped_collection.py,sha256=NRjOYnC1d3IcVyqhT_Yp0xycepmeytlngYnw-5Xcnw4,14445
|
26
26
|
lamindb/dev/_run_context.py,sha256=4eBZsbfcFpW5nqmRLbRZxuA5oeRW17XVHMzVtMH0bKA,22965
|
27
27
|
lamindb/dev/_settings.py,sha256=nixk8lVijCbq_fRlUpkX5gvO9AdgUFjbXzFThAJhGBA,3824
|
28
28
|
lamindb/dev/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
|
@@ -43,7 +43,7 @@ lamindb/dev/storage/file.py,sha256=jalzFQ8q110UUu_GGQBkU-g3M04h5g4LJ3nLjCzJ4pU,5
|
|
43
43
|
lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
|
44
44
|
lamindb/setup/__init__.py,sha256=WaWKO-2XT67S65lSbS80hUojL-Mr_Wms9UxH6U54TsY,289
|
45
45
|
lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
|
46
|
-
lamindb-0.
|
47
|
-
lamindb-0.
|
48
|
-
lamindb-0.
|
49
|
-
lamindb-0.
|
46
|
+
lamindb-0.66.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
47
|
+
lamindb-0.66.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
48
|
+
lamindb-0.66.0.dist-info/METADATA,sha256=d9S5mPiFAzV1EhN7KB_VnugNCy7vdeivGqtxZsZPD60,3076
|
49
|
+
lamindb-0.66.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|