lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +114 -113
- lamindb/_artifact.py +1206 -1205
- lamindb/_can_validate.py +621 -579
- lamindb/_collection.py +390 -387
- lamindb/_curate.py +1603 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +244 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +250 -256
- lamindb/_from_values.py +403 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +364 -362
- lamindb/_record.py +668 -649
- lamindb/_run.py +60 -57
- lamindb/_save.py +310 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +130 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +590 -574
- lamindb/core/_data.py +510 -438
- lamindb/core/_django.py +209 -0
- lamindb/core/_feature_manager.py +994 -867
- lamindb/core/_label_manager.py +289 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +188 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +741 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +146 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
- lamindb-0.76.10.dist-info/RECORD +61 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_feature_set.py
CHANGED
@@ -1,242 +1,244 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from typing import TYPE_CHECKING
|
4
|
-
|
5
|
-
import lamindb_setup as ln_setup
|
6
|
-
import numpy as np
|
7
|
-
from lamin_utils import logger
|
8
|
-
from lamindb_setup.core._docs import doc_args
|
9
|
-
from lamindb_setup.core.hashing import hash_set
|
10
|
-
from lnschema_core import Feature, FeatureSet, Record, ids
|
11
|
-
from lnschema_core.types import FieldAttr, ListLike
|
12
|
-
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
|
-
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
|
-
from ._record import init_self_from_db
|
17
|
-
from .core.exceptions import ValidationError
|
18
|
-
from .core.schema import (
|
19
|
-
dict_related_model_to_related_name,
|
20
|
-
get_related_name,
|
21
|
-
)
|
22
|
-
|
23
|
-
if TYPE_CHECKING:
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
if not
|
44
|
-
raise TypeError(
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
type_str =
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
if
|
128
|
-
raise
|
129
|
-
if
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
return
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
"
|
226
|
-
"
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
FeatureSet
|
242
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
import lamindb_setup as ln_setup
|
6
|
+
import numpy as np
|
7
|
+
from lamin_utils import logger
|
8
|
+
from lamindb_setup.core._docs import doc_args
|
9
|
+
from lamindb_setup.core.hashing import hash_set
|
10
|
+
from lnschema_core import Feature, FeatureSet, Record, ids
|
11
|
+
from lnschema_core.types import FieldAttr, ListLike
|
12
|
+
|
13
|
+
from lamindb._utils import attach_func_to_class_method
|
14
|
+
|
15
|
+
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
|
+
from ._record import init_self_from_db
|
17
|
+
from .core.exceptions import ValidationError
|
18
|
+
from .core.schema import (
|
19
|
+
dict_related_model_to_related_name,
|
20
|
+
get_related_name,
|
21
|
+
)
|
22
|
+
|
23
|
+
if TYPE_CHECKING:
|
24
|
+
from collections.abc import Iterable
|
25
|
+
|
26
|
+
import pandas as pd
|
27
|
+
|
28
|
+
from ._query_set import QuerySet
|
29
|
+
|
30
|
+
# dtype label used in this module for feature sets whose registry is not Feature
NUMBER_TYPE = "number"
# concrete class of a dict keys view, used for isinstance checks on input values
DICT_KEYS_TYPE = type(dict().keys())  # type: ignore
|
32
|
+
|
33
|
+
|
34
|
+
def validate_features(features: list[Record]) -> Record:
    """Check a collection of feature records and return their common class.

    Args:
        features: A sized, subscriptable collection of records.

    Returns:
        The single class shared by all elements of `features`.

    Raises:
        ValueError: If `features` has no length, is empty, or contains a
            record whose `_state.adding` flag is set.
        TypeError: If `features` is not subscriptable, its first element is
            not a `Record`, or the elements belong to more than one class.
    """
    try:
        n_features = len(features)
    except TypeError:
        # objects without a length are treated as "a single feature was passed"
        raise ValueError(
            "Please pass a ListLike of features, not a single feature"
        ) from None
    if n_features == 0:
        raise ValueError("Provide list of features with at least one element")
    if not hasattr(features, "__getitem__"):
        raise TypeError("features has to be list-like")
    # only the first element is type-checked here
    first = features[0]
    if not isinstance(first, Record):
        raise TypeError(
            "features has to store feature records! use .from_values() otherwise"
        )
    registries = {record.__class__ for record in features}
    if len(registries) > 1:
        raise TypeError("feature_set can only contain a single type")
    if any(record._state.adding for record in features):
        raise ValueError("Can only construct feature sets from validated features")
    # the set holds exactly one class at this point
    return next(iter(registries))
|
56
|
+
|
57
|
+
|
58
|
+
def __init__(self, *args, **kwargs):
    # One positional value per concrete field: delegate straight to the parent
    # constructor (presumably the ORM's own instantiation path -- confirm).
    if len(args) == len(self._meta.concrete_fields):
        super(FeatureSet, self).__init__(*args, **kwargs)
        return None
    # From here on: the user-facing constructor.
    if len(args) > 1:
        raise ValueError("Only one non-keyword arg allowed: features")
    features: Iterable[Record]
    if args:
        features = args[0]
    else:
        features = kwargs.pop("features")
    dtype: str | None = kwargs.pop("dtype", None)
    name: str | None = kwargs.pop("name", None)
    if kwargs:
        raise ValueError("Only features, dtype, name are valid keyword arguments")
    features_registry = validate_features(features)
    # Without an explicit dtype, only non-Feature registries get a default.
    if dtype is None and features_registry != Feature:
        dtype = NUMBER_TYPE
    n_features = len(features)
    features_hash = hash_set({feature.uid for feature in features})
    # Reuse a stored feature set with the same hash instead of creating a new one.
    existing = FeatureSet.filter(hash=features_hash).one_or_none()
    if existing is not None:
        logger.debug(f"loaded: {existing}")
        init_self_from_db(self, existing)
        return None
    # Keep the records on the instance; save() links them after the row exists.
    self._features = (get_related_name(features_registry), features)

    super(FeatureSet, self).__init__(
        uid=ids.base62_20(),
        name=name,
        dtype=get_type_str(dtype),
        n=n_features,
        registry=features_registry.__get_name_with_schema__(),
        hash=features_hash,
    )
|
93
|
+
|
94
|
+
|
95
|
+
@doc_args(FeatureSet.save.__doc__)
def save(self, *args, **kwargs) -> FeatureSet:
    """{}"""  # noqa: D415
    super(FeatureSet, self).save(*args, **kwargs)
    # nothing to link unless the constructor stored records on the instance
    if not hasattr(self, "_features"):
        return self
    related_name, records = self._features
    # link the stored records through the related manager of that name
    manager = getattr(self, related_name)
    manager.set(records)
    return self
|
103
|
+
|
104
|
+
|
105
|
+
def get_type_str(dtype: str | None) -> str | None:
    """Return the string form of `dtype`.

    `None` and strings are returned unchanged; any other object is
    represented by its `__name__` attribute.
    """
    if dtype is None or isinstance(dtype, str):
        return dtype
    return dtype.__name__  # type: ignore
|
111
|
+
|
112
|
+
|
113
|
+
@classmethod  # type:ignore
@doc_args(FeatureSet.from_values.__doc__)
def from_values(
    cls,
    values: ListLike,
    field: FieldAttr = Feature.name,
    type: str | None = None,
    name: str | None = None,
    mute: bool = False,
    organism: Record | str | None = None,
    source: Record | None = None,
    raise_validation_error: bool = True,
) -> FeatureSet:
    """{}"""  # noqa: D415
    if not isinstance(field, FieldAttr):
        raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
    if len(values) == 0:
        raise ValueError("Provide a list of at least one value")
    # a dict keys view is turned into a list before validation and array conversion
    if isinstance(values, DICT_KEYS_TYPE):
        values = list(values)
    registry = field.field.model
    # registries other than Feature default to the number type
    if type is None and registry != Feature:
        type = NUMBER_TYPE
        logger.debug("setting feature set to 'number'")
    is_validated = registry.validate(values, field=field, mute=mute, organism=organism)
    all_values = np.array(values)
    passing_values = all_values[is_validated]
    if is_validated.sum() != len(values):
        failing_values = all_values[~is_validated]
        msg = (
            f"These values could not be validated: {failing_values.tolist()}\n"
            f"If there are no typos, add them to their registry: {registry.__name__}"
        )
        if raise_validation_error:
            raise ValidationError(msg)
        # not raising: continue with the validated subset, unless it is empty
        if len(passing_values) == 0:
            return None  # temporarily return None here
    records = registry.from_values(
        passing_values,
        field=field,
        organism=organism,
        source=source,
    )
    return FeatureSet(
        features=records,
        name=name,
        dtype=get_type_str(type),
    )
|
162
|
+
|
163
|
+
|
164
|
+
@classmethod  # type:ignore
@doc_args(FeatureSet.from_df.__doc__)
def from_df(
    cls,
    df: pd.DataFrame,
    field: FieldAttr = Feature.name,
    name: str | None = None,
    mute: bool = False,
    organism: Record | str | None = None,
    source: Record | None = None,
) -> FeatureSet | None:
    """{}"""  # noqa: D415
    registry = field.field.model
    validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
    if validated.sum() == 0:
        # NOTE(review): the warning is emitted only when mute is True --
        # presumably validate() already reports when not muted; confirm.
        if mute is True:
            logger.warning("no validated features, skip creating feature set")
        return None
    if registry == Feature:
        # Feature records are built from the validated columns themselves
        feature_records = Feature.from_df(df.loc[:, validated])
        return FeatureSet(feature_records, name=name, dtype=None)
    # other registries: all validated columns must share one dtype
    column_dtypes = [column.dtype for _, column in df.loc[:, validated].items()]
    if len(set(column_dtypes)) != 1:
        raise ValueError(f"data types are heterogeneous: {set(column_dtypes)}")
    dtype = convert_numpy_dtype_to_lamin_feature_type(column_dtypes[0])
    registry_records = registry.from_values(
        df.columns[validated],
        field=field,
        organism=organism,
        source=source,
    )
    return FeatureSet(
        features=registry_records,
        name=name,
        dtype=get_type_str(dtype),
    )
|
202
|
+
|
203
|
+
|
204
|
+
@property  # type: ignore
@doc_args(FeatureSet.members.__doc__)
def members(self) -> QuerySet:
    """{}"""  # noqa: D415
    if self._state.adding:
        # Returns the records held on the instance rather than a queryset;
        # this mismatch with the annotation is a known gap that needs fixing.
        return self._features[1]
    looked_up = self._get_related_name()
    # fall back to the "features" accessor when no related name is found
    accessor = "features" if looked_up is None else looked_up
    return self.__getattribute__(accessor).all()
|
216
|
+
|
217
|
+
|
218
|
+
def _get_related_name(self: FeatureSet) -> str | None:
    """Look up the related name for this feature set's `registry` value.

    Returns:
        The entry stored under `self.registry` in the mapping produced by
        `dict_related_model_to_related_name(self)`, or `None` when the
        mapping has no such key.
    """
    related_names = dict_related_model_to_related_name(self)
    # .get() yields None for an unknown registry, hence the optional return type
    return related_names.get(self.registry)
|
222
|
+
|
223
|
+
|
224
|
+
# names of the functions defined in this module that are attached to FeatureSet
METHOD_NAMES = ["__init__", "from_values", "from_df", "save"]

if ln_setup._TESTING:
    from inspect import signature

    # record the signatures FeatureSet exposes before the attach loop below runs
    SIGS = {}
    for name in METHOD_NAMES:
        if name == "__init__":
            continue
        SIGS[name] = signature(getattr(FeatureSet, name))

for name in METHOD_NAMES:
    attach_func_to_class_method(name, FeatureSet, globals())

# these two are assigned directly instead of going through the attach helper
FeatureSet.members = members
FeatureSet._get_related_name = _get_related_name
|
lamindb/_filter.py
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from typing import TYPE_CHECKING
|
4
|
-
|
5
|
-
from lnschema_core import Artifact, Collection
|
6
|
-
|
7
|
-
from ._query_set import QuerySet, process_expressions
|
8
|
-
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from lnschema_core import Record
|
11
|
-
|
12
|
-
|
13
|
-
def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
    """See :meth:`~lamindb.core.Record.filter`.

    Args:
        registry: The record class to query.
        *queries: Positional query objects forwarded to `QuerySet.filter`.
        **expressions: Keyword lookups; the special key `_using_key` is
            removed and passed to the queryset as `using`.

    Returns:
        A queryset on `registry`, filtered when any positional query or
        keyword expression is present, otherwise unfiltered.
    """
    # `_using_key` is consumed here and never forwarded as a lookup
    _using_key = expressions.pop("_using_key", None)
    expressions = process_expressions(registry, expressions)
    qs = QuerySet(model=registry, using=_using_key)
    # Also filter when only positional queries were given; checking the
    # keyword expressions alone would silently drop them.
    if queries or expressions:
        return qs.filter(*queries, **expressions)
    return qs
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from lnschema_core import Artifact, Collection
|
6
|
+
|
7
|
+
from ._query_set import QuerySet, process_expressions
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from lnschema_core import Record
|
11
|
+
|
12
|
+
|
13
|
+
def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
    """See :meth:`~lamindb.core.Record.filter`.

    Args:
        registry: The record class to query.
        *queries: Positional query objects forwarded to `QuerySet.filter`.
        **expressions: Keyword lookups; the special key `_using_key` is
            removed and passed to the queryset as `using`.

    Returns:
        A queryset on `registry`, filtered when any positional query or
        keyword expression is present, otherwise unfiltered.
    """
    # `_using_key` is consumed here and never forwarded as a lookup
    _using_key = expressions.pop("_using_key", None)
    expressions = process_expressions(registry, expressions)
    qs = QuerySet(model=registry, using=_using_key)
    # Also filter when only positional queries were given; checking the
    # keyword expressions alone would silently drop them.
    if queries or expressions:
        return qs.filter(*queries, **expressions)
    return qs
|