lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_feature_set.py CHANGED
@@ -1,242 +1,244 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, Iterable, Type
4
-
5
- import lamindb_setup as ln_setup
6
- import numpy as np
7
- from lamin_utils import logger
8
- from lamindb_setup.core._docs import doc_args
9
- from lamindb_setup.core.hashing import hash_set
10
- from lnschema_core import Feature, FeatureSet, Record, ids
11
- from lnschema_core.types import FieldAttr, ListLike
12
-
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
- from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
- from ._record import init_self_from_db
17
- from .core.exceptions import ValidationError
18
- from .core.schema import (
19
- dict_related_model_to_related_name,
20
- get_related_name,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- import pandas as pd
25
-
26
- from ._query_set import QuerySet
27
-
28
- NUMBER_TYPE = "number"
29
- DICT_KEYS_TYPE = type({}.keys()) # type: ignore
30
-
31
-
32
- def validate_features(features: list[Record]) -> Record:
33
- """Validate and return feature type."""
34
- try:
35
- if len(features) == 0:
36
- raise ValueError("Provide list of features with at least one element")
37
- except TypeError:
38
- raise ValueError(
39
- "Please pass a ListLike of features, not a single feature"
40
- ) from None
41
- if not hasattr(features, "__getitem__"):
42
- raise TypeError("features has to be list-like")
43
- if not isinstance(features[0], Record):
44
- raise TypeError(
45
- "features has to store feature records! use .from_values() otherwise"
46
- )
47
- feature_types = {feature.__class__ for feature in features}
48
- if len(feature_types) > 1:
49
- raise TypeError("feature_set can only contain a single type")
50
- for feature in features:
51
- if feature._state.adding:
52
- raise ValueError("Can only construct feature sets from validated features")
53
- return next(iter(feature_types)) # return value in set of cardinality 1
54
-
55
-
56
- def __init__(self, *args, **kwargs):
57
- if len(args) == len(self._meta.concrete_fields):
58
- super(FeatureSet, self).__init__(*args, **kwargs)
59
- return None
60
- # now we proceed with the user-facing constructor
61
- if len(args) > 1:
62
- raise ValueError("Only one non-keyword arg allowed: features")
63
- features: Iterable[Record] = kwargs.pop("features") if len(args) == 0 else args[0]
64
- dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
65
- name: str | None = kwargs.pop("name") if "name" in kwargs else None
66
- if len(kwargs) > 0:
67
- raise ValueError("Only features, dtype, name are valid keyword arguments")
68
- # now code
69
- features_registry = validate_features(features)
70
- if dtype is None:
71
- dtype = None if features_registry == Feature else NUMBER_TYPE
72
- n_features = len(features)
73
- features_hash = hash_set({feature.uid for feature in features})
74
- feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
75
- if feature_set is not None:
76
- logger.debug(f"loaded: {feature_set}")
77
- init_self_from_db(self, feature_set)
78
- return None
79
- else:
80
- hash = features_hash
81
- self._features = (get_related_name(features_registry), features)
82
-
83
- super(FeatureSet, self).__init__(
84
- uid=ids.base62_20(),
85
- name=name,
86
- dtype=get_type_str(dtype),
87
- n=n_features,
88
- registry=features_registry.__get_name_with_schema__(),
89
- hash=hash,
90
- )
91
-
92
-
93
- @doc_args(FeatureSet.save.__doc__)
94
- def save(self, *args, **kwargs) -> FeatureSet:
95
- """{}""" # noqa: D415
96
- super(FeatureSet, self).save(*args, **kwargs)
97
- if hasattr(self, "_features"):
98
- related_name, records = self._features
99
- getattr(self, related_name).set(records)
100
- return self
101
-
102
-
103
- def get_type_str(dtype: str | None) -> str | None:
104
- if dtype is not None:
105
- type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
106
- else:
107
- type_str = None
108
- return type_str
109
-
110
-
111
- @classmethod # type:ignore
112
- @doc_args(FeatureSet.from_values.__doc__)
113
- def from_values(
114
- cls,
115
- values: ListLike,
116
- field: FieldAttr = Feature.name,
117
- type: str | None = None,
118
- name: str | None = None,
119
- mute: bool = False,
120
- organism: Record | str | None = None,
121
- source: Record | None = None,
122
- raise_validation_error: bool = True,
123
- ) -> FeatureSet:
124
- """{}""" # noqa: D415
125
- if not isinstance(field, FieldAttr):
126
- raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
127
- if len(values) == 0:
128
- raise ValueError("Provide a list of at least one value")
129
- if isinstance(values, DICT_KEYS_TYPE):
130
- values = list(values)
131
- registry = field.field.model
132
- if registry != Feature and type is None:
133
- type = NUMBER_TYPE
134
- logger.debug("setting feature set to 'number'")
135
- validated = registry.validate(values, field=field, mute=mute, organism=organism)
136
- values_array = np.array(values)
137
- validated_values = values_array[validated]
138
- if validated.sum() != len(values):
139
- not_validated_values = values_array[~validated]
140
- msg = (
141
- f"These values could not be validated: {not_validated_values.tolist()}\n"
142
- f"If there are no typos, add them to their registry: {registry.__name__}"
143
- )
144
- if raise_validation_error:
145
- raise ValidationError(msg)
146
- elif len(validated_values) == 0:
147
- return None # temporarily return None here
148
- validated_features = registry.from_values(
149
- validated_values,
150
- field=field,
151
- organism=organism,
152
- source=source,
153
- )
154
- feature_set = FeatureSet(
155
- features=validated_features,
156
- name=name,
157
- dtype=get_type_str(type),
158
- )
159
- return feature_set
160
-
161
-
162
- @classmethod # type:ignore
163
- @doc_args(FeatureSet.from_df.__doc__)
164
- def from_df(
165
- cls,
166
- df: pd.DataFrame,
167
- field: FieldAttr = Feature.name,
168
- name: str | None = None,
169
- mute: bool = False,
170
- organism: Record | str | None = None,
171
- source: Record | None = None,
172
- ) -> FeatureSet | None:
173
- """{}""" # noqa: D415
174
- registry = field.field.model
175
- validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
176
- if validated.sum() == 0:
177
- if mute is True:
178
- logger.warning("no validated features, skip creating feature set")
179
- return None
180
- if registry == Feature:
181
- validated_features = Feature.from_df(df.loc[:, validated])
182
- feature_set = FeatureSet(validated_features, name=name, dtype=None)
183
- else:
184
- dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
185
- if len(set(dtypes)) != 1:
186
- raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
187
- dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
188
- validated_features = registry.from_values(
189
- df.columns[validated],
190
- field=field,
191
- organism=organism,
192
- source=source,
193
- )
194
- feature_set = FeatureSet(
195
- features=validated_features,
196
- name=name,
197
- dtype=get_type_str(dtype),
198
- )
199
- return feature_set
200
-
201
-
202
- @property # type: ignore
203
- @doc_args(FeatureSet.members.__doc__)
204
- def members(self) -> QuerySet:
205
- """{}""" # noqa: D415
206
- if self._state.adding:
207
- # this should return a queryset and not a list...
208
- # need to fix this
209
- return self._features[1]
210
- related_name = self._get_related_name()
211
- if related_name is None:
212
- related_name = "features"
213
- return self.__getattribute__(related_name).all()
214
-
215
-
216
- def _get_related_name(self: FeatureSet) -> str:
217
- feature_sets_related_models = dict_related_model_to_related_name(self)
218
- related_name = feature_sets_related_models.get(self.registry)
219
- return related_name
220
-
221
-
222
- METHOD_NAMES = [
223
- "__init__",
224
- "from_values",
225
- "from_df",
226
- "save",
227
- ]
228
-
229
- if ln_setup._TESTING:
230
- from inspect import signature
231
-
232
- SIGS = {
233
- name: signature(getattr(FeatureSet, name))
234
- for name in METHOD_NAMES
235
- if name != "__init__"
236
- }
237
-
238
- for name in METHOD_NAMES:
239
- attach_func_to_class_method(name, FeatureSet, globals())
240
-
241
- FeatureSet.members = members
242
- FeatureSet._get_related_name = _get_related_name
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import lamindb_setup as ln_setup
6
+ import numpy as np
7
+ from lamin_utils import logger
8
+ from lamindb_setup.core._docs import doc_args
9
+ from lamindb_setup.core.hashing import hash_set
10
+ from lnschema_core import Feature, FeatureSet, Record, ids
11
+ from lnschema_core.types import FieldAttr, ListLike
12
+
13
+ from lamindb._utils import attach_func_to_class_method
14
+
15
+ from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
+ from ._record import init_self_from_db
17
+ from .core.exceptions import ValidationError
18
+ from .core.schema import (
19
+ dict_related_model_to_related_name,
20
+ get_related_name,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Iterable
25
+
26
+ import pandas as pd
27
+
28
+ from ._query_set import QuerySet
29
+
30
+ NUMBER_TYPE = "number"
31
+ DICT_KEYS_TYPE = type({}.keys()) # type: ignore
32
+
33
+
34
+ def validate_features(features: list[Record]) -> Record:
35
+ """Validate and return feature type."""
36
+ try:
37
+ if len(features) == 0:
38
+ raise ValueError("Provide list of features with at least one element")
39
+ except TypeError:
40
+ raise ValueError(
41
+ "Please pass a ListLike of features, not a single feature"
42
+ ) from None
43
+ if not hasattr(features, "__getitem__"):
44
+ raise TypeError("features has to be list-like")
45
+ if not isinstance(features[0], Record):
46
+ raise TypeError(
47
+ "features has to store feature records! use .from_values() otherwise"
48
+ )
49
+ feature_types = {feature.__class__ for feature in features}
50
+ if len(feature_types) > 1:
51
+ raise TypeError("feature_set can only contain a single type")
52
+ for feature in features:
53
+ if feature._state.adding:
54
+ raise ValueError("Can only construct feature sets from validated features")
55
+ return next(iter(feature_types)) # return value in set of cardinality 1
56
+
57
+
58
+ def __init__(self, *args, **kwargs):
59
+ if len(args) == len(self._meta.concrete_fields):
60
+ super(FeatureSet, self).__init__(*args, **kwargs)
61
+ return None
62
+ # now we proceed with the user-facing constructor
63
+ if len(args) > 1:
64
+ raise ValueError("Only one non-keyword arg allowed: features")
65
+ features: Iterable[Record] = kwargs.pop("features") if len(args) == 0 else args[0]
66
+ dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
67
+ name: str | None = kwargs.pop("name") if "name" in kwargs else None
68
+ if len(kwargs) > 0:
69
+ raise ValueError("Only features, dtype, name are valid keyword arguments")
70
+ # now code
71
+ features_registry = validate_features(features)
72
+ if dtype is None:
73
+ dtype = None if features_registry == Feature else NUMBER_TYPE
74
+ n_features = len(features)
75
+ features_hash = hash_set({feature.uid for feature in features})
76
+ feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
77
+ if feature_set is not None:
78
+ logger.debug(f"loaded: {feature_set}")
79
+ init_self_from_db(self, feature_set)
80
+ return None
81
+ else:
82
+ hash = features_hash
83
+ self._features = (get_related_name(features_registry), features)
84
+
85
+ super(FeatureSet, self).__init__(
86
+ uid=ids.base62_20(),
87
+ name=name,
88
+ dtype=get_type_str(dtype),
89
+ n=n_features,
90
+ registry=features_registry.__get_name_with_schema__(),
91
+ hash=hash,
92
+ )
93
+
94
+
95
+ @doc_args(FeatureSet.save.__doc__)
96
+ def save(self, *args, **kwargs) -> FeatureSet:
97
+ """{}""" # noqa: D415
98
+ super(FeatureSet, self).save(*args, **kwargs)
99
+ if hasattr(self, "_features"):
100
+ related_name, records = self._features
101
+ getattr(self, related_name).set(records)
102
+ return self
103
+
104
+
105
+ def get_type_str(dtype: str | None) -> str | None:
106
+ if dtype is not None:
107
+ type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
108
+ else:
109
+ type_str = None
110
+ return type_str
111
+
112
+
113
+ @classmethod # type:ignore
114
+ @doc_args(FeatureSet.from_values.__doc__)
115
+ def from_values(
116
+ cls,
117
+ values: ListLike,
118
+ field: FieldAttr = Feature.name,
119
+ type: str | None = None,
120
+ name: str | None = None,
121
+ mute: bool = False,
122
+ organism: Record | str | None = None,
123
+ source: Record | None = None,
124
+ raise_validation_error: bool = True,
125
+ ) -> FeatureSet:
126
+ """{}""" # noqa: D415
127
+ if not isinstance(field, FieldAttr):
128
+ raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
129
+ if len(values) == 0:
130
+ raise ValueError("Provide a list of at least one value")
131
+ if isinstance(values, DICT_KEYS_TYPE):
132
+ values = list(values)
133
+ registry = field.field.model
134
+ if registry != Feature and type is None:
135
+ type = NUMBER_TYPE
136
+ logger.debug("setting feature set to 'number'")
137
+ validated = registry.validate(values, field=field, mute=mute, organism=organism)
138
+ values_array = np.array(values)
139
+ validated_values = values_array[validated]
140
+ if validated.sum() != len(values):
141
+ not_validated_values = values_array[~validated]
142
+ msg = (
143
+ f"These values could not be validated: {not_validated_values.tolist()}\n"
144
+ f"If there are no typos, add them to their registry: {registry.__name__}"
145
+ )
146
+ if raise_validation_error:
147
+ raise ValidationError(msg)
148
+ elif len(validated_values) == 0:
149
+ return None # temporarily return None here
150
+ validated_features = registry.from_values(
151
+ validated_values,
152
+ field=field,
153
+ organism=organism,
154
+ source=source,
155
+ )
156
+ feature_set = FeatureSet(
157
+ features=validated_features,
158
+ name=name,
159
+ dtype=get_type_str(type),
160
+ )
161
+ return feature_set
162
+
163
+
164
+ @classmethod # type:ignore
165
+ @doc_args(FeatureSet.from_df.__doc__)
166
+ def from_df(
167
+ cls,
168
+ df: pd.DataFrame,
169
+ field: FieldAttr = Feature.name,
170
+ name: str | None = None,
171
+ mute: bool = False,
172
+ organism: Record | str | None = None,
173
+ source: Record | None = None,
174
+ ) -> FeatureSet | None:
175
+ """{}""" # noqa: D415
176
+ registry = field.field.model
177
+ validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
178
+ if validated.sum() == 0:
179
+ if mute is True:
180
+ logger.warning("no validated features, skip creating feature set")
181
+ return None
182
+ if registry == Feature:
183
+ validated_features = Feature.from_df(df.loc[:, validated])
184
+ feature_set = FeatureSet(validated_features, name=name, dtype=None)
185
+ else:
186
+ dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
187
+ if len(set(dtypes)) != 1:
188
+ raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
189
+ dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
190
+ validated_features = registry.from_values(
191
+ df.columns[validated],
192
+ field=field,
193
+ organism=organism,
194
+ source=source,
195
+ )
196
+ feature_set = FeatureSet(
197
+ features=validated_features,
198
+ name=name,
199
+ dtype=get_type_str(dtype),
200
+ )
201
+ return feature_set
202
+
203
+
204
+ @property # type: ignore
205
+ @doc_args(FeatureSet.members.__doc__)
206
+ def members(self) -> QuerySet:
207
+ """{}""" # noqa: D415
208
+ if self._state.adding:
209
+ # this should return a queryset and not a list...
210
+ # need to fix this
211
+ return self._features[1]
212
+ related_name = self._get_related_name()
213
+ if related_name is None:
214
+ related_name = "features"
215
+ return self.__getattribute__(related_name).all()
216
+
217
+
218
+ def _get_related_name(self: FeatureSet) -> str:
219
+ feature_sets_related_models = dict_related_model_to_related_name(self)
220
+ related_name = feature_sets_related_models.get(self.registry)
221
+ return related_name
222
+
223
+
224
+ METHOD_NAMES = [
225
+ "__init__",
226
+ "from_values",
227
+ "from_df",
228
+ "save",
229
+ ]
230
+
231
+ if ln_setup._TESTING:
232
+ from inspect import signature
233
+
234
+ SIGS = {
235
+ name: signature(getattr(FeatureSet, name))
236
+ for name in METHOD_NAMES
237
+ if name != "__init__"
238
+ }
239
+
240
+ for name in METHOD_NAMES:
241
+ attach_func_to_class_method(name, FeatureSet, globals())
242
+
243
+ FeatureSet.members = members
244
+ FeatureSet._get_related_name = _get_related_name
lamindb/_filter.py CHANGED
@@ -1,23 +1,23 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- from lnschema_core import Artifact, Collection
6
-
7
- from ._query_set import QuerySet, process_expressions
8
-
9
- if TYPE_CHECKING:
10
- from lnschema_core import Record
11
-
12
-
13
- def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
14
- """See :meth:`~lamindb.core.Record.filter`."""
15
- _using_key = None
16
- if "_using_key" in expressions:
17
- _using_key = expressions.pop("_using_key")
18
- expressions = process_expressions(registry, expressions)
19
- qs = QuerySet(model=registry, using=_using_key)
20
- if len(expressions) > 0:
21
- return qs.filter(*queries, **expressions)
22
- else:
23
- return qs
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from lnschema_core import Artifact, Collection
6
+
7
+ from ._query_set import QuerySet, process_expressions
8
+
9
+ if TYPE_CHECKING:
10
+ from lnschema_core import Record
11
+
12
+
13
+ def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
14
+ """See :meth:`~lamindb.core.Record.filter`."""
15
+ _using_key = None
16
+ if "_using_key" in expressions:
17
+ _using_key = expressions.pop("_using_key")
18
+ expressions = process_expressions(registry, expressions)
19
+ qs = QuerySet(model=registry, using=_using_key)
20
+ if len(expressions) > 0:
21
+ return qs.filter(*queries, **expressions)
22
+ else:
23
+ return qs