lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1178
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -387
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +574 -574
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -867
  27. lamindb/core/_label_manager.py +253 -253
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -164
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -204
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -141
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/_feature_set.py CHANGED
@@ -1,242 +1,242 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, Iterable, Type
4
-
5
- import lamindb_setup as ln_setup
6
- import numpy as np
7
- from lamin_utils import logger
8
- from lamindb_setup.core._docs import doc_args
9
- from lamindb_setup.core.hashing import hash_set
10
- from lnschema_core import Feature, FeatureSet, Record, ids
11
- from lnschema_core.types import FieldAttr, ListLike
12
-
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
- from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
- from ._record import init_self_from_db
17
- from .core.exceptions import ValidationError
18
- from .core.schema import (
19
- dict_related_model_to_related_name,
20
- get_related_name,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- import pandas as pd
25
-
26
- from ._query_set import QuerySet
27
-
28
- NUMBER_TYPE = "number"
29
- DICT_KEYS_TYPE = type({}.keys()) # type: ignore
30
-
31
-
32
- def validate_features(features: list[Record]) -> Record:
33
- """Validate and return feature type."""
34
- try:
35
- if len(features) == 0:
36
- raise ValueError("Provide list of features with at least one element")
37
- except TypeError:
38
- raise ValueError(
39
- "Please pass a ListLike of features, not a single feature"
40
- ) from None
41
- if not hasattr(features, "__getitem__"):
42
- raise TypeError("features has to be list-like")
43
- if not isinstance(features[0], Record):
44
- raise TypeError(
45
- "features has to store feature records! use .from_values() otherwise"
46
- )
47
- feature_types = {feature.__class__ for feature in features}
48
- if len(feature_types) > 1:
49
- raise TypeError("feature_set can only contain a single type")
50
- for feature in features:
51
- if feature._state.adding:
52
- raise ValueError("Can only construct feature sets from validated features")
53
- return next(iter(feature_types)) # return value in set of cardinality 1
54
-
55
-
56
- def __init__(self, *args, **kwargs):
57
- if len(args) == len(self._meta.concrete_fields):
58
- super(FeatureSet, self).__init__(*args, **kwargs)
59
- return None
60
- # now we proceed with the user-facing constructor
61
- if len(args) > 1:
62
- raise ValueError("Only one non-keyword arg allowed: features")
63
- features: Iterable[Record] = kwargs.pop("features") if len(args) == 0 else args[0]
64
- dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
65
- name: str | None = kwargs.pop("name") if "name" in kwargs else None
66
- if len(kwargs) > 0:
67
- raise ValueError("Only features, dtype, name are valid keyword arguments")
68
- # now code
69
- features_registry = validate_features(features)
70
- if dtype is None:
71
- dtype = None if features_registry == Feature else NUMBER_TYPE
72
- n_features = len(features)
73
- features_hash = hash_set({feature.uid for feature in features})
74
- feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
75
- if feature_set is not None:
76
- logger.debug(f"loaded: {feature_set}")
77
- init_self_from_db(self, feature_set)
78
- return None
79
- else:
80
- hash = features_hash
81
- self._features = (get_related_name(features_registry), features)
82
-
83
- super(FeatureSet, self).__init__(
84
- uid=ids.base62_20(),
85
- name=name,
86
- dtype=get_type_str(dtype),
87
- n=n_features,
88
- registry=features_registry.__get_name_with_schema__(),
89
- hash=hash,
90
- )
91
-
92
-
93
- @doc_args(FeatureSet.save.__doc__)
94
- def save(self, *args, **kwargs) -> FeatureSet:
95
- """{}""" # noqa: D415
96
- super(FeatureSet, self).save(*args, **kwargs)
97
- if hasattr(self, "_features"):
98
- related_name, records = self._features
99
- getattr(self, related_name).set(records)
100
- return self
101
-
102
-
103
- def get_type_str(dtype: str | None) -> str | None:
104
- if dtype is not None:
105
- type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
106
- else:
107
- type_str = None
108
- return type_str
109
-
110
-
111
- @classmethod # type:ignore
112
- @doc_args(FeatureSet.from_values.__doc__)
113
- def from_values(
114
- cls,
115
- values: ListLike,
116
- field: FieldAttr = Feature.name,
117
- type: str | None = None,
118
- name: str | None = None,
119
- mute: bool = False,
120
- organism: Record | str | None = None,
121
- source: Record | None = None,
122
- raise_validation_error: bool = True,
123
- ) -> FeatureSet:
124
- """{}""" # noqa: D415
125
- if not isinstance(field, FieldAttr):
126
- raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
127
- if len(values) == 0:
128
- raise ValueError("Provide a list of at least one value")
129
- if isinstance(values, DICT_KEYS_TYPE):
130
- values = list(values)
131
- registry = field.field.model
132
- if registry != Feature and type is None:
133
- type = NUMBER_TYPE
134
- logger.debug("setting feature set to 'number'")
135
- validated = registry.validate(values, field=field, mute=mute, organism=organism)
136
- values_array = np.array(values)
137
- validated_values = values_array[validated]
138
- if validated.sum() != len(values):
139
- not_validated_values = values_array[~validated]
140
- msg = (
141
- f"These values could not be validated: {not_validated_values.tolist()}\n"
142
- f"If there are no typos, add them to their registry: {registry.__name__}"
143
- )
144
- if raise_validation_error:
145
- raise ValidationError(msg)
146
- elif len(validated_values) == 0:
147
- return None # temporarily return None here
148
- validated_features = registry.from_values(
149
- validated_values,
150
- field=field,
151
- organism=organism,
152
- source=source,
153
- )
154
- feature_set = FeatureSet(
155
- features=validated_features,
156
- name=name,
157
- dtype=get_type_str(type),
158
- )
159
- return feature_set
160
-
161
-
162
- @classmethod # type:ignore
163
- @doc_args(FeatureSet.from_df.__doc__)
164
- def from_df(
165
- cls,
166
- df: pd.DataFrame,
167
- field: FieldAttr = Feature.name,
168
- name: str | None = None,
169
- mute: bool = False,
170
- organism: Record | str | None = None,
171
- source: Record | None = None,
172
- ) -> FeatureSet | None:
173
- """{}""" # noqa: D415
174
- registry = field.field.model
175
- validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
176
- if validated.sum() == 0:
177
- if mute is True:
178
- logger.warning("no validated features, skip creating feature set")
179
- return None
180
- if registry == Feature:
181
- validated_features = Feature.from_df(df.loc[:, validated])
182
- feature_set = FeatureSet(validated_features, name=name, dtype=None)
183
- else:
184
- dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
185
- if len(set(dtypes)) != 1:
186
- raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
187
- dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
188
- validated_features = registry.from_values(
189
- df.columns[validated],
190
- field=field,
191
- organism=organism,
192
- source=source,
193
- )
194
- feature_set = FeatureSet(
195
- features=validated_features,
196
- name=name,
197
- dtype=get_type_str(dtype),
198
- )
199
- return feature_set
200
-
201
-
202
- @property # type: ignore
203
- @doc_args(FeatureSet.members.__doc__)
204
- def members(self) -> QuerySet:
205
- """{}""" # noqa: D415
206
- if self._state.adding:
207
- # this should return a queryset and not a list...
208
- # need to fix this
209
- return self._features[1]
210
- related_name = self._get_related_name()
211
- if related_name is None:
212
- related_name = "features"
213
- return self.__getattribute__(related_name).all()
214
-
215
-
216
- def _get_related_name(self: FeatureSet) -> str:
217
- feature_sets_related_models = dict_related_model_to_related_name(self)
218
- related_name = feature_sets_related_models.get(self.registry)
219
- return related_name
220
-
221
-
222
- METHOD_NAMES = [
223
- "__init__",
224
- "from_values",
225
- "from_df",
226
- "save",
227
- ]
228
-
229
- if ln_setup._TESTING:
230
- from inspect import signature
231
-
232
- SIGS = {
233
- name: signature(getattr(FeatureSet, name))
234
- for name in METHOD_NAMES
235
- if name != "__init__"
236
- }
237
-
238
- for name in METHOD_NAMES:
239
- attach_func_to_class_method(name, FeatureSet, globals())
240
-
241
- FeatureSet.members = members
242
- FeatureSet._get_related_name = _get_related_name
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Iterable, Type
4
+
5
+ import lamindb_setup as ln_setup
6
+ import numpy as np
7
+ from lamin_utils import logger
8
+ from lamindb_setup.core._docs import doc_args
9
+ from lamindb_setup.core.hashing import hash_set
10
+ from lnschema_core import Feature, FeatureSet, Record, ids
11
+ from lnschema_core.types import FieldAttr, ListLike
12
+
13
+ from lamindb._utils import attach_func_to_class_method
14
+
15
+ from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
+ from ._record import init_self_from_db
17
+ from .core.exceptions import ValidationError
18
+ from .core.schema import (
19
+ dict_related_model_to_related_name,
20
+ get_related_name,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ import pandas as pd
25
+
26
+ from ._query_set import QuerySet
27
+
28
+ NUMBER_TYPE = "number"
29
+ DICT_KEYS_TYPE = type({}.keys()) # type: ignore
30
+
31
+
32
+ def validate_features(features: list[Record]) -> Record:
33
+ """Validate and return feature type."""
34
+ try:
35
+ if len(features) == 0:
36
+ raise ValueError("Provide list of features with at least one element")
37
+ except TypeError:
38
+ raise ValueError(
39
+ "Please pass a ListLike of features, not a single feature"
40
+ ) from None
41
+ if not hasattr(features, "__getitem__"):
42
+ raise TypeError("features has to be list-like")
43
+ if not isinstance(features[0], Record):
44
+ raise TypeError(
45
+ "features has to store feature records! use .from_values() otherwise"
46
+ )
47
+ feature_types = {feature.__class__ for feature in features}
48
+ if len(feature_types) > 1:
49
+ raise TypeError("feature_set can only contain a single type")
50
+ for feature in features:
51
+ if feature._state.adding:
52
+ raise ValueError("Can only construct feature sets from validated features")
53
+ return next(iter(feature_types)) # return value in set of cardinality 1
54
+
55
+
56
+ def __init__(self, *args, **kwargs):
57
+ if len(args) == len(self._meta.concrete_fields):
58
+ super(FeatureSet, self).__init__(*args, **kwargs)
59
+ return None
60
+ # now we proceed with the user-facing constructor
61
+ if len(args) > 1:
62
+ raise ValueError("Only one non-keyword arg allowed: features")
63
+ features: Iterable[Record] = kwargs.pop("features") if len(args) == 0 else args[0]
64
+ dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
65
+ name: str | None = kwargs.pop("name") if "name" in kwargs else None
66
+ if len(kwargs) > 0:
67
+ raise ValueError("Only features, dtype, name are valid keyword arguments")
68
+ # now code
69
+ features_registry = validate_features(features)
70
+ if dtype is None:
71
+ dtype = None if features_registry == Feature else NUMBER_TYPE
72
+ n_features = len(features)
73
+ features_hash = hash_set({feature.uid for feature in features})
74
+ feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
75
+ if feature_set is not None:
76
+ logger.debug(f"loaded: {feature_set}")
77
+ init_self_from_db(self, feature_set)
78
+ return None
79
+ else:
80
+ hash = features_hash
81
+ self._features = (get_related_name(features_registry), features)
82
+
83
+ super(FeatureSet, self).__init__(
84
+ uid=ids.base62_20(),
85
+ name=name,
86
+ dtype=get_type_str(dtype),
87
+ n=n_features,
88
+ registry=features_registry.__get_name_with_schema__(),
89
+ hash=hash,
90
+ )
91
+
92
+
93
+ @doc_args(FeatureSet.save.__doc__)
94
+ def save(self, *args, **kwargs) -> FeatureSet:
95
+ """{}""" # noqa: D415
96
+ super(FeatureSet, self).save(*args, **kwargs)
97
+ if hasattr(self, "_features"):
98
+ related_name, records = self._features
99
+ getattr(self, related_name).set(records)
100
+ return self
101
+
102
+
103
+ def get_type_str(dtype: str | None) -> str | None:
104
+ if dtype is not None:
105
+ type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
106
+ else:
107
+ type_str = None
108
+ return type_str
109
+
110
+
111
+ @classmethod # type:ignore
112
+ @doc_args(FeatureSet.from_values.__doc__)
113
+ def from_values(
114
+ cls,
115
+ values: ListLike,
116
+ field: FieldAttr = Feature.name,
117
+ type: str | None = None,
118
+ name: str | None = None,
119
+ mute: bool = False,
120
+ organism: Record | str | None = None,
121
+ source: Record | None = None,
122
+ raise_validation_error: bool = True,
123
+ ) -> FeatureSet:
124
+ """{}""" # noqa: D415
125
+ if not isinstance(field, FieldAttr):
126
+ raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
127
+ if len(values) == 0:
128
+ raise ValueError("Provide a list of at least one value")
129
+ if isinstance(values, DICT_KEYS_TYPE):
130
+ values = list(values)
131
+ registry = field.field.model
132
+ if registry != Feature and type is None:
133
+ type = NUMBER_TYPE
134
+ logger.debug("setting feature set to 'number'")
135
+ validated = registry.validate(values, field=field, mute=mute, organism=organism)
136
+ values_array = np.array(values)
137
+ validated_values = values_array[validated]
138
+ if validated.sum() != len(values):
139
+ not_validated_values = values_array[~validated]
140
+ msg = (
141
+ f"These values could not be validated: {not_validated_values.tolist()}\n"
142
+ f"If there are no typos, add them to their registry: {registry.__name__}"
143
+ )
144
+ if raise_validation_error:
145
+ raise ValidationError(msg)
146
+ elif len(validated_values) == 0:
147
+ return None # temporarily return None here
148
+ validated_features = registry.from_values(
149
+ validated_values,
150
+ field=field,
151
+ organism=organism,
152
+ source=source,
153
+ )
154
+ feature_set = FeatureSet(
155
+ features=validated_features,
156
+ name=name,
157
+ dtype=get_type_str(type),
158
+ )
159
+ return feature_set
160
+
161
+
162
+ @classmethod # type:ignore
163
+ @doc_args(FeatureSet.from_df.__doc__)
164
+ def from_df(
165
+ cls,
166
+ df: pd.DataFrame,
167
+ field: FieldAttr = Feature.name,
168
+ name: str | None = None,
169
+ mute: bool = False,
170
+ organism: Record | str | None = None,
171
+ source: Record | None = None,
172
+ ) -> FeatureSet | None:
173
+ """{}""" # noqa: D415
174
+ registry = field.field.model
175
+ validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
176
+ if validated.sum() == 0:
177
+ if mute is True:
178
+ logger.warning("no validated features, skip creating feature set")
179
+ return None
180
+ if registry == Feature:
181
+ validated_features = Feature.from_df(df.loc[:, validated])
182
+ feature_set = FeatureSet(validated_features, name=name, dtype=None)
183
+ else:
184
+ dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
185
+ if len(set(dtypes)) != 1:
186
+ raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
187
+ dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
188
+ validated_features = registry.from_values(
189
+ df.columns[validated],
190
+ field=field,
191
+ organism=organism,
192
+ source=source,
193
+ )
194
+ feature_set = FeatureSet(
195
+ features=validated_features,
196
+ name=name,
197
+ dtype=get_type_str(dtype),
198
+ )
199
+ return feature_set
200
+
201
+
202
+ @property # type: ignore
203
+ @doc_args(FeatureSet.members.__doc__)
204
+ def members(self) -> QuerySet:
205
+ """{}""" # noqa: D415
206
+ if self._state.adding:
207
+ # this should return a queryset and not a list...
208
+ # need to fix this
209
+ return self._features[1]
210
+ related_name = self._get_related_name()
211
+ if related_name is None:
212
+ related_name = "features"
213
+ return self.__getattribute__(related_name).all()
214
+
215
+
216
+ def _get_related_name(self: FeatureSet) -> str:
217
+ feature_sets_related_models = dict_related_model_to_related_name(self)
218
+ related_name = feature_sets_related_models.get(self.registry)
219
+ return related_name
220
+
221
+
222
+ METHOD_NAMES = [
223
+ "__init__",
224
+ "from_values",
225
+ "from_df",
226
+ "save",
227
+ ]
228
+
229
+ if ln_setup._TESTING:
230
+ from inspect import signature
231
+
232
+ SIGS = {
233
+ name: signature(getattr(FeatureSet, name))
234
+ for name in METHOD_NAMES
235
+ if name != "__init__"
236
+ }
237
+
238
+ for name in METHOD_NAMES:
239
+ attach_func_to_class_method(name, FeatureSet, globals())
240
+
241
+ FeatureSet.members = members
242
+ FeatureSet._get_related_name = _get_related_name
lamindb/_filter.py CHANGED
@@ -1,23 +1,23 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- from lnschema_core import Artifact, Collection
6
-
7
- from ._query_set import QuerySet, process_expressions
8
-
9
- if TYPE_CHECKING:
10
- from lnschema_core import Record
11
-
12
-
13
- def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
14
- """See :meth:`~lamindb.core.Record.filter`."""
15
- _using_key = None
16
- if "_using_key" in expressions:
17
- _using_key = expressions.pop("_using_key")
18
- expressions = process_expressions(registry, expressions)
19
- qs = QuerySet(model=registry, using=_using_key)
20
- if len(expressions) > 0:
21
- return qs.filter(*queries, **expressions)
22
- else:
23
- return qs
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from lnschema_core import Artifact, Collection
6
+
7
+ from ._query_set import QuerySet, process_expressions
8
+
9
+ if TYPE_CHECKING:
10
+ from lnschema_core import Record
11
+
12
+
13
+ def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
14
+ """See :meth:`~lamindb.core.Record.filter`."""
15
+ _using_key = None
16
+ if "_using_key" in expressions:
17
+ _using_key = expressions.pop("_using_key")
18
+ expressions = process_expressions(registry, expressions)
19
+ qs = QuerySet(model=registry, using=_using_key)
20
+ if len(expressions) > 0:
21
+ return qs.filter(*queries, **expressions)
22
+ else:
23
+ return qs