lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
@@ -1,253 +1,289 @@
1
- from __future__ import annotations
2
-
3
- from collections import defaultdict
4
- from typing import TYPE_CHECKING, Dict
5
-
6
- import numpy as np
7
- from lamin_utils import colors
8
- from lnschema_core.models import Feature
9
-
10
- from lamindb._from_values import _print_values
11
- from lamindb._record import (
12
- REGISTRY_UNIQUE_FIELD,
13
- get_name_field,
14
- transfer_fk_to_default_db_bulk,
15
- transfer_to_default_db,
16
- )
17
- from lamindb._save import save
18
-
19
- from ._settings import settings
20
- from .schema import dict_related_model_to_related_name
21
-
22
- if TYPE_CHECKING:
23
- from lnschema_core.models import Artifact, Collection, Record
24
-
25
- from lamindb._query_set import QuerySet
26
-
27
-
28
- def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
29
- exclude_set = {
30
- "feature_sets",
31
- "artifacts",
32
- "input_of_runs",
33
- "collections",
34
- "_source_code_artifact_of",
35
- "_report_of",
36
- "_environment_of",
37
- "links_collection",
38
- "links_artifact",
39
- "links_feature_set",
40
- "previous_runs",
41
- "_feature_values",
42
- "_action_targets",
43
- "_lnschema_core_collection__actions_+", # something seems off with this one
44
- "_actions",
45
- }
46
- labels = {} # type: ignore
47
- if self.id is None:
48
- return labels
49
- for related_model_name, related_name in dict_related_model_to_related_name(
50
- self.__class__, links=links
51
- ).items():
52
- if related_name not in exclude_set:
53
- labels[related_name] = (
54
- related_model_name,
55
- getattr(self, related_name).all(),
56
- )
57
- return labels
58
-
59
-
60
- def print_labels(
61
- self: Artifact | Collection, field: str = "name", print_types: bool = False
62
- ):
63
- labels_msg = ""
64
- for related_name, (related_model, labels) in get_labels_as_dict(self).items():
65
- # there is a try except block here to deal with schema inconsistencies
66
- # during transfer between instances
67
- try:
68
- labels_list = list(labels.values_list(field, flat=True))
69
- if len(labels_list) > 0:
70
- get_name_field(labels)
71
- print_values = _print_values(labels_list, n=10)
72
- type_str = f": {related_model}" if print_types else ""
73
- labels_msg += f" .{related_name}{type_str} = {print_values}\n"
74
- except Exception: # noqa: S112
75
- continue
76
- msg = ""
77
- if labels_msg:
78
- msg += f" {colors.italic('Labels')}\n"
79
- msg += labels_msg
80
- return msg
81
-
82
-
83
- # Alex: is this a label transfer function?
84
- def validate_labels(labels: QuerySet | list | dict):
85
- def validate_labels_registry(
86
- labels: QuerySet | list | dict,
87
- ) -> tuple[list[str], list[str]]:
88
- if len(labels) == 0:
89
- return [], []
90
- registry = labels[0].__class__
91
- field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
92
- if hasattr(registry, "_ontology_id_field"):
93
- field = registry._ontology_id_field
94
- # if the field value is None, use uid field
95
- label_uids = np.array(
96
- [getattr(label, field) for label in labels if label is not None]
97
- )
98
- # save labels from ontology_ids
99
- if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
100
- try:
101
- labels_records = registry.from_values(label_uids, field=field)
102
- save([r for r in labels_records if r._state.adding])
103
- except Exception: # noqa S110
104
- pass
105
- field = "uid"
106
- label_uids = np.array(
107
- [getattr(label, field) for label in labels if label is not None]
108
- )
109
- validated = registry.validate(label_uids, field=field, mute=True)
110
- validated_uids = label_uids[validated]
111
- validated_labels = registry.filter(**{f"{field}__in": validated_uids}).list()
112
- new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
113
- return validated_labels, new_labels
114
-
115
- if isinstance(labels, Dict):
116
- result = {}
117
- for registry, labels_registry in labels.items():
118
- result[registry] = validate_labels_registry(labels_registry)
119
- else:
120
- return validate_labels_registry(labels)
121
-
122
-
123
- class LabelManager:
124
- """Label manager.
125
-
126
- This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
127
- typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
128
- with features.
129
- """
130
-
131
- def __init__(self, host: Artifact | Collection):
132
- self._host = host
133
-
134
- def __repr__(self) -> str:
135
- msg = print_labels(self._host)
136
- if len(msg) > 0:
137
- return msg
138
- else:
139
- return "no linked labels"
140
-
141
- def add(
142
- self,
143
- records: Record | list[Record] | QuerySet,
144
- feature: Feature | None = None,
145
- ) -> None:
146
- """Add one or several labels and associate them with a feature.
147
-
148
- Args:
149
- records: Label records to add.
150
- feature: Feature under which to group the labels.
151
- """
152
- from ._data import add_labels
153
-
154
- return add_labels(self._host, records=records, feature=feature)
155
-
156
- def get(
157
- self,
158
- feature: Feature,
159
- mute: bool = False,
160
- flat_names: bool = False,
161
- ) -> QuerySet | dict[str, QuerySet] | list:
162
- """Get labels given a feature.
163
-
164
- Args:
165
- feature: Feature under which labels are grouped.
166
- mute: Show no logging.
167
- flat_names: Flatten list to names rather than returning records.
168
- """
169
- from ._data import get_labels
170
-
171
- return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
172
-
173
- def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
174
- """Add labels from an artifact or collection to another artifact or collection.
175
-
176
- Examples:
177
- >>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
178
- >>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
179
- >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
180
- >>> ln.save(ulabels)
181
- >>> labels = ln.ULabel.filter(name__icontains = "label").all()
182
- >>> artifact1.ulabels.set(labels)
183
- >>> artifact2.labels.add_from(artifact1)
184
- """
185
- from django.db.utils import ProgrammingError
186
-
187
- if transfer_logs is None:
188
- transfer_logs = {"mapped": [], "transferred": []}
189
- using_key = settings._using_key
190
- for related_name, (_, labels) in get_labels_as_dict(data).items():
191
- labels = labels.all()
192
- try:
193
- if not labels.exists():
194
- continue
195
- # look for features
196
- data_name_lower = data.__class__.__name__.lower()
197
- labels_by_features = defaultdict(list)
198
- features = set()
199
- _, new_labels = validate_labels(labels)
200
- if len(new_labels) > 0:
201
- transfer_fk_to_default_db_bulk(
202
- new_labels, using_key, transfer_logs=transfer_logs
203
- )
204
- for label in labels:
205
- # if the link table doesn't follow this convention, we'll ignore it
206
- if not hasattr(label, f"links_{data_name_lower}"):
207
- key = None
208
- else:
209
- link = getattr(label, f"links_{data_name_lower}").get(
210
- **{f"{data_name_lower}_id": data.id}
211
- )
212
- if link.feature is not None:
213
- features.add(link.feature)
214
- key = link.feature.name
215
- else:
216
- key = None
217
- label_returned = transfer_to_default_db(
218
- label,
219
- using_key,
220
- transfer_logs=transfer_logs,
221
- transfer_fk=False,
222
- save=True,
223
- )
224
- # TODO: refactor return value of transfer to default db
225
- if label_returned is not None:
226
- label = label_returned
227
- labels_by_features[key].append(label)
228
- # treat features
229
- _, new_features = validate_labels(list(features))
230
- if len(new_features) > 0:
231
- transfer_fk_to_default_db_bulk(
232
- new_features, using_key, transfer_logs=transfer_logs
233
- )
234
- for feature in new_features:
235
- transfer_to_default_db(
236
- feature,
237
- using_key,
238
- transfer_logs=transfer_logs,
239
- transfer_fk=False,
240
- )
241
- save(new_features)
242
- if hasattr(self._host, related_name):
243
- for feature_name, labels in labels_by_features.items():
244
- if feature_name is not None:
245
- feature_id = Feature.get(name=feature_name).id
246
- else:
247
- feature_id = None
248
- getattr(self._host, related_name).add(
249
- *labels, through_defaults={"feature_id": feature_id}
250
- )
251
- # ProgrammingError is raised when schemas don't match between source and target instances
252
- except ProgrammingError:
253
- continue
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+ from django.db import connections
8
+ from lamin_utils import colors, logger
9
+ from lnschema_core.models import CanValidate, Feature
10
+
11
+ from lamindb._from_values import _print_values
12
+ from lamindb._record import (
13
+ REGISTRY_UNIQUE_FIELD,
14
+ get_name_field,
15
+ transfer_fk_to_default_db_bulk,
16
+ transfer_to_default_db,
17
+ )
18
+ from lamindb._save import save
19
+
20
+ from ._django import get_artifact_with_related, get_related_model
21
+ from ._settings import settings
22
+ from .schema import dict_related_model_to_related_name
23
+
24
+ if TYPE_CHECKING:
25
+ from lnschema_core.models import Artifact, Collection, Record
26
+
27
+ from lamindb._query_set import QuerySet
28
+
29
+
30
+ def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
31
+ exclude_set = {
32
+ "feature_sets",
33
+ "artifacts",
34
+ "input_of_runs",
35
+ "collections",
36
+ "_source_code_artifact_of",
37
+ "_report_of",
38
+ "_environment_of",
39
+ "links_collection",
40
+ "links_artifact",
41
+ "links_feature_set",
42
+ "previous_runs",
43
+ "_feature_values",
44
+ "_action_targets",
45
+ "_lnschema_core_collection__actions_+", # something seems off with this one
46
+ "_actions",
47
+ }
48
+ labels = {} # type: ignore
49
+ if self.id is None:
50
+ return labels
51
+ for related_model_name, related_name in dict_related_model_to_related_name(
52
+ self.__class__, links=links
53
+ ).items():
54
+ if related_name not in exclude_set:
55
+ labels[related_name] = (
56
+ related_model_name,
57
+ getattr(self, related_name).all(),
58
+ )
59
+ return labels
60
+
61
+
62
+ def _print_labels_postgres(
63
+ self: Artifact | Collection, m2m_data: dict | None = None, print_types: bool = False
64
+ ) -> str:
65
+ labels_msg = ""
66
+ if not m2m_data:
67
+ artifact_meta = get_artifact_with_related(self, include_m2m=True)
68
+ m2m_data = artifact_meta.get("related_data", {}).get("m2m", {})
69
+ if m2m_data:
70
+ for related_name, labels in m2m_data.items():
71
+ if not labels or related_name == "feature_sets":
72
+ continue
73
+ related_model = get_related_model(self, related_name)
74
+ print_values = _print_values(labels.values(), n=10)
75
+ type_str = f": {related_model}" if print_types else ""
76
+ labels_msg += f" .{related_name}{type_str} = {print_values}\n"
77
+ return labels_msg
78
+
79
+
80
+ def print_labels(
81
+ self: Artifact | Collection,
82
+ m2m_data: dict | None = None,
83
+ print_types: bool = False,
84
+ ):
85
+ if not self._state.adding and connections[self._state.db].vendor == "postgresql":
86
+ labels_msg = _print_labels_postgres(self, m2m_data, print_types)
87
+ else:
88
+ labels_msg = ""
89
+ for related_name, (related_model, labels) in get_labels_as_dict(self).items():
90
+ # there is a try except block here to deal with schema inconsistencies
91
+ # during transfer between instances
92
+ try:
93
+ field = get_name_field(self.__class__)
94
+ labels_list = list(labels.values_list(field, flat=True))
95
+ if len(labels_list) > 0:
96
+ get_name_field(labels)
97
+ print_values = _print_values(labels_list, n=10)
98
+ type_str = f": {related_model}" if print_types else ""
99
+ labels_msg += f" .{related_name}{type_str} = {print_values}\n"
100
+ except Exception: # noqa: S112
101
+ continue
102
+
103
+ msg = ""
104
+ if labels_msg:
105
+ msg += f" {colors.italic('Labels')}\n"
106
+ msg += labels_msg
107
+ return msg
108
+
109
+
110
+ # Alex: is this a label transfer function?
111
+ def validate_labels(labels: QuerySet | list | dict):
112
+ def validate_labels_registry(
113
+ labels: QuerySet | list | dict,
114
+ ) -> tuple[list[str], list[str]]:
115
+ if len(labels) == 0:
116
+ return [], []
117
+ registry = labels[0].__class__
118
+ field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
119
+ if hasattr(registry, "_ontology_id_field"):
120
+ field = registry._ontology_id_field
121
+ # if the field value is None, use uid field
122
+ label_uids = np.array(
123
+ [getattr(label, field) for label in labels if label is not None]
124
+ )
125
+ # save labels from ontology_ids
126
+ if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
127
+ try:
128
+ labels_records = registry.from_values(label_uids, field=field)
129
+ save([r for r in labels_records if r._state.adding])
130
+ except Exception: # noqa S110
131
+ pass
132
+ field = "uid"
133
+ label_uids = np.array(
134
+ [getattr(label, field) for label in labels if label is not None]
135
+ )
136
+ if issubclass(registry, CanValidate):
137
+ validated = registry.validate(label_uids, field=field, mute=True)
138
+ validated_uids = label_uids[validated]
139
+ validated_labels = registry.filter(
140
+ **{f"{field}__in": validated_uids}
141
+ ).list()
142
+ new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
143
+ else:
144
+ validated_labels = []
145
+ new_labels = list(labels)
146
+ return validated_labels, new_labels
147
+
148
+ if isinstance(labels, dict):
149
+ result = {}
150
+ for registry, labels_registry in labels.items():
151
+ result[registry] = validate_labels_registry(labels_registry)
152
+ else:
153
+ return validate_labels_registry(labels)
154
+
155
+
156
+ class LabelManager:
157
+ """Label manager.
158
+
159
+ This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
160
+ typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
161
+ with features.
162
+ """
163
+
164
+ def __init__(self, host: Artifact | Collection):
165
+ self._host = host
166
+
167
+ def __repr__(self) -> str:
168
+ msg = print_labels(self._host)
169
+ if len(msg) > 0:
170
+ return msg
171
+ else:
172
+ return "no linked labels"
173
+
174
+ def add(
175
+ self,
176
+ records: Record | list[Record] | QuerySet,
177
+ feature: Feature | None = None,
178
+ ) -> None:
179
+ """Add one or several labels and associate them with a feature.
180
+
181
+ Args:
182
+ records: Label records to add.
183
+ feature: Feature under which to group the labels.
184
+ """
185
+ from ._data import add_labels
186
+
187
+ return add_labels(self._host, records=records, feature=feature)
188
+
189
+ def get(
190
+ self,
191
+ feature: Feature,
192
+ mute: bool = False,
193
+ flat_names: bool = False,
194
+ ) -> QuerySet | dict[str, QuerySet] | list:
195
+ """Get labels given a feature.
196
+
197
+ Args:
198
+ feature: Feature under which labels are grouped.
199
+ mute: Show no logging.
200
+ flat_names: Flatten list to names rather than returning records.
201
+ """
202
+ from ._data import get_labels
203
+
204
+ return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
205
+
206
+ def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
207
+ """Add labels from an artifact or collection to another artifact or collection.
208
+
209
+ Examples:
210
+ >>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
211
+ >>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
212
+ >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
213
+ >>> ln.save(ulabels)
214
+ >>> labels = ln.ULabel.filter(name__icontains = "label").all()
215
+ >>> artifact1.ulabels.set(labels)
216
+ >>> artifact2.labels.add_from(artifact1)
217
+ """
218
+ from django.db.utils import ProgrammingError
219
+
220
+ if transfer_logs is None:
221
+ transfer_logs = {"mapped": [], "transferred": [], "run": None}
222
+ using_key = settings._using_key
223
+ for related_name, (_, labels) in get_labels_as_dict(data).items():
224
+ labels = labels.all()
225
+ try:
226
+ if not labels.exists():
227
+ continue
228
+ # look for features
229
+ data_name_lower = data.__class__.__name__.lower()
230
+ labels_by_features = defaultdict(list)
231
+ features = set()
232
+ _, new_labels = validate_labels(labels)
233
+ if len(new_labels) > 0:
234
+ transfer_fk_to_default_db_bulk(
235
+ new_labels, using_key, transfer_logs=transfer_logs
236
+ )
237
+ for label in labels:
238
+ # if the link table doesn't follow this convention, we'll ignore it
239
+ if not hasattr(label, f"links_{data_name_lower}"):
240
+ key = None
241
+ else:
242
+ link = getattr(label, f"links_{data_name_lower}").get(
243
+ **{f"{data_name_lower}_id": data.id}
244
+ )
245
+ if link.feature is not None:
246
+ features.add(link.feature)
247
+ key = link.feature.name
248
+ else:
249
+ key = None
250
+ label_returned = transfer_to_default_db(
251
+ label,
252
+ using_key,
253
+ transfer_logs=transfer_logs,
254
+ transfer_fk=False,
255
+ save=True,
256
+ )
257
+ # TODO: refactor return value of transfer to default db
258
+ if label_returned is not None:
259
+ label = label_returned
260
+ labels_by_features[key].append(label)
261
+ # treat features
262
+ _, new_features = validate_labels(list(features))
263
+ if len(new_features) > 0:
264
+ transfer_fk_to_default_db_bulk(
265
+ new_features, using_key, transfer_logs=transfer_logs
266
+ )
267
+ for feature in new_features:
268
+ transfer_to_default_db(
269
+ feature,
270
+ using_key,
271
+ transfer_logs=transfer_logs,
272
+ transfer_fk=False,
273
+ )
274
+ save(new_features)
275
+ if hasattr(self._host, related_name):
276
+ for feature_name, labels in labels_by_features.items():
277
+ if feature_name is not None:
278
+ feature_id = Feature.get(name=feature_name).id
279
+ else:
280
+ feature_id = None
281
+ getattr(self._host, related_name).add(
282
+ *labels, through_defaults={"feature_id": feature_id}
283
+ )
284
+ # ProgrammingError is raised when schemas don't match between source and target instances
285
+ except ProgrammingError:
286
+ logger.warning(
287
+ f"{related_name} labels cannot be transferred because schema module does not exist in target instance: {labels}"
288
+ )
289
+ continue