lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1174
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -382
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -93
  24. lamindb/core/_context.py +574 -558
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -866
  27. lamindb/core/_label_manager.py +253 -252
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -0
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -196
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -245
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.6.dist-info/RECORD +0 -59
@@ -1,252 +1,253 @@
1
- from __future__ import annotations
2
-
3
- from collections import defaultdict
4
- from typing import TYPE_CHECKING, Dict
5
-
6
- import numpy as np
7
- from lamin_utils import colors
8
- from lnschema_core.models import Feature
9
-
10
- from lamindb._from_values import _print_values
11
- from lamindb._record import (
12
- REGISTRY_UNIQUE_FIELD,
13
- get_name_field,
14
- transfer_fk_to_default_db_bulk,
15
- transfer_to_default_db,
16
- )
17
- from lamindb._save import save
18
-
19
- from ._settings import settings
20
- from .schema import dict_related_model_to_related_name
21
-
22
- if TYPE_CHECKING:
23
- from lnschema_core.models import Artifact, Collection, Record
24
-
25
- from lamindb._query_set import QuerySet
26
-
27
-
28
- def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
29
- exclude_set = {
30
- "feature_sets",
31
- "artifacts",
32
- "input_of_runs",
33
- "collections",
34
- "_source_code_artifact_of",
35
- "_report_of",
36
- "_environment_of",
37
- "links_collection",
38
- "links_artifact",
39
- "links_feature_set",
40
- "previous_runs",
41
- "_feature_values",
42
- "_action_targets",
43
- "_lnschema_core_collection__actions_+", # something seems off with this one
44
- "_actions",
45
- }
46
- labels = {} # type: ignore
47
- if self.id is None:
48
- return labels
49
- for related_model_name, related_name in dict_related_model_to_related_name(
50
- self.__class__, links=links
51
- ).items():
52
- if related_name not in exclude_set:
53
- labels[related_name] = (
54
- related_model_name,
55
- getattr(self, related_name).all(),
56
- )
57
- return labels
58
-
59
-
60
- def print_labels(
61
- self: Artifact | Collection, field: str = "name", print_types: bool = False
62
- ):
63
- labels_msg = ""
64
- for related_name, (related_model, labels) in get_labels_as_dict(self).items():
65
- # there is a try except block here to deal with schema inconsistencies
66
- # during transfer between instances
67
- try:
68
- labels_list = list(labels.values_list(field, flat=True))
69
- if len(labels_list) > 0:
70
- get_name_field(labels)
71
- print_values = _print_values(labels_list, n=10)
72
- type_str = f": {related_model}" if print_types else ""
73
- labels_msg += f" .{related_name}{type_str} = {print_values}\n"
74
- except Exception: # noqa: S112
75
- continue
76
- msg = ""
77
- if labels_msg:
78
- msg += f" {colors.italic('Labels')}\n"
79
- msg += labels_msg
80
- return msg
81
-
82
-
83
- # Alex: is this a label transfer function?
84
- def validate_labels(labels: QuerySet | list | dict):
85
- def validate_labels_registry(
86
- labels: QuerySet | list | dict,
87
- ) -> tuple[list[str], list[str]]:
88
- if len(labels) == 0:
89
- return [], []
90
- registry = labels[0].__class__
91
- field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
92
- if hasattr(registry, "_ontology_id_field"):
93
- field = registry._ontology_id_field
94
- # if the field value is None, use uid field
95
- label_uids = np.array(
96
- [getattr(label, field) for label in labels if label is not None]
97
- )
98
- # save labels from ontology_ids
99
- if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
100
- try:
101
- save(registry.from_values(label_uids, field=field))
102
- except Exception: # noqa S110
103
- pass
104
- field = "uid"
105
- label_uids = np.array(
106
- [getattr(label, field) for label in labels if label is not None]
107
- )
108
- validated = registry.validate(label_uids, field=field, mute=True)
109
- validated_uids = label_uids[validated]
110
- validated_labels = registry.filter(**{f"{field}__in": validated_uids}).list()
111
- new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
112
- return validated_labels, new_labels
113
-
114
- if isinstance(labels, Dict):
115
- result = {}
116
- for registry, labels_registry in labels.items():
117
- result[registry] = validate_labels_registry(labels_registry)
118
- else:
119
- return validate_labels_registry(labels)
120
-
121
-
122
- class LabelManager:
123
- """Label manager.
124
-
125
- This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
126
- typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
127
- with features.
128
- """
129
-
130
- def __init__(self, host: Artifact | Collection):
131
- self._host = host
132
-
133
- def __repr__(self) -> str:
134
- msg = print_labels(self._host)
135
- if len(msg) > 0:
136
- return msg
137
- else:
138
- return "no linked labels"
139
-
140
- def add(
141
- self,
142
- records: Record | list[Record] | QuerySet,
143
- feature: Feature | None = None,
144
- ) -> None:
145
- """Add one or several labels and associate them with a feature.
146
-
147
- Args:
148
- records: Label records to add.
149
- feature: Feature under which to group the labels.
150
- """
151
- from ._data import add_labels
152
-
153
- return add_labels(self._host, records=records, feature=feature)
154
-
155
- def get(
156
- self,
157
- feature: Feature,
158
- mute: bool = False,
159
- flat_names: bool = False,
160
- ) -> QuerySet | dict[str, QuerySet] | list:
161
- """Get labels given a feature.
162
-
163
- Args:
164
- feature: Feature under which labels are grouped.
165
- mute: Show no logging.
166
- flat_names: Flatten list to names rather than returning records.
167
- """
168
- from ._data import get_labels
169
-
170
- return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
171
-
172
- def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
173
- """Add labels from an artifact or collection to another artifact or collection.
174
-
175
- Examples:
176
- >>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
177
- >>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
178
- >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
179
- >>> ln.save(ulabels)
180
- >>> labels = ln.ULabel.filter(name__icontains = "label").all()
181
- >>> artifact1.ulabels.set(labels)
182
- >>> artifact2.labels.add_from(artifact1)
183
- """
184
- from django.db.utils import ProgrammingError
185
-
186
- if transfer_logs is None:
187
- transfer_logs = {"mapped": [], "transferred": []}
188
- using_key = settings._using_key
189
- for related_name, (_, labels) in get_labels_as_dict(data).items():
190
- labels = labels.all()
191
- try:
192
- if not labels.exists():
193
- continue
194
- # look for features
195
- data_name_lower = data.__class__.__name__.lower()
196
- labels_by_features = defaultdict(list)
197
- features = set()
198
- _, new_labels = validate_labels(labels)
199
- if len(new_labels) > 0:
200
- transfer_fk_to_default_db_bulk(
201
- new_labels, using_key, transfer_logs=transfer_logs
202
- )
203
- for label in labels:
204
- # if the link table doesn't follow this convention, we'll ignore it
205
- if not hasattr(label, f"links_{data_name_lower}"):
206
- key = None
207
- else:
208
- link = getattr(label, f"links_{data_name_lower}").get(
209
- **{f"{data_name_lower}_id": data.id}
210
- )
211
- if link.feature is not None:
212
- features.add(link.feature)
213
- key = link.feature.name
214
- else:
215
- key = None
216
- label_returned = transfer_to_default_db(
217
- label,
218
- using_key,
219
- transfer_logs=transfer_logs,
220
- transfer_fk=False,
221
- save=True,
222
- )
223
- # TODO: refactor return value of transfer to default db
224
- if label_returned is not None:
225
- label = label_returned
226
- labels_by_features[key].append(label)
227
- # treat features
228
- _, new_features = validate_labels(list(features))
229
- if len(new_features) > 0:
230
- transfer_fk_to_default_db_bulk(
231
- new_features, using_key, transfer_logs=transfer_logs
232
- )
233
- for feature in new_features:
234
- transfer_to_default_db(
235
- feature,
236
- using_key,
237
- transfer_logs=transfer_logs,
238
- transfer_fk=False,
239
- )
240
- save(new_features)
241
- if hasattr(self._host, related_name):
242
- for feature_name, labels in labels_by_features.items():
243
- if feature_name is not None:
244
- feature_id = Feature.get(name=feature_name).id
245
- else:
246
- feature_id = None
247
- getattr(self._host, related_name).add(
248
- *labels, through_defaults={"feature_id": feature_id}
249
- )
250
- # ProgrammingError is raised when schemas don't match between source and target instances
251
- except ProgrammingError:
252
- continue
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from typing import TYPE_CHECKING, Dict
5
+
6
+ import numpy as np
7
+ from lamin_utils import colors
8
+ from lnschema_core.models import Feature
9
+
10
+ from lamindb._from_values import _print_values
11
+ from lamindb._record import (
12
+ REGISTRY_UNIQUE_FIELD,
13
+ get_name_field,
14
+ transfer_fk_to_default_db_bulk,
15
+ transfer_to_default_db,
16
+ )
17
+ from lamindb._save import save
18
+
19
+ from ._settings import settings
20
+ from .schema import dict_related_model_to_related_name
21
+
22
+ if TYPE_CHECKING:
23
+ from lnschema_core.models import Artifact, Collection, Record
24
+
25
+ from lamindb._query_set import QuerySet
26
+
27
+
28
+ def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
29
+ exclude_set = {
30
+ "feature_sets",
31
+ "artifacts",
32
+ "input_of_runs",
33
+ "collections",
34
+ "_source_code_artifact_of",
35
+ "_report_of",
36
+ "_environment_of",
37
+ "links_collection",
38
+ "links_artifact",
39
+ "links_feature_set",
40
+ "previous_runs",
41
+ "_feature_values",
42
+ "_action_targets",
43
+ "_lnschema_core_collection__actions_+", # something seems off with this one
44
+ "_actions",
45
+ }
46
+ labels = {} # type: ignore
47
+ if self.id is None:
48
+ return labels
49
+ for related_model_name, related_name in dict_related_model_to_related_name(
50
+ self.__class__, links=links
51
+ ).items():
52
+ if related_name not in exclude_set:
53
+ labels[related_name] = (
54
+ related_model_name,
55
+ getattr(self, related_name).all(),
56
+ )
57
+ return labels
58
+
59
+
60
+ def print_labels(
61
+ self: Artifact | Collection, field: str = "name", print_types: bool = False
62
+ ):
63
+ labels_msg = ""
64
+ for related_name, (related_model, labels) in get_labels_as_dict(self).items():
65
+ # there is a try except block here to deal with schema inconsistencies
66
+ # during transfer between instances
67
+ try:
68
+ labels_list = list(labels.values_list(field, flat=True))
69
+ if len(labels_list) > 0:
70
+ get_name_field(labels)
71
+ print_values = _print_values(labels_list, n=10)
72
+ type_str = f": {related_model}" if print_types else ""
73
+ labels_msg += f" .{related_name}{type_str} = {print_values}\n"
74
+ except Exception: # noqa: S112
75
+ continue
76
+ msg = ""
77
+ if labels_msg:
78
+ msg += f" {colors.italic('Labels')}\n"
79
+ msg += labels_msg
80
+ return msg
81
+
82
+
83
+ # Alex: is this a label transfer function?
84
+ def validate_labels(labels: QuerySet | list | dict):
85
+ def validate_labels_registry(
86
+ labels: QuerySet | list | dict,
87
+ ) -> tuple[list[str], list[str]]:
88
+ if len(labels) == 0:
89
+ return [], []
90
+ registry = labels[0].__class__
91
+ field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
92
+ if hasattr(registry, "_ontology_id_field"):
93
+ field = registry._ontology_id_field
94
+ # if the field value is None, use uid field
95
+ label_uids = np.array(
96
+ [getattr(label, field) for label in labels if label is not None]
97
+ )
98
+ # save labels from ontology_ids
99
+ if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
100
+ try:
101
+ labels_records = registry.from_values(label_uids, field=field)
102
+ save([r for r in labels_records if r._state.adding])
103
+ except Exception: # noqa S110
104
+ pass
105
+ field = "uid"
106
+ label_uids = np.array(
107
+ [getattr(label, field) for label in labels if label is not None]
108
+ )
109
+ validated = registry.validate(label_uids, field=field, mute=True)
110
+ validated_uids = label_uids[validated]
111
+ validated_labels = registry.filter(**{f"{field}__in": validated_uids}).list()
112
+ new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
113
+ return validated_labels, new_labels
114
+
115
+ if isinstance(labels, Dict):
116
+ result = {}
117
+ for registry, labels_registry in labels.items():
118
+ result[registry] = validate_labels_registry(labels_registry)
119
+ else:
120
+ return validate_labels_registry(labels)
121
+
122
+
123
+ class LabelManager:
124
+ """Label manager.
125
+
126
+ This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
127
+ typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
128
+ with features.
129
+ """
130
+
131
+ def __init__(self, host: Artifact | Collection):
132
+ self._host = host
133
+
134
+ def __repr__(self) -> str:
135
+ msg = print_labels(self._host)
136
+ if len(msg) > 0:
137
+ return msg
138
+ else:
139
+ return "no linked labels"
140
+
141
+ def add(
142
+ self,
143
+ records: Record | list[Record] | QuerySet,
144
+ feature: Feature | None = None,
145
+ ) -> None:
146
+ """Add one or several labels and associate them with a feature.
147
+
148
+ Args:
149
+ records: Label records to add.
150
+ feature: Feature under which to group the labels.
151
+ """
152
+ from ._data import add_labels
153
+
154
+ return add_labels(self._host, records=records, feature=feature)
155
+
156
+ def get(
157
+ self,
158
+ feature: Feature,
159
+ mute: bool = False,
160
+ flat_names: bool = False,
161
+ ) -> QuerySet | dict[str, QuerySet] | list:
162
+ """Get labels given a feature.
163
+
164
+ Args:
165
+ feature: Feature under which labels are grouped.
166
+ mute: Show no logging.
167
+ flat_names: Flatten list to names rather than returning records.
168
+ """
169
+ from ._data import get_labels
170
+
171
+ return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
172
+
173
+ def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
174
+ """Add labels from an artifact or collection to another artifact or collection.
175
+
176
+ Examples:
177
+ >>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
178
+ >>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
179
+ >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
180
+ >>> ln.save(ulabels)
181
+ >>> labels = ln.ULabel.filter(name__icontains = "label").all()
182
+ >>> artifact1.ulabels.set(labels)
183
+ >>> artifact2.labels.add_from(artifact1)
184
+ """
185
+ from django.db.utils import ProgrammingError
186
+
187
+ if transfer_logs is None:
188
+ transfer_logs = {"mapped": [], "transferred": []}
189
+ using_key = settings._using_key
190
+ for related_name, (_, labels) in get_labels_as_dict(data).items():
191
+ labels = labels.all()
192
+ try:
193
+ if not labels.exists():
194
+ continue
195
+ # look for features
196
+ data_name_lower = data.__class__.__name__.lower()
197
+ labels_by_features = defaultdict(list)
198
+ features = set()
199
+ _, new_labels = validate_labels(labels)
200
+ if len(new_labels) > 0:
201
+ transfer_fk_to_default_db_bulk(
202
+ new_labels, using_key, transfer_logs=transfer_logs
203
+ )
204
+ for label in labels:
205
+ # if the link table doesn't follow this convention, we'll ignore it
206
+ if not hasattr(label, f"links_{data_name_lower}"):
207
+ key = None
208
+ else:
209
+ link = getattr(label, f"links_{data_name_lower}").get(
210
+ **{f"{data_name_lower}_id": data.id}
211
+ )
212
+ if link.feature is not None:
213
+ features.add(link.feature)
214
+ key = link.feature.name
215
+ else:
216
+ key = None
217
+ label_returned = transfer_to_default_db(
218
+ label,
219
+ using_key,
220
+ transfer_logs=transfer_logs,
221
+ transfer_fk=False,
222
+ save=True,
223
+ )
224
+ # TODO: refactor return value of transfer to default db
225
+ if label_returned is not None:
226
+ label = label_returned
227
+ labels_by_features[key].append(label)
228
+ # treat features
229
+ _, new_features = validate_labels(list(features))
230
+ if len(new_features) > 0:
231
+ transfer_fk_to_default_db_bulk(
232
+ new_features, using_key, transfer_logs=transfer_logs
233
+ )
234
+ for feature in new_features:
235
+ transfer_to_default_db(
236
+ feature,
237
+ using_key,
238
+ transfer_logs=transfer_logs,
239
+ transfer_fk=False,
240
+ )
241
+ save(new_features)
242
+ if hasattr(self._host, related_name):
243
+ for feature_name, labels in labels_by_features.items():
244
+ if feature_name is not None:
245
+ feature_id = Feature.get(name=feature_name).id
246
+ else:
247
+ feature_id = None
248
+ getattr(self._host, related_name).add(
249
+ *labels, through_defaults={"feature_id": feature_id}
250
+ )
251
+ # ProgrammingError is raised when schemas don't match between source and target instances
252
+ except ProgrammingError:
253
+ continue