lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
@@ -0,0 +1,209 @@
1
+ from django.contrib.postgres.aggregates import ArrayAgg
2
+ from django.db import connection
3
+ from django.db.models import F, OuterRef, Q, Subquery
4
+ from django.db.models.fields.related import ForeignKey, ManyToManyField
5
+ from django.db.models.fields.reverse_related import ManyToManyRel, ManyToOneRel
6
+ from django.db.models.functions import JSONObject
7
+ from lnschema_core.models import Artifact, FeatureSet, Record
8
+
9
+ from .schema import dict_related_model_to_related_name
10
+
11
+
12
def get_related_model(model, field_name):
    """Return the model on the other side of the relation `field_name`.

    Args:
        model: A Django model class; its `_meta.get_field` is used for lookup.
        field_name: Name of the relation field to resolve.

    Returns:
        The related model class for forward relations (`ForeignKey`,
        `ManyToManyField`) and reverse relations (`ManyToOneRel`,
        `ManyToManyRel`). For any other field type, or if anything raises
        during the lookup, a descriptive string is returned instead of a
        model -- this function never raises.
    """
    try:
        relation = model._meta.get_field(field_name)
        # Forward ForeignKey or ManyToManyField: the target hangs off remote_field.
        if isinstance(relation, (ForeignKey, ManyToManyField)):
            return relation.remote_field.model
        # Reverse ForeignKey or ManyToManyField: the rel object exposes it directly.
        if isinstance(relation, (ManyToOneRel, ManyToManyRel)):
            return relation.related_model
        return f"Unexpected field type: {type(relation)}"
    except Exception as exc:
        # Best-effort contract: report the failure as a string rather than raise.
        return "Error: " + str(exc)
26
+
27
+
28
def get_artifact_with_related(
    artifact: Record,
    include_fk: bool = False,
    include_m2m: bool = False,
    include_feature_link: bool = False,
    include_featureset: bool = False,
) -> dict:
    """Fetch an artifact's id/uid plus selected related data in one query.

    Every requested relation is expressed as a queryset annotation so that a
    single `values()` query against the artifact's own database returns
    everything.

    Args:
        artifact: The record to look up; matched by `uid` on its own class.
        include_fk: Annotate each relational concrete field as `{id, name}`.
        include_m2m: Aggregate each many-to-many relation whose related name
            does not start with an underscore.
        include_feature_link: Aggregate link-table rows for link models that
            have a `feature` attribute.
        include_featureset: Aggregate feature-set links and resolve them via
            `get_featureset_m2m_relations`.

    Returns:
        A dict with keys `id`, `uid` and `related_data` (itself a dict with
        keys `m2m`, `fk`, `link`, `featuresets`), or `None` if no row matches
        the artifact's `uid`.
    """
    from lamindb._can_validate import get_name_field

    record_model = artifact.__class__
    fk_names = [
        field.name for field in record_model._meta.fields if field.is_relation
    ]

    m2m_names = []
    if include_m2m:
        m2m_names = [
            related_name
            for related_name in dict_related_model_to_related_name(
                record_model
            ).values()
            if not related_name.startswith("_")
        ]

    link_names = []
    if include_feature_link:
        link_names = list(
            dict_related_model_to_related_name(record_model, links=True).values()
        )

    # Clear previous queries
    connection.queries_log.clear()

    annotations = {}

    if include_fk:
        for fk_name in fk_names:
            fk_name_field = get_name_field(get_related_model(record_model, fk_name))
            annotations[f"fkfield_{fk_name}"] = JSONObject(
                id=F(f"{fk_name}__id"), name=F(f"{fk_name}__{fk_name_field}")
            )

    for m2m_name in m2m_names:
        m2m_model = get_related_model(record_model, m2m_name)
        m2m_name_field = get_name_field(m2m_model)
        m2m_entry = JSONObject(
            id=F(f"{m2m_name}__id"), name=F(f"{m2m_name}__{m2m_name_field}")
        )
        annotations[f"m2mfield_{m2m_name}"] = ArrayAgg(
            m2m_entry,
            filter=Q(**{f"{m2m_name}__isnull": False}),
            distinct=True,
        )

    for link_name in link_names:
        link_model = getattr(record_model, link_name).rel.related_model
        if hasattr(link_model, "feature"):
            # The label column name is derived from the link's related name.
            label_field = link_name.removeprefix("links_").replace("_", "")
            link_rows = link_model.objects.filter(artifact=OuterRef("pk")).annotate(
                data=JSONObject(
                    id=F("id"),
                    feature=F("feature"),
                    **{label_field: F(label_field)},
                )
            )
            # Group by artifact so the subquery yields one aggregated array.
            annotations[f"linkfield_{link_name}"] = Subquery(
                link_rows.values("artifact")
                .annotate(json_agg=ArrayAgg("data"))
                .values("json_agg")
            )

    if include_featureset:
        featureset_links = record_model.feature_sets.through.objects.filter(
            artifact=OuterRef("pk")
        ).annotate(
            data=JSONObject(
                id=F("id"),
                slot=F("slot"),
                featureset=F("featureset"),
            )
        )
        annotations["featuresets"] = Subquery(
            featureset_links.values("artifact")
            .annotate(json_agg=ArrayAgg("data"))
            .values("json_agg")
        )

    annotated = (
        record_model.objects.using(artifact._state.db)
        .filter(uid=artifact.uid)
        .annotate(**annotations)
    )
    artifact_meta = annotated.values("id", "uid", *annotations.keys()).first()

    if not artifact_meta:
        return None

    related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}}
    # Annotation-key prefix -> bucket of `related_data` that receives the value.
    prefix_buckets = (
        ("m2mfield_", "m2m"),
        ("fkfield_", "fk"),
        ("linkfield_", "link"),
    )
    for key, value in artifact_meta.items():
        for prefix, bucket in prefix_buckets:
            if key.startswith(prefix):
                related_data[bucket][key[len(prefix):]] = value
                break
        else:
            if key == "featuresets" and value:
                related_data["featuresets"] = get_featureset_m2m_relations(
                    artifact, {entry["featureset"]: entry["slot"] for entry in value}
                )

    # Collapse each aggregated m2m list into {id: name}; drop empty relations.
    related_data["m2m"] = {
        relation: {entry["id"]: entry["name"] for entry in entries}
        for relation, entries in related_data["m2m"].items()
        if entries
    }

    return {
        "id": artifact_meta["id"],
        "uid": artifact_meta["uid"],
        "related_data": related_data,
    }
146
+
147
+
148
def get_featureset_m2m_relations(
    artifact: Artifact, slot_featureset: dict, limit: int = 20
):
    """Fetch many-to-many related names for a set of feature sets.

    Args:
        artifact: Only used to pick the database (`artifact._state.db`).
        slot_featureset: Mapping whose keys are feature set ids (used in the
            `id__in` filter) and whose values are the corresponding slots.
        limit: Size of the slice taken from each through-table subquery.

    Returns:
        A dict mapping feature set id to a tuple `(slot, names_by_model)`,
        where `names_by_model` maps the related model's
        `__get_name_with_schema__()` value to a list of related record names.
        Relations with no aggregated values are omitted.
    """
    from lamindb._can_validate import get_name_field

    relation_names = [
        related_name
        for related_name in dict_related_model_to_related_name(FeatureSet).values()
        if not related_name.startswith("_") and related_name != "artifacts"
    ]

    annotations = {}
    schema_names = {}
    for relation in relation_names:
        related_model = get_related_model(FeatureSet, relation)
        name_field = get_name_field(related_model)

        # Get the correct field names for the through table
        through_model = getattr(FeatureSet, relation).through
        stripped_name = through_model.__name__.replace("FeatureSet", "")
        related_field = stripped_name.lower().replace("_", "")

        # Subquery to get limited related records
        through_rows = through_model.objects.filter(featureset=OuterRef("pk"))
        limited_related = Subquery(through_rows.values(related_field)[:limit])

        entry = JSONObject(
            id=F(f"{relation}__id"), name=F(f"{relation}__{name_field}")
        )
        annotations[f"m2mfield_{relation}"] = ArrayAgg(
            entry,
            filter=Q(**{f"{relation}__id__in": limited_related}),
            distinct=True,
        )
        schema_names[relation] = related_model.__get_name_with_schema__()

    featureset_rows = (
        FeatureSet.objects.using(artifact._state.db)
        .filter(id__in=slot_featureset.keys())
        .annotate(**annotations)
        .values("id", *annotations.keys())
    )

    prefix = "m2mfield_"
    return {
        row["id"]: (
            slot_featureset.get(row["id"]),
            {
                schema_names.get(key[len(prefix):]): [
                    entry["name"] for entry in entries
                ]
                for key, entries in row.items()
                if key.startswith(prefix) and entries
            },
        )
        for row in featureset_rows
    }