lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_data.py CHANGED
@@ -1,26 +1,26 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict
4
- from typing import TYPE_CHECKING, Any
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  from django.db import connections
7
7
  from lamin_utils import colors, logger
8
8
  from lamindb_setup.core._docs import doc_args
9
- from lnschema_core.models import (
9
+
10
+ from lamindb._query_set import QuerySet
11
+ from lamindb.core._settings import settings
12
+ from lamindb.models import (
10
13
  Artifact,
11
14
  Collection,
12
15
  Feature,
13
- FeatureSet,
14
16
  Record,
15
17
  Run,
18
+ Schema,
16
19
  ULabel,
17
20
  format_field_value,
18
21
  record_repr,
19
22
  )
20
23
 
21
- from lamindb._query_set import QuerySet
22
- from lamindb.core._settings import settings
23
-
24
24
  from ._context import context
25
25
  from ._django import get_artifact_with_related, get_related_model
26
26
  from ._feature_manager import (
@@ -29,15 +29,15 @@ from ._feature_manager import (
29
29
  get_label_links,
30
30
  )
31
31
  from .exceptions import ValidationError
32
- from .schema import (
32
+ from .relations import (
33
+ dict_module_name_to_model_name,
33
34
  dict_related_model_to_related_name,
34
- dict_schema_name_to_model_name,
35
35
  )
36
36
 
37
37
  if TYPE_CHECKING:
38
38
  from collections.abc import Iterable
39
39
 
40
- from lnschema_core.types import StrField
40
+ from lamindb.base.types import StrField
41
41
 
42
42
 
43
43
  WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
@@ -56,49 +56,44 @@ def get_run(run: Run | None) -> Run | None:
56
56
  return run
57
57
 
58
58
 
59
- def add_transform_to_kwargs(kwargs: dict[str, Any], run: Run):
60
- if run is not None:
61
- kwargs["transform"] = run.transform
62
-
63
-
64
- def save_feature_sets(self: Artifact | Collection) -> None:
65
- if hasattr(self, "_feature_sets"):
66
- from lamindb.core._feature_manager import get_feature_set_by_slot_
59
+ def save_staged__schemas_m2m(self: Artifact | Collection) -> None:
60
+ if hasattr(self, "_staged__schemas_m2m"):
61
+ from lamindb.core._feature_manager import get_schema_by_slot_
67
62
 
68
- existing_feature_sets = get_feature_set_by_slot_(self)
69
- saved_feature_sets = {}
70
- for key, feature_set in self._feature_sets.items():
71
- if isinstance(feature_set, FeatureSet) and feature_set._state.adding:
72
- feature_set.save()
73
- saved_feature_sets[key] = feature_set
74
- if key in existing_feature_sets:
63
+ existing_staged__schemas_m2m = get_schema_by_slot_(self)
64
+ saved_staged__schemas_m2m = {}
65
+ for key, schema in self._staged__schemas_m2m.items():
66
+ if isinstance(schema, Schema) and schema._state.adding:
67
+ schema.save()
68
+ saved_staged__schemas_m2m[key] = schema
69
+ if key in existing_staged__schemas_m2m:
75
70
  # remove existing feature set on the same slot
76
- self.feature_sets.remove(existing_feature_sets[key])
77
- if len(saved_feature_sets) > 0:
78
- s = "s" if len(saved_feature_sets) > 1 else ""
79
- display_feature_set_keys = ",".join(
80
- f"'{key}'" for key in saved_feature_sets.keys()
71
+ self._schemas_m2m.remove(existing_staged__schemas_m2m[key])
72
+ if len(saved_staged__schemas_m2m) > 0:
73
+ s = "s" if len(saved_staged__schemas_m2m) > 1 else ""
74
+ display_schema_keys = ",".join(
75
+ f"'{key}'" for key in saved_staged__schemas_m2m.keys()
81
76
  )
82
77
  logger.save(
83
- f"saved {len(saved_feature_sets)} feature set{s} for slot{s}:"
84
- f" {display_feature_set_keys}"
78
+ f"saved {len(saved_staged__schemas_m2m)} feature set{s} for slot{s}:"
79
+ f" {display_schema_keys}"
85
80
  )
86
81
 
87
82
 
88
- def save_feature_set_links(self: Artifact | Collection) -> None:
83
+ def save_schema_links(self: Artifact | Collection) -> None:
89
84
  from lamindb._save import bulk_create
90
85
 
91
86
  Data = self.__class__
92
- if hasattr(self, "_feature_sets"):
87
+ if hasattr(self, "_staged__schemas_m2m"):
93
88
  links = []
94
89
  host_id_field = get_host_id_field(self)
95
- for slot, feature_set in self._feature_sets.items():
90
+ for slot, schema in self._staged__schemas_m2m.items():
96
91
  kwargs = {
97
92
  host_id_field: self.id,
98
- "featureset_id": feature_set.id,
93
+ "schema_id": schema.id,
99
94
  "slot": slot,
100
95
  }
101
- links.append(Data.feature_sets.through(**kwargs))
96
+ links.append(Data._schemas_m2m.through(**kwargs))
102
97
  bulk_create(links, ignore_conflicts=True)
103
98
 
104
99
 
@@ -140,7 +135,7 @@ def _describe_postgres(self: Artifact | Collection, print_types: bool = False):
140
135
  include_feature_link=True,
141
136
  include_fk=True,
142
137
  include_m2m=True,
143
- include_featureset=True,
138
+ include_schema=True,
144
139
  )
145
140
  else:
146
141
  result = get_artifact_with_related(self, include_fk=True, include_m2m=True)
@@ -153,7 +148,7 @@ def _describe_postgres(self: Artifact | Collection, print_types: bool = False):
153
148
  tree=tree,
154
149
  related_data=related_data,
155
150
  with_labels=True,
156
- print_params=hasattr(self, "type") and self.type == "model",
151
+ print_params=hasattr(self, "kind") and self.kind == "model",
157
152
  )
158
153
 
159
154
 
@@ -187,7 +182,7 @@ def _describe_sqlite(self: Artifact | Collection, print_types: bool = False):
187
182
  if isinstance(self, (Collection, Artifact)):
188
183
  many_to_many_fields.append("input_of_runs")
189
184
  if isinstance(self, Artifact):
190
- many_to_many_fields.append("feature_sets")
185
+ many_to_many_fields.append("_schemas_m2m")
191
186
  self = (
192
187
  self.__class__.objects.using(self._state.db)
193
188
  .prefetch_related(*many_to_many_fields)
@@ -198,7 +193,7 @@ def _describe_sqlite(self: Artifact | Collection, print_types: bool = False):
198
193
  self,
199
194
  tree=tree,
200
195
  with_labels=True,
201
- print_params=hasattr(self, "type") and self.type == "model",
196
+ print_params=hasattr(self, "kind") and self.kind == "kind",
202
197
  )
203
198
 
204
199
 
@@ -220,7 +215,7 @@ def validate_feature(feature: Feature, records: list[Record]) -> None:
220
215
  if not isinstance(feature, Feature):
221
216
  raise TypeError("feature has to be of type Feature")
222
217
  if feature._state.adding:
223
- registries = {record.__class__.__get_name_with_schema__() for record in records}
218
+ registries = {record.__class__.__get_name_with_module__() for record in records}
224
219
  registries_str = "|".join(registries)
225
220
  msg = f"ln.Feature(name='{feature.name}', type='cat[{registries_str}]').save()"
226
221
  raise ValidationError(f"Feature not validated. If it looks correct: {msg}")
@@ -303,7 +298,7 @@ def add_labels(
303
298
  " feature=ln.Feature(name='my_feature'))"
304
299
  )
305
300
  if feature.dtype.startswith("cat["):
306
- orm_dict = dict_schema_name_to_model_name(Artifact)
301
+ orm_dict = dict_module_name_to_model_name(Artifact)
307
302
  for reg in feature.dtype.replace("cat[", "").rstrip("]").split("|"):
308
303
  registry = orm_dict.get(reg)
309
304
  records_validated += registry.from_values(records, field=field)
@@ -329,7 +324,7 @@ def add_labels(
329
324
  # strategy: group records by registry to reduce number of transactions
330
325
  records_by_related_name: dict = {}
331
326
  for record in records:
332
- related_name = d.get(record.__class__.__get_name_with_schema__())
327
+ related_name = d.get(record.__class__.__get_name_with_module__())
333
328
  if related_name is None:
334
329
  raise ValueError(f"Can't add labels to {record.__class__} record!")
335
330
  if related_name not in records_by_related_name:
@@ -340,15 +335,15 @@ def add_labels(
340
335
  else:
341
336
  validate_feature(feature, records) # type:ignore
342
337
  records_by_registry = defaultdict(list)
343
- feature_sets = self.feature_sets.filter(registry="Feature").all()
338
+ _schemas_m2m = self._schemas_m2m.filter(itype="Feature").all()
344
339
  internal_features = set() # type: ignore
345
- if len(feature_sets) > 0:
346
- for feature_set in feature_sets:
340
+ if len(_schemas_m2m) > 0:
341
+ for schema in _schemas_m2m:
347
342
  internal_features = internal_features.union(
348
- set(feature_set.members.values_list("name", flat=True))
343
+ set(schema.members.values_list("name", flat=True))
349
344
  ) # type: ignore
350
345
  for record in records:
351
- records_by_registry[record.__class__.__get_name_with_schema__()].append(
346
+ records_by_registry[record.__class__.__get_name_with_module__()].append(
352
347
  record
353
348
  )
354
349
  for registry_name, records in records_by_registry.items():
@@ -388,7 +383,7 @@ def add_labels(
388
383
 
389
384
  def _track_run_input(
390
385
  data: Artifact | Collection | Iterable[Artifact] | Iterable[Collection],
391
- is_run_input: bool | None = None,
386
+ is_run_input: bool | Run | None = None,
392
387
  run: Run | None = None,
393
388
  ):
394
389
  # this is an internal hack right now for project-flow, but we can allow this
lamindb/core/_describe.py CHANGED
@@ -7,18 +7,25 @@ from lamin_utils import logger
7
7
  from rich.text import Text
8
8
  from rich.tree import Tree
9
9
 
10
+ from ._context import is_run_from_ipython
11
+
10
12
  if TYPE_CHECKING:
11
- from lnschema_core.models import Artifact, Collection, Run
13
+ from lamindb.models import Artifact, Collection, Run
12
14
 
13
15
 
14
16
  def highlight_time(iso: str):
15
- tz = datetime.datetime.now().astimezone().tzinfo
16
- res = (
17
- datetime.datetime.fromisoformat(iso)
18
- .replace(tzinfo=datetime.timezone.utc)
19
- .astimezone(tz)
20
- .strftime("%Y-%m-%d %H:%M:%S")
21
- )
17
+ try:
18
+ tz = datetime.datetime.now().astimezone().tzinfo
19
+ res = (
20
+ datetime.datetime.fromisoformat(iso)
21
+ .replace(tzinfo=datetime.timezone.utc)
22
+ .astimezone(tz)
23
+ .strftime("%Y-%m-%d %H:%M:%S")
24
+ )
25
+ except ValueError:
26
+ # raises ValueError: Invalid isoformat string: '<django.db.models.expressions.DatabaseDefault object at 0x1128ac440>'
27
+ # but can't be caught with `isinstance(iso, DatabaseDefault)` for unknown reasons
28
+ return Text("timestamp of unsaved record not available", style="dim")
22
29
  return Text(res, style="dim")
23
30
 
24
31
 
@@ -31,28 +38,33 @@ VALUES_WIDTH = 40
31
38
  def print_rich_tree(tree: Tree, fallback=str):
32
39
  from rich.console import Console
33
40
 
41
+ # If tree has no children, return fallback
42
+ if not tree.children:
43
+ return fallback
44
+
34
45
  console = Console(force_terminal=True)
46
+ printed = False
35
47
 
36
- if tree.children:
37
- try:
48
+ try:
49
+ if not is_run_from_ipython:
38
50
  from IPython import get_ipython
39
51
  from IPython.core.interactiveshell import InteractiveShell
40
52
  from IPython.display import display
41
53
 
42
54
  shell = get_ipython()
43
- if isinstance(shell, InteractiveShell): # Covers all interactive shells
55
+ if isinstance(shell, InteractiveShell):
44
56
  display(tree)
57
+ printed = True
45
58
  return ""
46
- else:
47
- with console.capture() as capture:
48
- console.print(tree)
49
- return capture.get()
50
- except (ImportError, NameError):
51
- with console.capture() as capture:
52
- console.print(tree)
53
- return capture.get()
54
- else:
55
- return fallback
59
+ except (NameError, ImportError):
60
+ pass
61
+
62
+ # If not printed through IPython
63
+ if not printed:
64
+ # be careful to test this on a terminal
65
+ console = Console(force_terminal=True)
66
+ console.print(tree)
67
+ return ""
56
68
 
57
69
 
58
70
  def describe_header(self: Artifact | Collection | Run) -> Tree:
@@ -60,14 +72,14 @@ def describe_header(self: Artifact | Collection | Run) -> Tree:
60
72
  logger.warning(
61
73
  f"This is not the latest version of the {self.__class__.__name__}."
62
74
  )
63
- if hasattr(self, "visibility"):
64
- if self.visibility == 0:
75
+ if hasattr(self, "_branch_code"):
76
+ if self._branch_code == 0:
65
77
  logger.warning("This artifact is hidden.")
66
- elif self.visibility == -1:
78
+ elif self._branch_code == -1:
67
79
  logger.warning("This artifact is the trash.")
68
80
  # initialize tree
69
81
  suffix = self.suffix if hasattr(self, "suffix") and self.suffix else ""
70
- accessor = self._accessor if hasattr(self, "_accessor") and self._accessor else ""
82
+ accessor = self.otype if hasattr(self, "otype") and self.otype else ""
71
83
  suffix_accessor = (
72
84
  f"{suffix}/{accessor}" if suffix and accessor else suffix or accessor or ""
73
85
  )
@@ -89,15 +101,13 @@ def describe_general(self: Artifact | Collection, tree: Tree | None = None) -> T
89
101
  general = tree.add(Text("General", style="bold bright_cyan"))
90
102
  general.add(f".uid = '{self.uid}'")
91
103
  if hasattr(self, "key") and self.key:
92
- general.add(
93
- f".key = '{self.key}'" if self._key_is_virtual else f".key = {self.key}"
94
- )
104
+ general.add(f".key = '{self.key}'")
95
105
  if hasattr(self, "size") and self.size:
96
106
  general.add(f".size = {self.size}")
97
107
  if hasattr(self, "hash") and self.hash:
98
108
  general.add(f".hash = '{self.hash}'")
99
- if hasattr(self, "n_objects") and self.n_objects:
100
- general.add(f".n_objects = {self.n_objects}")
109
+ if hasattr(self, "n_files") and self.n_files:
110
+ general.add(f".n_files = {self.n_files}")
101
111
  if hasattr(self, "n_observations") and self.n_observations:
102
112
  general.add(Text(f".n_observations = {self.n_observations}"))
103
113
  if hasattr(self, "version") and self.version:
@@ -131,7 +141,7 @@ def describe_general(self: Artifact | Collection, tree: Tree | None = None) -> T
131
141
  if hasattr(self, "transform") and self.transform:
132
142
  general.add(
133
143
  Text(
134
- f".transform = '{self.transform.name}'",
144
+ f".transform = '{self.transform.description}'",
135
145
  style="cyan3",
136
146
  )
137
147
  )
lamindb/core/_django.py CHANGED
@@ -1,14 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from functools import reduce
4
+
3
5
  from django.contrib.postgres.aggregates import ArrayAgg
4
6
  from django.db import connection
5
7
  from django.db.models import F, OuterRef, Q, Subquery
6
8
  from django.db.models.fields.related import ForeignKey, ManyToManyField
7
9
  from django.db.models.fields.reverse_related import ManyToManyRel, ManyToOneRel
8
10
  from django.db.models.functions import JSONObject
9
- from lnschema_core.models import Artifact, FeatureSet, Record
10
11
 
11
- from .schema import dict_related_model_to_related_name, get_schemas_modules
12
+ from lamindb.models import Artifact, Record, Schema
13
+
14
+ from .relations import dict_related_model_to_related_name, get_schema_modules
12
15
 
13
16
 
14
17
  def get_related_model(model, field_name):
@@ -32,7 +35,7 @@ def get_artifact_with_related(
32
35
  include_fk: bool = False,
33
36
  include_m2m: bool = False,
34
37
  include_feature_link: bool = False,
35
- include_featureset: bool = False,
38
+ include_schema: bool = False,
36
39
  ) -> dict:
37
40
  """Fetch an artifact with its related data."""
38
41
  from lamindb._can_curate import get_name_field
@@ -40,12 +43,12 @@ def get_artifact_with_related(
40
43
  from ._label_manager import EXCLUDE_LABELS
41
44
 
42
45
  model = artifact.__class__
43
- schema_modules = get_schemas_modules(artifact._state.db)
46
+ schema_modules = get_schema_modules(artifact._state.db)
44
47
 
45
48
  foreign_key_fields = [
46
49
  f.name
47
50
  for f in model._meta.fields
48
- if f.is_relation and f.related_model.__get_schema_name__() in schema_modules
51
+ if f.is_relation and f.related_model.__get_module_name__() in schema_modules
49
52
  ]
50
53
 
51
54
  m2m_relations = (
@@ -81,15 +84,6 @@ def get_artifact_with_related(
81
84
  id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}")
82
85
  )
83
86
 
84
- for name in m2m_relations:
85
- related_model = get_related_model(model, name)
86
- name_field = get_name_field(related_model)
87
- annotations[f"m2mfield_{name}"] = ArrayAgg(
88
- JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}")),
89
- filter=Q(**{f"{name}__isnull": False}),
90
- distinct=True,
91
- )
92
-
93
87
  for link in link_tables:
94
88
  link_model = getattr(model, link).rel.related_model
95
89
  if not hasattr(link_model, "feature"):
@@ -109,14 +103,14 @@ def get_artifact_with_related(
109
103
  .values("json_agg")
110
104
  )
111
105
 
112
- if include_featureset:
113
- annotations["featuresets"] = Subquery(
114
- model.feature_sets.through.objects.filter(artifact=OuterRef("pk"))
106
+ if include_schema:
107
+ annotations["schemas"] = Subquery(
108
+ model._schemas_m2m.through.objects.filter(artifact=OuterRef("pk"))
115
109
  .annotate(
116
110
  data=JSONObject(
117
111
  id=F("id"),
118
112
  slot=F("slot"),
119
- featureset=F("featureset"),
113
+ schema=F("schema"),
120
114
  )
121
115
  )
122
116
  .values("artifact")
@@ -135,25 +129,45 @@ def get_artifact_with_related(
135
129
  if not artifact_meta:
136
130
  return None
137
131
 
138
- related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}}
132
+ related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "schemas": {}}
139
133
  for k, v in artifact_meta.items():
140
- if k.startswith("m2mfield_"):
141
- related_data["m2m"][k[9:]] = v
142
- elif k.startswith("fkfield_"):
134
+ if k.startswith("fkfield_"):
143
135
  related_data["fk"][k[8:]] = v
144
136
  elif k.startswith("linkfield_"):
145
137
  related_data["link"][k[10:]] = v
146
- elif k == "featuresets":
138
+ elif k == "schemas":
147
139
  if v:
148
- related_data["featuresets"] = get_featureset_m2m_relations(
149
- artifact, {i["featureset"]: i["slot"] for i in v}
140
+ related_data["schemas"] = get_schema_m2m_relations(
141
+ artifact, {i["schema"]: i["slot"] for i in v}
150
142
  )
151
143
 
152
- related_data["m2m"] = {
153
- k: {item["id"]: item["name"] for item in v}
154
- for k, v in related_data["m2m"].items()
155
- if v
156
- }
144
+ if len(m2m_relations) == 0:
145
+ m2m_any = False
146
+ else:
147
+ m2m_any_expr = reduce(
148
+ lambda a, b: a | b,
149
+ (Q(**{f"{m2m_name}__isnull": False}) for m2m_name in m2m_relations),
150
+ )
151
+ # this is needed to avoid querying all m2m relations even if they are all empty
152
+ # this checks if non-empty m2m relations are present in the record
153
+ m2m_any = (
154
+ model.objects.using(artifact._state.db)
155
+ .filter(uid=artifact.uid)
156
+ .filter(m2m_any_expr)
157
+ .exists()
158
+ )
159
+ if m2m_any:
160
+ m2m_data = related_data["m2m"]
161
+ for m2m_name in m2m_relations:
162
+ related_model = get_related_model(model, m2m_name)
163
+ name_field = get_name_field(related_model)
164
+ m2m_records = (
165
+ getattr(artifact, m2m_name).values_list("id", name_field).distinct()
166
+ )
167
+ for rec_id, rec_name in m2m_records:
168
+ if m2m_name not in m2m_data:
169
+ m2m_data[m2m_name] = {}
170
+ m2m_data[m2m_name][rec_id] = rec_name
157
171
 
158
172
  return {
159
173
  **{name: artifact_meta[name] for name in ["id", "uid"]},
@@ -161,30 +175,31 @@ def get_artifact_with_related(
161
175
  }
162
176
 
163
177
 
164
- def get_featureset_m2m_relations(
165
- artifact: Artifact, slot_featureset: dict, limit: int = 20
166
- ):
178
+ def get_schema_m2m_relations(artifact: Artifact, slot_schema: dict, limit: int = 20):
167
179
  """Fetch all many-to-many relationships for given feature sets."""
168
180
  from lamindb._can_curate import get_name_field
169
181
 
170
182
  m2m_relations = [
171
183
  v
172
- for v in dict_related_model_to_related_name(FeatureSet).values()
173
- if not v.startswith("_") and v != "artifacts"
184
+ for v in dict_related_model_to_related_name(Schema).values()
185
+ if v is not None and not v.startswith("_") and v != "artifacts"
174
186
  ]
175
187
 
176
188
  annotations = {}
177
189
  related_names = {}
178
190
  for name in m2m_relations:
179
- related_model = get_related_model(FeatureSet, name)
191
+ related_model = get_related_model(Schema, name)
192
+ if related_model is Schema:
193
+ # this is for the `type` field
194
+ continue
180
195
  name_field = get_name_field(related_model)
181
196
 
182
197
  # Get the correct field names for the through table
183
- through_model = getattr(FeatureSet, name).through
198
+ through_model = getattr(Schema, name).through
184
199
 
185
200
  # Subquery to get limited related records
186
201
  limited_related = Subquery(
187
- through_model.objects.filter(featureset=OuterRef("pk")).values(
202
+ through_model.objects.filter(schema=OuterRef("pk")).values(
188
203
  related_model.__name__.lower()
189
204
  )[:limit]
190
205
  )
@@ -198,18 +213,18 @@ def get_featureset_m2m_relations(
198
213
  ),
199
214
  distinct=True,
200
215
  )
201
- related_names[name] = related_model.__get_name_with_schema__()
216
+ related_names[name] = related_model.__get_name_with_module__()
202
217
 
203
- featureset_m2m = (
204
- FeatureSet.objects.using(artifact._state.db)
205
- .filter(id__in=slot_featureset.keys())
218
+ schema_m2m = (
219
+ Schema.objects.using(artifact._state.db)
220
+ .filter(id__in=slot_schema.keys())
206
221
  .annotate(**annotations)
207
222
  .values("id", *annotations.keys())
208
223
  )
209
224
 
210
225
  result = {}
211
- for fs in featureset_m2m:
212
- slot = slot_featureset.get(fs["id"])
226
+ for fs in schema_m2m:
227
+ slot = slot_schema.get(fs["id"])
213
228
  result[fs["id"]] = (
214
229
  slot,
215
230
  {