lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. lamindb/__init__.py +14 -5
  2. lamindb/_artifact.py +174 -57
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +85 -51
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +222 -81
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +59 -17
  10. lamindb/_record.py +171 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +33 -10
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +106 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/validation.py +2 -6
  19. lamindb/core/__init__.py +13 -14
  20. lamindb/core/_context.py +39 -36
  21. lamindb/core/_data.py +29 -25
  22. lamindb/core/_describe.py +1 -1
  23. lamindb/core/_django.py +1 -1
  24. lamindb/core/_feature_manager.py +54 -44
  25. lamindb/core/_label_manager.py +4 -4
  26. lamindb/core/_mapped_collection.py +20 -7
  27. lamindb/core/datasets/__init__.py +6 -1
  28. lamindb/core/datasets/_core.py +12 -11
  29. lamindb/core/datasets/_small.py +66 -20
  30. lamindb/core/exceptions.py +1 -90
  31. lamindb/core/loaders.py +7 -13
  32. lamindb/core/relations.py +6 -4
  33. lamindb/core/storage/_anndata_accessor.py +41 -0
  34. lamindb/core/storage/_backed_access.py +2 -2
  35. lamindb/core/storage/_pyarrow_dataset.py +25 -15
  36. lamindb/core/storage/_tiledbsoma.py +56 -12
  37. lamindb/core/storage/paths.py +41 -22
  38. lamindb/core/subsettings/_creation_settings.py +4 -16
  39. lamindb/curators/__init__.py +2168 -833
  40. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  41. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  42. lamindb/errors.py +96 -0
  43. lamindb/integrations/_vitessce.py +3 -3
  44. lamindb/migrations/0069_squashed.py +76 -75
  45. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  46. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  47. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  48. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  49. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  50. lamindb/migrations/0086_various.py +95 -0
  51. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  52. lamindb/migrations/0088_schema_components.py +273 -0
  53. lamindb/migrations/0088_squashed.py +4372 -0
  54. lamindb/models.py +423 -156
  55. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
  56. lamindb-1.1.0.dist-info/RECORD +95 -0
  57. lamindb/curators/_spatial.py +0 -528
  58. lamindb/migrations/0052_squashed.py +0 -1261
  59. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  60. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  61. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  62. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  63. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  64. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  65. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  66. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  67. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  68. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  69. lamindb/migrations/0063_populate_latest_field.py +0 -45
  70. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  71. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  72. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  73. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  74. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  75. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  76. lamindb-1.0.4.dist-info/RECORD +0 -102
  77. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
  78. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/core/_context.py CHANGED
@@ -19,15 +19,14 @@ from lamindb.base import ids
19
19
  from lamindb.base.ids import base62_12
20
20
  from lamindb.models import Run, Transform, format_field_value
21
21
 
22
- from ._settings import settings
23
- from ._sync_git import get_transform_reference_from_git_repo
24
- from ._track_environment import track_environment
25
- from .exceptions import (
22
+ from ..errors import (
26
23
  InconsistentKey,
27
- NotebookNotSaved,
28
24
  TrackNotCalled,
29
25
  UpdateContext,
30
26
  )
27
+ from ._settings import settings
28
+ from ._sync_git import get_transform_reference_from_git_repo
29
+ from ._track_environment import track_environment
31
30
  from .versioning import bump_version as bump_version_function
32
31
  from .versioning import increment_base62, message_update_key_in_version_family
33
32
 
@@ -201,6 +200,7 @@ class Context:
201
200
  self._logging_message_track: str = ""
202
201
  self._logging_message_imports: str = ""
203
202
  self._stream_tracker: LogStreamTracker = LogStreamTracker()
203
+ self._is_finish_retry: bool = False
204
204
 
205
205
  @property
206
206
  def transform(self) -> Transform | None:
@@ -307,11 +307,15 @@ class Context:
307
307
  ) = self._track_source_code(path=path)
308
308
  if description is None:
309
309
  description = self._description
310
+ # temporarily until the hub displays the key by default
311
+ # populate the description with the filename again
312
+ if description is None:
313
+ description = self._path.name
310
314
  self._create_or_load_transform(
311
315
  description=description,
312
316
  transform_ref=transform_ref,
313
317
  transform_ref_type=transform_ref_type,
314
- transform_type=transform_type,
318
+ transform_type=transform_type, # type: ignore
315
319
  )
316
320
  else:
317
321
  if transform.type in {"notebook", "script"}:
@@ -348,7 +352,7 @@ class Context:
348
352
  self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
349
353
 
350
354
  if run is None: # create new run
351
- run = Run(
355
+ run = Run( # type: ignore
352
356
  transform=self._transform,
353
357
  params=params,
354
358
  )
@@ -494,15 +498,19 @@ class Context:
494
498
  if aux_transform.key in self._path.as_posix():
495
499
  key = aux_transform.key
496
500
  if (
497
- # if the transform source code wasn't yet saved
498
- aux_transform.source_code is None
499
- # if the transform source code is unchanged
500
- # if aux_transform.type == "notebook", we anticipate the user makes changes to the notebook source code
501
- # in an interactive session, hence we *pro-actively bump* the version number by setting `revises`
502
- # in the second part of the if condition even though the source code is unchanged at point of running track()
503
- or (
504
- aux_transform.hash == hash
505
- and aux_transform.type != "notebook"
501
+ # has to be the same user
502
+ aux_transform.created_by_id == ln_setup.settings.user.id
503
+ and (
504
+ # if the transform source code wasn't yet saved
505
+ aux_transform.source_code is None
506
+ # if the transform source code is unchanged
507
+ # if aux_transform.type == "notebook", we anticipate the user makes changes to the notebook source code
508
+ # in an interactive session, hence we *pro-actively bump* the version number by setting `revises`
509
+ # in the second part of the if condition even though the source code is unchanged at point of running track()
510
+ or (
511
+ aux_transform.hash == hash
512
+ and aux_transform.type != "notebook"
513
+ )
506
514
  )
507
515
  ):
508
516
  uid = aux_transform.uid
@@ -514,9 +522,13 @@ class Context:
514
522
  aux_transform.hash == hash
515
523
  and aux_transform.type == "notebook"
516
524
  ):
517
- message += " -- notebook source code is unchanged, but anticipating changes during this run"
525
+ message += " -- anticipating changes"
518
526
  elif aux_transform.hash != hash:
519
- message += " -- source code changed"
527
+ message += "" # could log "source code changed", but this seems too much
528
+ elif (
529
+ aux_transform.created_by_id != ln_setup.settings.user.id
530
+ ):
531
+ message += f" -- {aux_transform.created_by.handle} already works on this draft"
520
532
  message += f", creating new version '{uid}'"
521
533
  revises = aux_transform
522
534
  found_key = True
@@ -575,7 +587,7 @@ class Context:
575
587
  assert key is not None # noqa: S101
576
588
  raise_update_context = False
577
589
  try:
578
- transform = Transform(
590
+ transform = Transform( # type: ignore
579
591
  uid=self.uid,
580
592
  version=self.version,
581
593
  description=description,
@@ -613,7 +625,7 @@ class Context:
613
625
  and not transform_was_saved
614
626
  ):
615
627
  raise UpdateContext(
616
- f'{transform.created_by.description} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filedescription and `ln.track("{ids.base62_12()}0000")`.'
628
+ f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filedescription and `ln.track("{ids.base62_12()}0000")`.'
617
629
  )
618
630
  # check whether transform source code was already saved
619
631
  if transform_was_saved:
@@ -648,12 +660,12 @@ class Context:
648
660
 
649
661
  - writes a timestamp: `run.finished_at`
650
662
  - saves the source code: `transform.source_code`
663
+ - saves a run report: `run.report`
651
664
 
652
665
  When called in the last cell of a notebook:
653
666
 
667
+ - prompts to save the notebook in your editor right before
654
668
  - prompts for user input if not consecutively executed
655
- - requires to save the notebook in your editor right before
656
- - saves a run report: `run.report`
657
669
 
658
670
  Args:
659
671
  ignore_non_consecutive: Whether to ignore if a notebook was non-consecutively executed.
@@ -670,8 +682,6 @@ class Context:
670
682
 
671
683
  """
672
684
  from lamindb._finish import (
673
- get_save_notebook_message,
674
- get_seconds_since_modified,
675
685
  save_context_core,
676
686
  )
677
687
 
@@ -686,24 +696,17 @@ class Context:
686
696
  self.run.save()
687
697
  # nothing else to do
688
698
  return None
689
- if is_run_from_ipython: # notebooks
690
- import nbproject
691
-
692
- # it might be that the user modifies the title just before ln.finish()
693
- if (
694
- nbproject_title := nbproject.meta.live.title
695
- ) != self.transform.description:
696
- self.transform.description = nbproject_title
697
- self.transform.save()
698
- if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
699
- raise NotebookNotSaved(get_save_notebook_message())
700
- save_context_core(
699
+ return_code = save_context_core(
701
700
  run=self.run,
702
701
  transform=self.run.transform,
703
702
  filepath=self._path,
704
703
  finished_at=True,
705
704
  ignore_non_consecutive=ignore_non_consecutive,
705
+ is_retry=self._is_finish_retry,
706
706
  )
707
+ if return_code == "retry":
708
+ self._is_finish_retry = True
709
+ return None
707
710
  if self.transform.type != "notebook":
708
711
  self._stream_tracker.finish()
709
712
  # reset the context attributes so that somebody who runs `track()` after finish
lamindb/core/_data.py CHANGED
@@ -21,6 +21,8 @@ from lamindb.models import (
21
21
  record_repr,
22
22
  )
23
23
 
24
+ from .._tracked import get_current_tracked_run
25
+ from ..errors import ValidationError
24
26
  from ._context import context
25
27
  from ._django import get_artifact_with_related, get_related_model
26
28
  from ._feature_manager import (
@@ -28,7 +30,6 @@ from ._feature_manager import (
28
30
  get_host_id_field,
29
31
  get_label_links,
30
32
  )
31
- from .exceptions import ValidationError
32
33
  from .relations import (
33
34
  dict_module_name_to_model_name,
34
35
  dict_related_model_to_related_name,
@@ -45,9 +46,12 @@ WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-r
45
46
  WARNING_NO_INPUT = "run input wasn't tracked, call `ln.track()` and re-run"
46
47
 
47
48
 
49
+ # also see current_run() in core._data
48
50
  def get_run(run: Run | None) -> Run | None:
49
51
  if run is None:
50
- run = context.run
52
+ run = get_current_tracked_run()
53
+ if run is None:
54
+ run = context.run
51
55
  if run is None and not settings.creation.artifact_silence_missing_run_warning:
52
56
  logger.warning(WARNING_RUN_TRANSFORM)
53
57
  # suppress run by passing False
@@ -56,26 +60,26 @@ def get_run(run: Run | None) -> Run | None:
56
60
  return run
57
61
 
58
62
 
59
- def save_staged__schemas_m2m(self: Artifact | Collection) -> None:
60
- if hasattr(self, "_staged__schemas_m2m"):
63
+ def save_staged_feature_sets(self: Artifact | Collection) -> None:
64
+ if hasattr(self, "_staged_feature_sets"):
61
65
  from lamindb.core._feature_manager import get_schema_by_slot_
62
66
 
63
- existing_staged__schemas_m2m = get_schema_by_slot_(self)
64
- saved_staged__schemas_m2m = {}
65
- for key, schema in self._staged__schemas_m2m.items():
67
+ existing_staged_feature_sets = get_schema_by_slot_(self)
68
+ saved_staged_feature_sets = {}
69
+ for key, schema in self._staged_feature_sets.items():
66
70
  if isinstance(schema, Schema) and schema._state.adding:
67
71
  schema.save()
68
- saved_staged__schemas_m2m[key] = schema
69
- if key in existing_staged__schemas_m2m:
72
+ saved_staged_feature_sets[key] = schema
73
+ if key in existing_staged_feature_sets:
70
74
  # remove existing feature set on the same slot
71
- self._schemas_m2m.remove(existing_staged__schemas_m2m[key])
72
- if len(saved_staged__schemas_m2m) > 0:
73
- s = "s" if len(saved_staged__schemas_m2m) > 1 else ""
75
+ self.feature_sets.remove(existing_staged_feature_sets[key])
76
+ if len(saved_staged_feature_sets) > 0:
77
+ s = "s" if len(saved_staged_feature_sets) > 1 else ""
74
78
  display_schema_keys = ",".join(
75
- f"'{key}'" for key in saved_staged__schemas_m2m.keys()
79
+ f"'{key}'" for key in saved_staged_feature_sets.keys()
76
80
  )
77
81
  logger.save(
78
- f"saved {len(saved_staged__schemas_m2m)} feature set{s} for slot{s}:"
82
+ f"saved {len(saved_staged_feature_sets)} feature set{s} for slot{s}:"
79
83
  f" {display_schema_keys}"
80
84
  )
81
85
 
@@ -84,16 +88,16 @@ def save_schema_links(self: Artifact | Collection) -> None:
84
88
  from lamindb._save import bulk_create
85
89
 
86
90
  Data = self.__class__
87
- if hasattr(self, "_staged__schemas_m2m"):
91
+ if hasattr(self, "_staged_feature_sets"):
88
92
  links = []
89
93
  host_id_field = get_host_id_field(self)
90
- for slot, schema in self._staged__schemas_m2m.items():
94
+ for slot, schema in self._staged_feature_sets.items():
91
95
  kwargs = {
92
96
  host_id_field: self.id,
93
97
  "schema_id": schema.id,
94
98
  "slot": slot,
95
99
  }
96
- links.append(Data._schemas_m2m.through(**kwargs))
100
+ links.append(Data.feature_sets.through(**kwargs))
97
101
  bulk_create(links, ignore_conflicts=True)
98
102
 
99
103
 
@@ -182,7 +186,7 @@ def _describe_sqlite(self: Artifact | Collection, print_types: bool = False):
182
186
  if isinstance(self, (Collection, Artifact)):
183
187
  many_to_many_fields.append("input_of_runs")
184
188
  if isinstance(self, Artifact):
185
- many_to_many_fields.append("_schemas_m2m")
189
+ many_to_many_fields.append("feature_sets")
186
190
  self = (
187
191
  self.__class__.objects.using(self._state.db)
188
192
  .prefetch_related(*many_to_many_fields)
@@ -335,10 +339,10 @@ def add_labels(
335
339
  else:
336
340
  validate_feature(feature, records) # type:ignore
337
341
  records_by_registry = defaultdict(list)
338
- _schemas_m2m = self._schemas_m2m.filter(itype="Feature").all()
342
+ feature_sets = self.feature_sets.filter(itype="Feature").all()
339
343
  internal_features = set() # type: ignore
340
- if len(_schemas_m2m) > 0:
341
- for schema in _schemas_m2m:
344
+ if len(feature_sets) > 0:
345
+ for schema in feature_sets:
342
346
  internal_features = internal_features.union(
343
347
  set(schema.members.values_list("name", flat=True))
344
348
  ) # type: ignore
@@ -357,7 +361,7 @@ def add_labels(
357
361
  f"Feature {feature.name} needs dtype='cat' for label annotation, currently has dtype='{feature.dtype}'"
358
362
  )
359
363
  if feature.dtype == "cat":
360
- feature.dtype = f"cat[{registry_name}]"
364
+ feature.dtype = f"cat[{registry_name}]" # type: ignore
361
365
  feature.save()
362
366
  elif registry_name not in feature.dtype:
363
367
  new_dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
@@ -386,13 +390,13 @@ def _track_run_input(
386
390
  is_run_input: bool | Run | None = None,
387
391
  run: Run | None = None,
388
392
  ):
389
- # this is an internal hack right now for project-flow, but we can allow this
390
- # for the user in the future
391
393
  if isinstance(is_run_input, Run):
392
394
  run = is_run_input
393
395
  is_run_input = True
394
396
  elif run is None:
395
- run = context.run
397
+ run = get_current_tracked_run()
398
+ if run is None:
399
+ run = context.run
396
400
  # consider that data is an iterable of Data
397
401
  data_iter: Iterable[Artifact] | Iterable[Collection] = (
398
402
  [data] if isinstance(data, (Artifact, Collection)) else data
lamindb/core/_describe.py CHANGED
@@ -76,7 +76,7 @@ def describe_header(self: Artifact | Collection | Run) -> Tree:
76
76
  if self._branch_code == 0:
77
77
  logger.warning("This artifact is hidden.")
78
78
  elif self._branch_code == -1:
79
- logger.warning("This artifact is the trash.")
79
+ logger.warning("This artifact is in the trash.")
80
80
  # initialize tree
81
81
  suffix = self.suffix if hasattr(self, "suffix") and self.suffix else ""
82
82
  accessor = self.otype if hasattr(self, "otype") and self.otype else ""
lamindb/core/_django.py CHANGED
@@ -105,7 +105,7 @@ def get_artifact_with_related(
105
105
 
106
106
  if include_schema:
107
107
  annotations["schemas"] = Subquery(
108
- model._schemas_m2m.through.objects.filter(artifact=OuterRef("pk"))
108
+ model.feature_sets.through.objects.filter(artifact=OuterRef("pk"))
109
109
  .annotate(
110
110
  data=JSONObject(
111
111
  id=F("id"),
lamindb/core/_feature_manager.py CHANGED
@@ -33,8 +33,8 @@ from lamindb._record import (
33
33
  )
34
34
  from lamindb._save import save
35
35
  from lamindb._schema import DICT_KEYS_TYPE, Schema
36
- from lamindb.core.exceptions import DoesNotExist, ValidationError
37
36
  from lamindb.core.storage import LocalPathClasses
37
+ from lamindb.errors import DoesNotExist, ValidationError
38
38
  from lamindb.models import (
39
39
  Artifact,
40
40
  Collection,
@@ -96,8 +96,8 @@ def get_schema_by_slot_(host: Artifact | Collection) -> dict:
96
96
  return {}
97
97
  # if the host is not yet saved
98
98
  if host._state.adding:
99
- if hasattr(host, "_staged__schemas_m2m"):
100
- return host._staged__schemas_m2m
99
+ if hasattr(host, "_staged_feature_sets"):
100
+ return host._staged_feature_sets
101
101
  else:
102
102
  return {}
103
103
  host_db = host._state.db
@@ -105,7 +105,7 @@ def get_schema_by_slot_(host: Artifact | Collection) -> dict:
105
105
  kwargs = {host_id_field: host.id}
106
106
  # otherwise, we need a query
107
107
  links_schema = (
108
- host._schemas_m2m.through.objects.using(host_db)
108
+ host.feature_sets.through.objects.using(host_db)
109
109
  .filter(**kwargs)
110
110
  .select_related("schema")
111
111
  )
@@ -118,7 +118,7 @@ def get_label_links(
118
118
  host_id_field = get_host_id_field(host)
119
119
  kwargs = {host_id_field: host.id, "feature_id": feature.id}
120
120
  link_records = (
121
- getattr(host, host.features._accessor_by_registry[registry])
121
+ getattr(host, host.features._accessor_by_registry[registry]) # type: ignore
122
122
  .through.objects.using(host._state.db)
123
123
  .filter(**kwargs)
124
124
  )
@@ -128,14 +128,14 @@ def get_label_links(
128
128
  def get_schema_links(host: Artifact | Collection) -> QuerySet:
129
129
  host_id_field = get_host_id_field(host)
130
130
  kwargs = {host_id_field: host.id}
131
- links_schema = host._schemas_m2m.through.objects.filter(**kwargs)
131
+ links_schema = host.feature_sets.through.objects.filter(**kwargs)
132
132
  return links_schema
133
133
 
134
134
 
135
135
  def get_link_attr(link: LinkORM | type[LinkORM], data: Artifact | Collection) -> str:
136
136
  link_model_name = link.__class__.__name__
137
137
  if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
138
- link_model_name = link.__name__
138
+ link_model_name = link.__name__ # type: ignore
139
139
  return link_model_name.replace(data.__class__.__name__, "").lower()
140
140
 
141
141
 
@@ -348,10 +348,10 @@ def describe_features(
348
348
 
349
349
  internal_feature_names: dict[str, str] = {}
350
350
  if isinstance(self, Artifact):
351
- _schemas_m2m = self._schemas_m2m.filter(itype="Feature").all()
351
+ feature_sets = self.feature_sets.filter(itype="Feature").all()
352
352
  internal_feature_names = {}
353
- if len(_schemas_m2m) > 0:
354
- for schema in _schemas_m2m:
353
+ if len(feature_sets) > 0:
354
+ for schema in feature_sets:
355
355
  internal_feature_names.update(
356
356
  dict(schema.members.values_list("name", "dtype"))
357
357
  )
@@ -466,7 +466,7 @@ def describe_features(
466
466
  Text.assemble(
467
467
  ("Dataset features", "bold bright_magenta"),
468
468
  ("/", "dim"),
469
- ("._schemas_m2m", "dim bold"),
469
+ ("schema", "dim bold"),
470
470
  )
471
471
  )
472
472
  for child in int_features_tree_children:
@@ -500,7 +500,7 @@ def describe_features(
500
500
  return tree
501
501
 
502
502
 
503
- def parse_staged__schemas_m2m_from_anndata(
503
+ def parse_staged_feature_sets_from_anndata(
504
504
  adata: AnnData,
505
505
  var_field: FieldAttr | None = None,
506
506
  obs_field: FieldAttr = Feature.name,
@@ -524,7 +524,7 @@ def parse_staged__schemas_m2m_from_anndata(
524
524
  if adata.X is None
525
525
  else convert_pandas_dtype_to_lamin_dtype(adata.X.dtype)
526
526
  )
527
- _schemas_m2m = {}
527
+ feature_sets = {}
528
528
  if var_field is not None:
529
529
  logger.info("parsing feature names of X stored in slot 'var'")
530
530
  logger.indent = " "
@@ -537,7 +537,7 @@ def parse_staged__schemas_m2m_from_anndata(
537
537
  raise_validation_error=False,
538
538
  )
539
539
  if schema_var is not None:
540
- _schemas_m2m["var"] = schema_var
540
+ feature_sets["var"] = schema_var
541
541
  logger.save(f"linked: {schema_var}")
542
542
  logger.indent = ""
543
543
  if schema_var is None:
@@ -552,12 +552,12 @@ def parse_staged__schemas_m2m_from_anndata(
552
552
  organism=organism,
553
553
  )
554
554
  if schema_obs is not None:
555
- _schemas_m2m["obs"] = schema_obs
555
+ feature_sets["obs"] = schema_obs
556
556
  logger.save(f"linked: {schema_obs}")
557
557
  logger.indent = ""
558
558
  if schema_obs is None:
559
559
  logger.warning("skip linking features to artifact in slot 'obs'")
560
- return _schemas_m2m
560
+ return feature_sets
561
561
 
562
562
 
563
563
  def is_valid_datetime_str(date_string: str) -> bool | str:
@@ -818,6 +818,8 @@ def _add_values(
818
818
  feature_param_field: The field of a reference registry to map keys of the
819
819
  dictionary.
820
820
  """
821
+ from .._tracked import get_current_tracked_run
822
+
821
823
  # rename to distinguish from the values inside the dict
822
824
  features_values = values
823
825
  keys = features_values.keys()
@@ -849,12 +851,20 @@ def _add_values(
849
851
  (key, infer_feature_type_convert_json(key, features_values[key]))
850
852
  for key in not_validated_keys
851
853
  ]
852
- hint = "\n".join(
853
- [
854
- f" ln.{model_name}(name='{key}', dtype='{dtype}').save(){message}"
855
- for key, (dtype, _, message) in not_validated_keys_dtype_message
856
- ]
857
- )
854
+ run = get_current_tracked_run()
855
+ if run is not None:
856
+ name = f"{run.transform.type}[{run.transform.key}]"
857
+ type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
858
+ elements = [type_hint]
859
+ type_kwarg = f", type={model_name.lower()}_type"
860
+ else:
861
+ elements = []
862
+ type_kwarg = ""
863
+ elements += [
864
+ f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
865
+ for key, (dtype, _, message) in not_validated_keys_dtype_message
866
+ ]
867
+ hint = "\n".join(elements)
858
868
  msg = (
859
869
  f"These keys could not be validated: {not_validated_keys.tolist()}\n"
860
870
  f"Here is how to create a {model_name.lower()}:\n\n{hint}"
@@ -928,7 +938,7 @@ def _add_values(
928
938
  validated_values = values_array[validated]
929
939
  if validated.sum() != len(values):
930
940
  not_validated_values += values_array[~validated].tolist()
931
- label_records = ULabel.from_values(validated_values, field="name")
941
+ label_records = ULabel.from_values(validated_values, field="name") # type: ignore
932
942
  features_labels["ULabel"] += [
933
943
  (feature, label_record) for label_record in label_records
934
944
  ]
@@ -1012,8 +1022,8 @@ def remove_values(
1012
1022
  if isinstance(feature, str):
1013
1023
  feature = Feature.get(name=feature)
1014
1024
  filter_kwargs = {"feature": feature}
1015
- if feature.dtype.startswith("cat["):
1016
- feature_registry = feature.dtype.replace("cat[", "").replace("]", "")
1025
+ if feature.dtype.startswith("cat["): # type: ignore
1026
+ feature_registry = feature.dtype.replace("cat[", "").replace("]", "") # type: ignore
1017
1027
  if value is not None:
1018
1028
  assert isinstance(value, Record) # noqa: S101
1019
1029
  # the below uses our convention for field names in link models
@@ -1071,12 +1081,12 @@ def add_schema(self, schema: Schema, slot: str) -> None:
1071
1081
  "slot": slot,
1072
1082
  }
1073
1083
  link_record = (
1074
- self._host._schemas_m2m.through.objects.using(host_db)
1084
+ self._host.feature_sets.through.objects.using(host_db)
1075
1085
  .filter(**kwargs)
1076
1086
  .one_or_none()
1077
1087
  )
1078
1088
  if link_record is None:
1079
- self._host._schemas_m2m.through(**kwargs).save(using=host_db)
1089
+ self._host.feature_sets.through(**kwargs).save(using=host_db)
1080
1090
  if slot in self._schema_by_slot:
1081
1091
  logger.debug(f"replaced existing {slot} feature set")
1082
1092
  self._schema_by_slot_[slot] = schema # type: ignore
@@ -1101,7 +1111,7 @@ def _add_set_from_df(
1101
1111
  mute=mute,
1102
1112
  organism=organism,
1103
1113
  )
1104
- self._host._staged__schemas_m2m = {"columns": schema}
1114
+ self._host._staged_feature_sets = {"columns": schema}
1105
1115
  self._host.save()
1106
1116
 
1107
1117
 
@@ -1120,7 +1130,7 @@ def _add_set_from_anndata(
1120
1130
 
1121
1131
  # parse and register features
1122
1132
  adata = self._host.load()
1123
- _schemas_m2m = parse_staged__schemas_m2m_from_anndata(
1133
+ feature_sets = parse_staged_feature_sets_from_anndata(
1124
1134
  adata,
1125
1135
  var_field=var_field,
1126
1136
  obs_field=obs_field,
@@ -1129,7 +1139,7 @@ def _add_set_from_anndata(
1129
1139
  )
1130
1140
 
1131
1141
  # link feature sets
1132
- self._host._staged__schemas_m2m = _schemas_m2m
1142
+ self._host._staged_feature_sets = feature_sets
1133
1143
  self._host.save()
1134
1144
 
1135
1145
 
@@ -1150,12 +1160,12 @@ def _add_set_from_mudata(
1150
1160
 
1151
1161
  # parse and register features
1152
1162
  mdata = self._host.load()
1153
- _schemas_m2m = {}
1154
- obs_features = Feature.from_values(mdata.obs.columns)
1163
+ feature_sets = {}
1164
+ obs_features = Feature.from_values(mdata.obs.columns) # type: ignore
1155
1165
  if len(obs_features) > 0:
1156
- _schemas_m2m["obs"] = Schema(features=obs_features)
1166
+ feature_sets["obs"] = Schema(features=obs_features)
1157
1167
  for modality, field in var_fields.items():
1158
- modality_fs = parse_staged__schemas_m2m_from_anndata(
1168
+ modality_fs = parse_staged_feature_sets_from_anndata(
1159
1169
  mdata[modality],
1160
1170
  var_field=field,
1161
1171
  obs_field=obs_fields.get(modality, Feature.name),
@@ -1163,22 +1173,22 @@ def _add_set_from_mudata(
1163
1173
  organism=organism,
1164
1174
  )
1165
1175
  for k, v in modality_fs.items():
1166
- _schemas_m2m[f"['{modality}'].{k}"] = v
1176
+ feature_sets[f"['{modality}'].{k}"] = v
1167
1177
 
1168
- def unify_staged__schemas_m2m_by_hash(_schemas_m2m):
1178
+ def unify_staged_feature_sets_by_hash(feature_sets):
1169
1179
  unique_values = {}
1170
1180
 
1171
- for key, value in _schemas_m2m.items():
1181
+ for key, value in feature_sets.items():
1172
1182
  value_hash = value.hash # Assuming each value has a .hash attribute
1173
1183
  if value_hash in unique_values:
1174
- _schemas_m2m[key] = unique_values[value_hash]
1184
+ feature_sets[key] = unique_values[value_hash]
1175
1185
  else:
1176
1186
  unique_values[value_hash] = value
1177
1187
 
1178
- return _schemas_m2m
1188
+ return feature_sets
1179
1189
 
1180
1190
  # link feature sets
1181
- self._host._staged__schemas_m2m = unify_staged__schemas_m2m_by_hash(_schemas_m2m)
1191
+ self._host._staged_feature_sets = unify_staged_feature_sets_by_hash(feature_sets)
1182
1192
  self._host.save()
1183
1193
 
1184
1194
 
@@ -1188,7 +1198,7 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
1188
1198
  if transfer_logs is None:
1189
1199
  transfer_logs = {"mapped": [], "transferred": [], "run": None}
1190
1200
  using_key = settings._using_key
1191
- for slot, schema in data.features._schema_by_slot.items():
1201
+ for slot, schema in data.features._schema_by_slot.items(): # type: ignore
1192
1202
  members = schema.members
1193
1203
  if len(members) == 0:
1194
1204
  continue
@@ -1248,8 +1258,8 @@ def make_external(self, feature: Feature) -> None:
1248
1258
  """
1249
1259
  if not isinstance(feature, Feature):
1250
1260
  raise TypeError("feature must be a Feature record!")
1251
- _schemas_m2m = Schema.filter(features=feature).all()
1252
- for fs in _schemas_m2m:
1261
+ feature_sets = Schema.filter(features=feature).all()
1262
+ for fs in feature_sets:
1253
1263
  f = Feature.filter(uid=feature.uid).all()
1254
1264
  features_updated = fs.members.difference(f)
1255
1265
  if len(features_updated) > 0:
@@ -1266,10 +1276,10 @@ def make_external(self, feature: Feature) -> None:
1266
1276
  if len(features_updated) == 0:
1267
1277
  logger.warning(f"deleting empty feature set: {fs}")
1268
1278
  fs.artifacts.set([])
1269
- fs._artifacts_m2m.set([])
1270
1279
  fs.delete()
1271
1280
 
1272
1281
 
1282
+ # mypy: ignore-errors
1273
1283
  FeatureManager.__init__ = __init__
1274
1284
  ParamManager.__init__ = __init__
1275
1285
  FeatureManager.__repr__ = __repr__
lamindb/core/_label_manager.py CHANGED
@@ -35,7 +35,7 @@ if TYPE_CHECKING:
35
35
  from lamindb._query_set import QuerySet
36
36
  from lamindb.models import Artifact, Collection, Record
37
37
 
38
- EXCLUDE_LABELS = {"_schemas_m2m"}
38
+ EXCLUDE_LABELS = {"feature_sets"}
39
39
 
40
40
 
41
41
  def _get_labels(
@@ -106,7 +106,7 @@ def describe_labels(
106
106
  pad_edge=False,
107
107
  )
108
108
  for related_name, labels in labels_data.items():
109
- if not labels or related_name == "_schemas_m2m":
109
+ if not labels or related_name == "feature_sets":
110
110
  continue
111
111
  if isinstance(labels, dict): # postgres, labels are a dict[id, name]
112
112
  print_values = _format_values(labels.values(), n=10, quotes=False)
@@ -286,12 +286,12 @@ class LabelManager:
286
286
  )
287
287
  for feature in new_features:
288
288
  transfer_to_default_db(
289
- feature,
289
+ feature, # type: ignore
290
290
  using_key,
291
291
  transfer_logs=transfer_logs,
292
292
  transfer_fk=False,
293
293
  )
294
- save(new_features)
294
+ save(new_features) # type: ignore
295
295
  if hasattr(self._host, related_name):
296
296
  for feature_name, feature_labels in labels_by_features.items():
297
297
  if feature_name is not None: