lamindb 1.12.1__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,4 +1,4 @@
-"""A data lakehouse for biology.
+"""A data framework for biology.
 
 Data lineage
 ============
@@ -110,7 +110,7 @@ Backwards compatibility.
 
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.12.1"
+__version__ = "1.13.0"
 
 import warnings as _warnings
 
lamindb/_finish.py CHANGED
@@ -495,7 +495,7 @@ def save_context_core(
         )
 
     logger.important(
-        f"finished Run('{run.uid[:8]}') after {formatted_run_time} at {format_field_value(run.finished_at)}"
+        f"finished Run('{run.uid}') after {formatted_run_time} at {format_field_value(run.finished_at)}"
     )
     if ln_setup.settings.instance.is_on_hub:
         instance_slug = ln_setup.settings.instance.slug
lamindb/_tracked.py CHANGED
@@ -4,9 +4,8 @@ from contextvars import ContextVar
 from datetime import datetime, timezone
 from typing import Callable, ParamSpec, TypeVar
 
-from .core._context import context
+from .core._context import context, serialize_params_to_json
 from .models import Run, Transform
-from .models._feature_manager import infer_feature_type_convert_json
 
 P = ParamSpec("P")
 R = TypeVar("R")
@@ -92,26 +91,15 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
         run = Run(transform=transform, initiated_by_run=initiated_by_run)  # type: ignore
         run.started_at = datetime.now(timezone.utc)
         run._status_code = -1  # started
-        run.save()
 
         # Bind arguments to get a mapping of parameter names to values
         bound_args = sig.bind(*args, **kwargs)
         bound_args.apply_defaults()
         params = dict(bound_args.arguments)
 
-        # Remove the run parameter if it exists (we'll inject our own)
-        params.pop("run", None)
-
-        # Deal with non-trivial parameter values
-        filtered_params = {}
-        for key, value in params.items():
-            dtype, _, _ = infer_feature_type_convert_json(key, value)
-            if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
-                continue
-            filtered_params[key] = value
-
         # Add parameters to the run
-        run.features.add_values(filtered_params)
+        run.params = serialize_params_to_json(params)
+        run.save()
 
         # Set the run in context and execute function
         token = current_tracked_run.set(run)
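The `@ln.tracked()` decorator now writes the bound function arguments to the new JSON field `run.params` via `serialize_params_to_json` instead of attaching them as feature values. A minimal sketch of the new behavior, assuming an initialized lamindb instance; the function, its parameters, and the query at the end are illustrative::

    import lamindb as ln

    @ln.tracked()
    def subset_dataframe(input_path: str, subset_rows: int = 2) -> None:
        ...  # illustrative body: read, subset, and re-save a dataset

    subset_dataframe("my_input.parquet", subset_rows=5)

    # the run created for this call carries the bound arguments as JSON
    run = ln.Run.filter(transform__key__icontains="subset_dataframe").order_by("-started_at").first()
    # run.params -> {"input_path": "my_input.parquet", "subset_rows": 5}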
lamindb/core/_context.py CHANGED
@@ -23,7 +23,8 @@ from ..errors import (
     TrackNotCalled,
     UpdateContext,
 )
-from ..models import Run, Transform, format_field_value
+from ..models import Run, SQLRecord, Transform, format_field_value
+from ..models._feature_manager import infer_feature_type_convert_json
 from ..models._is_versioned import bump_version as bump_version_function
 from ..models._is_versioned import (
     increment_base62,
@@ -235,6 +236,22 @@ class LogStreamTracker:
         self.original_excepthook(exc_type, exc_value, exc_traceback)
 
 
+def serialize_params_to_json(params: dict) -> dict:
+    serialized_params = {}
+    for key, value in params.items():
+        if isinstance(value, SQLRecord):
+            value = f"{value.__class__.__get_name_with_module__()}[{value.uid}]"
+        else:
+            dtype, _, _ = infer_feature_type_convert_json(key, value)
+            if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
+                logger.warning(
+                    f"skipping param {key} because dtype not JSON serializable"
+                )
+                continue
+        serialized_params[key] = value
+    return serialized_params
+
+
 class Context:
     """Run context.
 
@@ -325,6 +342,7 @@ class Context:
         project: str | Project | None = None,
         space: str | Space | None = None,
         branch: str | Branch | None = None,
+        features: dict | None = None,
         params: dict | None = None,
         new_run: bool | None = None,
         path: str | None = None,
@@ -343,7 +361,8 @@ class Context:
                 Default: the `"all"` space. Note that bionty entities ignore this setting and always get written to the `"all"` space.
                 If you want to manually move entities to a different space, set the `.space` field (:doc:`docs:access`).
             branch: A branch (or its `name` or `uid`) on which to store records.
-            params: A dictionary of parameters to track for the run.
+            features: A dictionary of features & values to track for the run.
+            params: A dictionary of params & values to track for the run.
             new_run: If `False`, loads the latest run of transform
                 (default notebook), if `True`, creates new run (default non-notebook).
             path: Filepath of notebook or script. Only needed if it can't be
@@ -465,10 +484,14 @@ class Context:
         transform_exists = Transform.filter(id=transform.id).first()
         if transform_exists is None:
             transform.save()
-            self._logging_message_track += f"created Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"created Transform('{transform.uid}', key='{transform.key}')"
+            )
             transform_exists = transform
         else:
-            self._logging_message_track += f"loaded Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"loaded Transform('{transform.uid}', key='{transform.key}')"
+            )
         self._transform = transform_exists
 
         if new_run is None:  # for notebooks, default to loading latest runs
@@ -493,25 +516,26 @@ class Context:
         if run is not None:  # loaded latest run
             run.started_at = datetime.now(timezone.utc)  # update run time
             run._status_code = -2  # re-started
-            self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
+            self._logging_message_track += f", re-started Run('{run.uid}') at {format_field_value(run.started_at)}"
 
         if run is None:  # create new run
-            run = Run(  # type: ignore
-                transform=self._transform,
-                params=params,
-            )
+            run = Run(transform=self._transform)
             run.started_at = datetime.now(timezone.utc)
             run._status_code = -1  # started
-            self._logging_message_track += f", started new Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
+            self._logging_message_track += f", started new Run('{run.uid}') at {format_field_value(run.started_at)}"
         # can only determine at ln.finish() if run was consecutive in
         # interactive session, otherwise, is consecutive
         run.is_consecutive = True if is_run_from_ipython else None
-        # need to save in all cases
-        run.save()
         if params is not None:
-            run.features.add_values(params)
+            run.params = serialize_params_to_json(params)
             self._logging_message_track += "\n→ params: " + ", ".join(
-                f"{key}={value}" for key, value in params.items()
+                f"{key}={value}" for key, value in run.params.items()
+            )
+        run.save()  # need to save now
+        if features is not None:
+            run.features.add_values(features)
+            self._logging_message_track += "\n→ features: " + ", ".join(
+                f"{key}={value}" for key, value in features.items()
             )
         self._run = run
         track_python_environment(run)
@@ -835,7 +859,9 @@ class Context:
                 reference_type=transform_ref_type,
                 type=transform_type,
             ).save()
-            self._logging_message_track += f"created Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"created Transform('{transform.uid}', key='{transform.key}')"
+            )
         else:
             uid = transform.uid
             # transform was already saved via `finish()`
@@ -874,9 +900,7 @@ class Context:
                 if transform_hash != transform.hash:
                     bump_revision = True
                 else:
-                    self._logging_message_track += (
-                        f"loaded Transform('{transform.uid}')"
-                    )
+                    self._logging_message_track += f"loaded Transform('{transform.uid}', key='{transform.key}')"
                 if bump_revision:
                     change_type = (
                         "re-running notebook with already-saved source code"
@@ -890,7 +914,9 @@ class Context:
                         f'✗ {change_type}, please update the `uid` argument in `track()` to "{uid[:-4]}{increment_base62(uid[-4:])}"'
                     )
                 else:
-                    self._logging_message_track += f"loaded Transform('{transform.uid}')"
+                    self._logging_message_track += (
+                        f"loaded Transform('{transform.uid}', key='{transform.key}')"
+                    )
             self._transform = transform
 
     def _finish(self, ignore_non_consecutive: None | bool = None) -> None:
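`track()` now distinguishes `params` (serialized to the JSON `run.params` field) from `features` (validated and attached via `run.features.add_values()`). A hedged usage sketch; the parameter values are illustrative and the feature name assumes a matching `Feature` record exists::

    import lamindb as ln

    ln.track(
        params={"learning_rate": 0.01, "downsample": True},  # stored as JSON on run.params
        features={"experiment": "EXP-0042"},  # requires an existing Feature named "experiment"
    )
    # ... do work ...
    ln.finish()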
@@ -924,7 +924,7 @@ class SpatialDataCatManager(CatManager):
         )
 
 
-class TiledbsomaCatManager(CatManager):
+class TiledbsomaCatManager(CatManager):  # pragma: no cover
     """Categorical manager for `tiledbsoma.Experiment`."""
 
     def __init__(
lamindb/curators/core.py CHANGED
@@ -289,13 +289,7 @@ class Curator:
 
         artifact_info = ""
         if self._artifact is not None:
-            artifact_uid = getattr(self._artifact, "uid", str(self._artifact))
-            short_uid = (
-                str(artifact_uid)[:8] + "..."
-                if len(str(artifact_uid)) > 8
-                else str(artifact_uid)
-            )
-            artifact_info = f", artifact: {colors.italic(short_uid)}"
+            artifact_info = f", artifact: {colors.italic(self._artifact.uid)}"
 
         return (
             f"{cls_name}{artifact_info}(Schema: {schema_str}{extra_info}{status_str})"
@@ -337,7 +331,7 @@ class SlotsCurator(Curator):
     def validate(self) -> None:
         """{}"""  # noqa: D415
         for slot, curator in self._slots.items():
-            logger.info(f"validating slot {slot} ...")
+            logger.debug(f"validating slot {slot} ...")
             curator.validate()
         # set _is_validated to True as no slot raised an error
         self._is_validated = True
@@ -403,6 +397,16 @@ class SlotsCurator(Curator):
         )
 
 
+def convert_dict_to_dataframe_for_validation(d: dict, schema: Schema) -> pd.DataFrame:
+    """Convert a dictionary to a DataFrame for validation against a schema."""
+    df = pd.DataFrame([d])
+    for feature in schema.members:
+        if feature.dtype.startswith("cat"):
+            if feature.name in df.columns:
+                df[feature.name] = pd.Categorical(df[feature.name])
+    return df
+
+
 # This is also currently used as DictCurator by flattening dictionaries into wide DataFrames.
 # Such an approach was never intended and there is room for a DictCurator in the future.
 # For more context, read https://laminlabs.slack.com/archives/C07DB677JF6/p1753994077716099 and
@@ -702,13 +706,11 @@ class DataFrameCurator(SlotsCurator):
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
 
-        # Create atomic curator for features only
-        if len(self._schema.features.all()) > 0:
-            self._atomic_curator = ComponentCurator(
-                dataset=dataset,
-                schema=schema,
-                slot=slot,
-            )
+        self._atomic_curator = ComponentCurator(
+            dataset=dataset,
+            schema=schema,
+            slot=slot,
+        )
 
         # Handle (nested) attrs
         if slot is None and schema.slots:
@@ -724,11 +726,11 @@ class DataFrameCurator(SlotsCurator):
                     data = _resolve_schema_slot_path(
                         attrs_dict, deeper_keys, slot_name, "attrs"
                     )
-                    df = pd.DataFrame([data])
+                    df = convert_dict_to_dataframe_for_validation(data, slot_schema)
                     self._slots[slot_name] = ComponentCurator(
                         df, slot_schema, slot=slot_name
                     )
-                else:
+                elif slot_name != "__external__":
                     raise ValueError(
                         f"Slot '{slot_name}' is not supported for DataFrameCurator. Must be 'attrs'."
                     )
@@ -783,6 +785,26 @@ class DataFrameCurator(SlotsCurator):
         )
 
 
+class ExperimentalDictCurator(DataFrameCurator):
+    """Curator for `dict` based on `DataFrameCurator`."""
+
+    def __init__(
+        self,
+        dataset: dict | Artifact,
+        schema: Schema,
+        slot: str | None = None,
+    ) -> None:
+        if not isinstance(dataset, dict) and not isinstance(dataset, Artifact):
+            raise InvalidArgument("The dataset must be a dict or dict-like artifact.")
+        if isinstance(dataset, Artifact):
+            assert dataset.otype == "dict", "Artifact must be of otype 'dict'."  # noqa: S101
+            d = dataset.load(is_run_input=False)
+        else:
+            d = dataset
+        df = convert_dict_to_dataframe_for_validation(d, schema)
+        super().__init__(df, schema, slot=slot)
+
+
 def _resolve_schema_slot_path(
     target_dict: dict[str, Any], slot_keys: Iterable[str], slot: str, base_path: str
 ) -> Any:
@@ -803,13 +825,18 @@ def _resolve_schema_slot_path(
         base_path += f"['{key}']"
         try:
             current = current[key]
-        except KeyError:
+        except (
+            KeyError,
+            TypeError,
+        ):  # if not a dict, raises TypeError; if a dict and key not found, raises KeyError
             available = (
-                list(current.keys()) if isinstance(current, dict) else "not a dict"
+                list(current.keys())
+                if isinstance(current, dict)
+                else "none (not a dict)"
             )
             raise InvalidArgument(
                 f"Schema slot '{slot}' requires keys {base_path} but key '{key}' "
-                f"not found. Available keys at this level: {available}"
+                f"not found. Available keys at this level: {available}."
            ) from None
 
    return current
@@ -1478,7 +1505,10 @@ class CatVector:
             if type_record is not None:
                 # if subtype_str is set, we need to set the type for new records
                 init_kwargs["type"] = type_record
-            non_validated_records.append(registry(**init_kwargs, **create_kwargs))
+            # here we create non-validated records skipping validation since we already ensured that they don't exist
+            non_validated_records.append(
+                registry(**init_kwargs, **create_kwargs, _skip_validation=True)
+            )
         if len(non_validated_records) > 0:
             ln_save(non_validated_records)
         model_field = colors.italic(registry.__get_name_with_module__())
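The new `convert_dict_to_dataframe_for_validation` helper casts categorical features to `pd.Categorical` so that a single-row DataFrame built from a `dict` validates like any other DataFrame, and `ExperimentalDictCurator` wraps this for `dict` datasets or artifacts with `otype == "dict"`. A rough sketch of the intended flow; the feature names, dtypes, and schema construction are illustrative assumptions::

    import lamindb as ln
    from lamindb.curators.core import ExperimentalDictCurator

    schema = ln.Schema(
        features=[
            ln.Feature(name="assay", dtype=str).save(),
            ln.Feature(name="n_cells", dtype=int).save(),
        ],
    ).save()

    metadata = {"assay": "scRNA-seq", "n_cells": 1200}
    curator = ExperimentalDictCurator(metadata, schema)
    curator.validate()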
lamindb/errors.py CHANGED
@@ -52,6 +52,12 @@ class UnknownStorageLocation(Exception):
     pass
 
 
+class NoStorageLocationForSpace(Exception):
+    """No storage location found for space."""
+
+    pass
+
+
 # equivalent to Django's DoesNotExist
 # and SQLAlchemy's NoResultFound
 class DoesNotExist(Exception):
@@ -267,7 +267,7 @@ def anndata_file_pbmc68k_test() -> Path:
 
     To reproduce::
 
-        pbmc68k = ln.core.datasets.anndata_pbmc68k_reduced()
+        pbmc68k = ln.examples.datasets.anndata_pbmc68k_reduced()
         pbmc68k_test = pbmc68k[:30, :200].copy()
         pbmc68k_test.raw = pbmc68k_test[:, :100]
         pbmc68k_test.obsp["test"] = sparse.eye(pbmc68k_test.shape[0], format="csr")
@@ -9,29 +9,11 @@
     lightning
 """
 
-from typing import Any
-
-
-def __getattr__(attr_name: str) -> Any:
-    # Defers import until accessed to avoid requiring PyTorch Lightning
-    if attr_name == "lightning":
-        from lamindb.integrations import _lightning
-
-        return _lightning
-    raise AttributeError(f"module has no attribute {attr_name!r}")
-
-
 from lamindb.core.storage import save_tiledbsoma_experiment
 
 from ._croissant import curate_from_croissant
 from ._vitessce import save_vitessce_config
 
-
-def __dir__():
-    # Makes lazy imports discoverable to dir() to enable autocomplete including lazy modules
-    return __all__
-
-
 __all__ = [
     "lightning",
     "save_tiledbsoma_experiment",
@@ -1,4 +1,4 @@
-"""PyTorch Lightning integrations.
+"""PyTorch Lightning.
 
 .. autosummary::
    :toctree: .
@@ -20,21 +20,24 @@ class Callback(pl.Callback):
 
     Creates version families of artifacts for given `key` (relative file path).
 
+    See also: :doc:`docs:mlflow` & :doc:`docs:wandb`.
+
     Args:
-        path: Path to the checkpoint
-        key: Artifact key
-        features: Additional feature values that every checkpoint gets annotated by.
+        path: A local path to the checkpoint.
+        key: The `key` for the checkpoint artifact.
+        features: Features to annotate the checkpoint.
 
     Examples:
 
-        Create a callback which creates artifacts for checkpoints and annotates them by the MLflow run ID
+        Create a callback that creates artifacts for checkpoints and annotates them by the MLflow run ID::
 
-            lamindb_callback = ln.integrations.lightning.Callback(
-                path=checkpoint_filename, key=artifact_key, annotate_by={ "mlflow_run_id": mlflow_run.info.run_id }
-            )
-            trainer = pl.Trainer(
-                callbacks=[lamindb_callback]
+            import lightning as pl
+            from lamindb.integrations import lightning as ll
+
+            lamindb_callback = ll.Callback(
+                path=checkpoint_filename, key=artifact_key, features={"mlflow_run_id": mlflow_run.info.run_id}
             )
+            trainer = pl.Trainer(callbacks=[lamindb_callback])
     """
 
     def __init__(
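For context, a slightly fuller version of the docstring example above, with the free variables (`checkpoint_filename`, `artifact_key`, `mlflow_run`, the model and datamodule) filled in with illustrative values::

    import lightning as pl
    import mlflow
    from lamindb.integrations import lightning as ll

    with mlflow.start_run() as mlflow_run:
        lamindb_callback = ll.Callback(
            path="checkpoints/last.ckpt",  # local checkpoint file written by Lightning
            key="models/autoencoder.ckpt",  # artifact key in LaminDB storage
            features={"mlflow_run_id": mlflow_run.info.run_id},
        )
        trainer = pl.Trainer(callbacks=[lamindb_callback], max_epochs=1)
        trainer.fit(model, datamodule=datamodule)  # model & datamodule defined elsewhere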
@@ -0,0 +1,17 @@
+# Generated by Django 5.2 on 2025-10-13 07:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("lamindb", "0133_artifactuser_artifact_users"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="run",
+            name="params",
+            field=models.JSONField(null=True),
+        ),
+    ]