lamindb 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/models/record.py CHANGED
@@ -58,11 +58,11 @@ from lamindb_setup._connect_instance import (
58
58
  update_db_using_local,
59
59
  )
60
60
  from lamindb_setup.core._docs import doc_args
61
- from lamindb_setup.core._hub_core import connect_instance_hub
61
+ from lamindb_setup.core._hub_core import access_db, connect_instance_hub
62
62
  from lamindb_setup.core._settings_store import instance_settings_file
63
+ from lamindb_setup.core.django import db_token_manager
63
64
  from lamindb_setup.core.upath import extract_suffix_from_path
64
65
 
65
- from lamindb.base import deprecated
66
66
  from lamindb.base.fields import (
67
67
  CharField,
68
68
  DateTimeField,
@@ -186,8 +186,7 @@ def update_attributes(record: Record, attributes: dict[str, str]):
186
186
  if (
187
187
  getattr(record, key) != value
188
188
  and value is not None
189
- and key != "dtype"
190
- and key != "_aux"
189
+ and key not in {"dtype", "otype", "_aux"}
191
190
  ):
192
191
  logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
193
192
  setattr(record, key, value)
@@ -370,6 +369,8 @@ class Registry(ModelBase):
370
369
  Note: `Registry` inherits from Django's `ModelBase`.
371
370
  """
372
371
 
372
+ _available_fields: set[str] = None
373
+
373
374
  def __new__(cls, name, bases, attrs, **kwargs):
374
375
  new_class = super().__new__(cls, name, bases, attrs, **kwargs)
375
376
  return new_class
@@ -487,10 +488,12 @@ class Registry(ModelBase):
487
488
  - Guide: :doc:`docs:registries`
488
489
  - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
489
490
 
490
- Examples::
491
+ Examples:
492
+
493
+ ::
491
494
 
492
- ulabel = ln.ULabel.get("FvtpPJLJ")
493
- ulabel = ln.ULabel.get(name="my-label")
495
+ ulabel = ln.ULabel.get("FvtpPJLJ")
496
+ ulabel = ln.ULabel.get(name="my-label")
494
497
  """
495
498
  from .query_set import QuerySet
496
499
 
@@ -594,17 +597,18 @@ class Registry(ModelBase):
594
597
  """
595
598
  from .query_set import QuerySet
596
599
 
597
- if instance is None:
600
+ # we're in the default instance
601
+ if instance is None or instance == "default":
598
602
  return QuerySet(model=cls, using=None)
599
-
600
603
  owner, name = get_owner_name_from_identifier(instance)
601
- if f"{owner}/{name}" == setup_settings.instance.slug:
604
+ if [owner, name] == setup_settings.instance.slug.split("/"):
602
605
  return QuerySet(model=cls, using=None)
603
606
 
604
- settings_file = instance_settings_file(name, owner)
605
- cache_filepath = (
606
- ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
607
+ # move on to different instances
608
+ cache_using_filepath = (
609
+ setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
607
610
  )
611
+ settings_file = instance_settings_file(name, owner)
608
612
  if not settings_file.exists():
609
613
  result = connect_instance_hub(owner=owner, name=name)
610
614
  if isinstance(result, str):
@@ -612,23 +616,47 @@ class Registry(ModelBase):
612
616
  f"Failed to load instance {instance}, please check your permissions!"
613
617
  )
614
618
  iresult, _ = result
615
- source_module = {
616
- modules for modules in iresult["schema_str"].split(",") if modules != ""
617
- } # type: ignore
618
- target_module = ln_setup.settings.instance.modules
619
- if not source_module.issubset(target_module):
620
- missing_members = source_module - target_module
621
- logger.warning(
622
- f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
623
- )
624
- cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
625
- settings_file = instance_settings_file(name, owner)
619
+ # do not use {} syntax below, it gives rise to a dict if the schema modules
620
+ # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
621
+ source_modules = set( # noqa
622
+ [mod for mod in iresult["schema_str"].split(",") if mod != ""]
623
+ )
624
+ # this just retrieves the full connection string from iresult
626
625
  db = update_db_using_local(iresult, settings_file)
626
+ cache_using_filepath.write_text(
627
+ f"{iresult['lnid']}\n{iresult['schema_str']}"
628
+ )
629
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
630
+ is_fine_grained_access = (
631
+ iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
632
+ )
633
+ # access_db can take both: the dict from connect_instance_hub and isettings
634
+ into_access_db = iresult
627
635
  else:
628
636
  isettings = load_instance_settings(settings_file)
637
+ source_modules = isettings.modules
629
638
  db = isettings.db
630
- cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}") # type: ignore
639
+ cache_using_filepath.write_text(
640
+ f"{isettings.uid}\n{','.join(source_modules)}"
641
+ )
642
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
643
+ is_fine_grained_access = (
644
+ isettings._fine_grained_access and isettings._db_permissions == "jwt"
645
+ )
646
+ # access_db can take both: the dict from connect_instance_hub and isettings
647
+ into_access_db = isettings
648
+
649
+ target_modules = setup_settings.instance.modules
650
+ if not (missing_members := source_modules - target_modules):
651
+ logger.warning(
652
+ f"source modules has additional modules: {missing_members}\n"
653
+ "consider mounting these registry modules to transfer all metadata"
654
+ )
655
+
631
656
  add_db_connection(db, instance)
657
+ if is_fine_grained_access:
658
+ db_token = access_db(into_access_db)
659
+ db_token_manager.set(db_token, instance)
632
660
  return QuerySet(model=cls, using=instance)
633
661
 
634
662
  def __get_module_name__(cls) -> str:
@@ -638,10 +666,6 @@ class Registry(ModelBase):
638
666
  module_name = "core"
639
667
  return module_name
640
668
 
641
- @deprecated("__get_module_name__")
642
- def __get_schema_name__(cls) -> str:
643
- return cls.__get_module_name__()
644
-
645
669
  def __get_name_with_module__(cls) -> str:
646
670
  module_name = cls.__get_module_name__()
647
671
  if module_name == "core":
@@ -650,9 +674,19 @@ class Registry(ModelBase):
650
674
  module_prefix = f"{module_name}."
651
675
  return f"{module_prefix}{cls.__name__}"
652
676
 
653
- @deprecated("__get_name_with_module__")
654
- def __get_name_with_schema__(cls) -> str:
655
- return cls.__get_name_with_module__()
677
+ def __get_available_fields__(cls) -> set[str]:
678
+ if cls._available_fields is None:
679
+ cls._available_fields = {
680
+ f.name
681
+ for f in cls._meta.get_fields()
682
+ if not f.name.startswith("_")
683
+ and not f.name.startswith("links_")
684
+ and not f.name.endswith("_id")
685
+ }
686
+ if cls.__name__ == "Artifact":
687
+ cls._available_fields.add("visibility")
688
+ cls._available_fields.add("transform")
689
+ return cls._available_fields
656
690
 
657
691
 
658
692
  class BasicRecord(models.Model, metaclass=Registry):
@@ -668,75 +702,86 @@ class BasicRecord(models.Model, metaclass=Registry):
668
702
 
669
703
  def __init__(self, *args, **kwargs):
670
704
  skip_validation = kwargs.pop("_skip_validation", False)
671
- if not args and skip_validation:
672
- super().__init__(**kwargs)
673
- elif not args and not skip_validation:
674
- from ..core._settings import settings
675
- from .can_curate import CanCurate
676
- from .collection import Collection
677
- from .schema import Schema
678
- from .transform import Transform
679
-
680
- validate_fields(self, kwargs)
681
-
682
- # do not search for names if an id is passed; this is important
683
- # e.g. when synching ids from the notebook store to lamindb
684
- has_consciously_provided_uid = False
685
- if "_has_consciously_provided_uid" in kwargs:
686
- has_consciously_provided_uid = kwargs.pop(
687
- "_has_consciously_provided_uid"
688
- )
705
+ if not args:
689
706
  if (
690
- isinstance(self, (CanCurate, Collection, Transform))
691
- and settings.creation.search_names
692
- and not has_consciously_provided_uid
707
+ issubclass(self.__class__, Record)
708
+ and not self.__class__.__name__ == "Storage"
709
+ # do not save bionty entities in restricted spaces by default
710
+ and self.__class__.__module__ != "bionty.models"
693
711
  ):
694
- name_field = getattr(self, "_name_field", "name")
695
- exact_match = suggest_records_with_similar_names(
696
- self, name_field, kwargs
697
- )
698
- if exact_match is not None:
699
- if "version" in kwargs:
700
- if kwargs["version"] is not None:
701
- version_comment = " and version"
702
- existing_record = self.__class__.filter(
703
- **{
704
- name_field: kwargs[name_field],
705
- "version": kwargs["version"],
706
- }
707
- ).one_or_none()
712
+ from lamindb import context as run_context
713
+
714
+ if run_context.space is not None:
715
+ kwargs["space"] = run_context.space
716
+ if skip_validation:
717
+ super().__init__(**kwargs)
718
+ else:
719
+ from ..core._settings import settings
720
+ from .can_curate import CanCurate
721
+ from .collection import Collection
722
+ from .schema import Schema
723
+ from .transform import Transform
724
+
725
+ validate_fields(self, kwargs)
726
+
727
+ # do not search for names if an id is passed; this is important
728
+ # e.g. when synching ids from the notebook store to lamindb
729
+ has_consciously_provided_uid = False
730
+ if "_has_consciously_provided_uid" in kwargs:
731
+ has_consciously_provided_uid = kwargs.pop(
732
+ "_has_consciously_provided_uid"
733
+ )
734
+ if (
735
+ isinstance(self, (CanCurate, Collection, Transform))
736
+ and settings.creation.search_names
737
+ and not has_consciously_provided_uid
738
+ ):
739
+ name_field = getattr(self, "_name_field", "name")
740
+ exact_match = suggest_records_with_similar_names(
741
+ self, name_field, kwargs
742
+ )
743
+ if exact_match is not None:
744
+ if "version" in kwargs:
745
+ if kwargs["version"] is not None:
746
+ version_comment = " and version"
747
+ existing_record = self.__class__.filter(
748
+ **{
749
+ name_field: kwargs[name_field],
750
+ "version": kwargs["version"],
751
+ }
752
+ ).one_or_none()
753
+ else:
754
+ # for a versioned record, an exact name match is not a criterion
755
+ # for retrieving a record in case `version` isn't passed -
756
+ # we'd always pull out many records with exactly the same name
757
+ existing_record = None
708
758
  else:
709
- # for a versioned record, an exact name match is not a criterion
710
- # for retrieving a record in case `version` isn't passed -
711
- # we'd always pull out many records with exactly the same name
712
- existing_record = None
713
- else:
714
- version_comment = ""
715
- existing_record = exact_match
716
- if existing_record is not None:
717
- logger.important(
718
- f"returning existing {self.__class__.__name__} record with same"
719
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
720
- )
721
- if isinstance(self, Schema):
722
- if existing_record.hash != kwargs["hash"]:
723
- raise ValueError(
724
- f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
725
- )
726
- init_self_from_db(self, existing_record)
727
- update_attributes(self, kwargs)
728
- return None
729
- super().__init__(**kwargs)
730
- if isinstance(self, ValidateFields):
731
- # this will trigger validation against django validators
732
- try:
733
- if hasattr(self, "clean_fields"):
734
- self.clean_fields()
735
- else:
736
- self._Model__clean_fields()
737
- except DjangoValidationError as e:
738
- message = _format_django_validation_error(self, e)
739
- raise FieldValidationError(message) from e
759
+ version_comment = ""
760
+ existing_record = exact_match
761
+ if existing_record is not None:
762
+ logger.important(
763
+ f"returning existing {self.__class__.__name__} record with same"
764
+ f" {name_field}{version_comment}: '{kwargs[name_field]}'"
765
+ )
766
+ if isinstance(self, Schema):
767
+ if existing_record.hash != kwargs["hash"]:
768
+ logger.warning(
769
+ f"You're updating schema {existing_record.uid}, which might already have been used to validate datasets. Be careful."
770
+ )
771
+ init_self_from_db(self, existing_record)
772
+ update_attributes(self, kwargs)
773
+ return None
774
+ super().__init__(**kwargs)
775
+ if isinstance(self, ValidateFields):
776
+ # this will trigger validation against django validators
777
+ try:
778
+ if hasattr(self, "clean_fields"):
779
+ self.clean_fields()
780
+ else:
781
+ self._Model__clean_fields()
782
+ except DjangoValidationError as e:
783
+ message = _format_django_validation_error(self, e)
784
+ raise FieldValidationError(message) from e
740
785
  elif len(args) != len(self._meta.concrete_fields):
741
786
  raise FieldValidationError(
742
787
  f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
@@ -838,16 +883,20 @@ class BasicRecord(models.Model, metaclass=Registry):
838
883
  if k != "run":
839
884
  logger.important(f"{k} records: {', '.join(v)}")
840
885
 
841
- if self.__class__.__name__ in {
842
- "Artifact",
843
- "Transform",
844
- "Run",
845
- "ULabel",
846
- "Feature",
847
- "Schema",
848
- "Collection",
849
- "Reference",
850
- }:
886
+ if (
887
+ self.__class__.__name__
888
+ in {
889
+ "Artifact",
890
+ "Transform",
891
+ "Run",
892
+ "ULabel",
893
+ "Feature",
894
+ "Schema",
895
+ "Collection",
896
+ "Reference",
897
+ }
898
+ and self._branch_code >= 1
899
+ ):
851
900
  import lamindb as ln
852
901
 
853
902
  if ln.context.project is not None:
@@ -1313,10 +1362,12 @@ def get_transfer_run(record) -> Run:
1313
1362
 
1314
1363
  slug = record._state.db
1315
1364
  owner, name = get_owner_name_from_identifier(slug)
1316
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1317
- if not cache_filepath.exists():
1365
+ cache_using_filepath = (
1366
+ ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1367
+ )
1368
+ if not cache_using_filepath.exists():
1318
1369
  raise SystemExit("Need to call .using() before")
1319
- instance_uid = cache_filepath.read_text().split("\n")[0]
1370
+ instance_uid = cache_using_filepath.read_text().split("\n")[0]
1320
1371
  key = f"transfers/{instance_uid}"
1321
1372
  uid = instance_uid + "0000"
1322
1373
  transform = Transform.filter(uid=uid).one_or_none()
@@ -1399,11 +1450,13 @@ def transfer_to_default_db(
1399
1450
  def track_current_key_and_name_values(record: Record):
1400
1451
  from lamindb.models import Artifact
1401
1452
 
1453
+ # below, we're using __dict__ to avoid triggering the refresh from the database
1454
+ # which can lead to a recursion
1402
1455
  if isinstance(record, Artifact):
1403
- record._old_key = record.key
1404
- record._old_suffix = record.suffix
1456
+ record._old_key = record.__dict__.get("key")
1457
+ record._old_suffix = record.__dict__.get("suffix")
1405
1458
  elif hasattr(record, "_name_field"):
1406
- record._old_name = getattr(record, record._name_field)
1459
+ record._old_name = record.__dict__.get(record._name_field)
1407
1460
 
1408
1461
 
1409
1462
  def check_name_change(record: Record):
@@ -1437,9 +1490,6 @@ def check_name_change(record: Record):
1437
1490
  label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
1438
1491
  )
1439
1492
  .exclude(feature_id=None) # must have a feature
1440
- .exclude(
1441
- feature_ref_is_name=None
1442
- ) # must be linked via Curator and therefore part of a schema
1443
1493
  .distinct()
1444
1494
  )
1445
1495
  artifact_ids = linked_records.list("artifact__uid")
lamindb/models/run.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Any, overload
4
4
 
5
+ import numpy as np
5
6
  from django.db import models
6
7
  from django.db.models import (
7
8
  CASCADE,
@@ -19,7 +20,7 @@ from lamindb.base.fields import (
19
20
  ForeignKey,
20
21
  )
21
22
  from lamindb.base.users import current_user_id
22
- from lamindb.errors import ValidationError
23
+ from lamindb.errors import InvalidArgument, ValidationError
23
24
 
24
25
  from ..base.ids import base62_20
25
26
  from .can_curate import CanCurate
@@ -33,6 +34,7 @@ if TYPE_CHECKING:
33
34
  from .artifact import Artifact
34
35
  from .collection import Collection
35
36
  from .project import Project
37
+ from .query_set import QuerySet
36
38
  from .schema import Schema
37
39
  from .transform import Transform
38
40
  from .ulabel import ULabel
@@ -538,6 +540,56 @@ class Run(Record):
538
540
  delete_run_artifacts(self)
539
541
  super().delete()
540
542
 
543
+ @classmethod
544
+ def filter(
545
+ cls,
546
+ *queries,
547
+ **expressions,
548
+ ) -> QuerySet:
549
+ """Query a set of runs.
550
+
551
+ Args:
552
+ *queries: `Q` expressions.
553
+ **expressions: Params, fields, and values passed via the Django query syntax.
554
+
555
+ See Also:
556
+ - Guide: :doc:`docs:registries`
557
+
558
+ Examples:
559
+
560
+ Query by fields::
561
+
562
+ ln.Run.filter(key="my_datasets/my_file.parquet")
563
+
564
+ Query by params::
565
+
566
+ ln.Run.filter(hyperparam_x=100)
567
+ """
568
+ from ._feature_manager import filter_base
569
+ from .query_set import QuerySet
570
+
571
+ if expressions:
572
+ keys_normalized = [key.split("__")[0] for key in expressions]
573
+ field_or_feature_or_param = keys_normalized[0].split("__")[0]
574
+ if field_or_feature_or_param in Run.__get_available_fields__():
575
+ return QuerySet(model=cls).filter(*queries, **expressions)
576
+ elif all(
577
+ params_validated := Param.validate(
578
+ keys_normalized, field="name", mute=True
579
+ )
580
+ ):
581
+ return filter_base(ParamManagerRun, **expressions)
582
+ else:
583
+ params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
584
+ message = f"param names: {params}"
585
+ fields = ", ".join(sorted(cls.__get_available_fields__()))
586
+ raise InvalidArgument(
587
+ f"You can query either by available fields: {fields}\n"
588
+ f"Or fix invalid {message}"
589
+ )
590
+ else:
591
+ return QuerySet(model=cls).filter(*queries, **expressions)
592
+
541
593
 
542
594
  def delete_run_artifacts(run: Run) -> None:
543
595
  environment = None
@@ -555,7 +607,9 @@ def delete_run_artifacts(run: Run) -> None:
555
607
  if environment._environment_of.count() == 0:
556
608
  environment.delete(permanent=True)
557
609
  if report is not None:
558
- report.delete(permanent=True)
610
+ # only delete if there are no other runs attached to this report
611
+ if report._report_of.count() == 0:
612
+ report.delete(permanent=True)
559
613
 
560
614
 
561
615
  class RunParamValue(BasicRecord, LinkORM):
lamindb/models/save.py CHANGED
@@ -192,9 +192,7 @@ def copy_or_move_to_cache(
192
192
  # non-local storage_path further
193
193
  if local_path != cache_path:
194
194
  if cache_path.exists():
195
- logger.warning(
196
- f"The cache path {cache_path.as_posix()} already exists, replacing it."
197
- )
195
+ logger.warning(f"replacing the existing cache path {cache_path.as_posix()}")
198
196
  if cache_path.is_dir():
199
197
  shutil.rmtree(cache_path)
200
198
  else: