lamindb 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/models/record.py CHANGED
@@ -58,11 +58,11 @@ from lamindb_setup._connect_instance import (
58
58
  update_db_using_local,
59
59
  )
60
60
  from lamindb_setup.core._docs import doc_args
61
- from lamindb_setup.core._hub_core import connect_instance_hub
61
+ from lamindb_setup.core._hub_core import access_db, connect_instance_hub
62
62
  from lamindb_setup.core._settings_store import instance_settings_file
63
+ from lamindb_setup.core.django import db_token_manager
63
64
  from lamindb_setup.core.upath import extract_suffix_from_path
64
65
 
65
- from lamindb.base import deprecated
66
66
  from lamindb.base.fields import (
67
67
  CharField,
68
68
  DateTimeField,
@@ -186,8 +186,7 @@ def update_attributes(record: Record, attributes: dict[str, str]):
186
186
  if (
187
187
  getattr(record, key) != value
188
188
  and value is not None
189
- and key != "dtype"
190
- and key != "_aux"
189
+ and key not in {"dtype", "otype", "_aux"}
191
190
  ):
192
191
  logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
193
192
  setattr(record, key, value)
@@ -370,6 +369,8 @@ class Registry(ModelBase):
370
369
  Note: `Registry` inherits from Django's `ModelBase`.
371
370
  """
372
371
 
372
+ _available_fields: set[str] = None
373
+
373
374
  def __new__(cls, name, bases, attrs, **kwargs):
374
375
  new_class = super().__new__(cls, name, bases, attrs, **kwargs)
375
376
  return new_class
@@ -487,10 +488,12 @@ class Registry(ModelBase):
487
488
  - Guide: :doc:`docs:registries`
488
489
  - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
489
490
 
490
- Examples::
491
+ Examples:
492
+
493
+ ::
491
494
 
492
- ulabel = ln.ULabel.get("FvtpPJLJ")
493
- ulabel = ln.ULabel.get(name="my-label")
495
+ ulabel = ln.ULabel.get("FvtpPJLJ")
496
+ ulabel = ln.ULabel.get(name="my-label")
494
497
  """
495
498
  from .query_set import QuerySet
496
499
 
@@ -594,17 +597,18 @@ class Registry(ModelBase):
594
597
  """
595
598
  from .query_set import QuerySet
596
599
 
597
- if instance is None:
600
+ # we're in the default instance
601
+ if instance is None or instance == "default":
598
602
  return QuerySet(model=cls, using=None)
599
-
600
603
  owner, name = get_owner_name_from_identifier(instance)
601
- if f"{owner}/{name}" == setup_settings.instance.slug:
604
+ if [owner, name] == setup_settings.instance.slug.split("/"):
602
605
  return QuerySet(model=cls, using=None)
603
606
 
604
- settings_file = instance_settings_file(name, owner)
605
- cache_filepath = (
606
- ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
607
+ # move on to different instances
608
+ cache_using_filepath = (
609
+ setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
607
610
  )
611
+ settings_file = instance_settings_file(name, owner)
608
612
  if not settings_file.exists():
609
613
  result = connect_instance_hub(owner=owner, name=name)
610
614
  if isinstance(result, str):
@@ -613,24 +617,46 @@ class Registry(ModelBase):
613
617
  )
614
618
  iresult, _ = result
615
619
  # do not use {} syntax below, it gives rise to a dict if the schema modules
616
- # are empty and then triggers a TypeError in missing_members = source_module - target_module
617
- source_module = set( # noqa
620
+ # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
621
+ source_modules = set( # noqa
618
622
  [mod for mod in iresult["schema_str"].split(",") if mod != ""]
619
623
  )
620
- target_module = ln_setup.settings.instance.modules
621
- if not source_module.issubset(target_module):
622
- missing_members = source_module - target_module
623
- logger.warning(
624
- f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
625
- )
626
- cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
627
- settings_file = instance_settings_file(name, owner)
624
+ # this just retrieves the full connection string from iresult
628
625
  db = update_db_using_local(iresult, settings_file)
626
+ cache_using_filepath.write_text(
627
+ f"{iresult['lnid']}\n{iresult['schema_str']}"
628
+ )
629
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
630
+ is_fine_grained_access = (
631
+ iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
632
+ )
633
+ # access_db can take both: the dict from connect_instance_hub and isettings
634
+ into_access_db = iresult
629
635
  else:
630
636
  isettings = load_instance_settings(settings_file)
637
+ source_modules = isettings.modules
631
638
  db = isettings.db
632
- cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}") # type: ignore
639
+ cache_using_filepath.write_text(
640
+ f"{isettings.uid}\n{','.join(source_modules)}"
641
+ )
642
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
643
+ is_fine_grained_access = (
644
+ isettings._fine_grained_access and isettings._db_permissions == "jwt"
645
+ )
646
+ # access_db can take both: the dict from connect_instance_hub and isettings
647
+ into_access_db = isettings
648
+
649
+ target_modules = setup_settings.instance.modules
650
+ if not (missing_members := source_modules - target_modules):
651
+ logger.warning(
652
+ f"source modules has additional modules: {missing_members}\n"
653
+ "consider mounting these registry modules to transfer all metadata"
654
+ )
655
+
633
656
  add_db_connection(db, instance)
657
+ if is_fine_grained_access:
658
+ db_token = access_db(into_access_db)
659
+ db_token_manager.set(db_token, instance)
634
660
  return QuerySet(model=cls, using=instance)
635
661
 
636
662
  def __get_module_name__(cls) -> str:
@@ -640,10 +666,6 @@ class Registry(ModelBase):
640
666
  module_name = "core"
641
667
  return module_name
642
668
 
643
- @deprecated("__get_module_name__")
644
- def __get_schema_name__(cls) -> str:
645
- return cls.__get_module_name__()
646
-
647
669
  def __get_name_with_module__(cls) -> str:
648
670
  module_name = cls.__get_module_name__()
649
671
  if module_name == "core":
@@ -652,9 +674,19 @@ class Registry(ModelBase):
652
674
  module_prefix = f"{module_name}."
653
675
  return f"{module_prefix}{cls.__name__}"
654
676
 
655
- @deprecated("__get_name_with_module__")
656
- def __get_name_with_schema__(cls) -> str:
657
- return cls.__get_name_with_module__()
677
+ def __get_available_fields__(cls) -> set[str]:
678
+ if cls._available_fields is None:
679
+ cls._available_fields = {
680
+ f.name
681
+ for f in cls._meta.get_fields()
682
+ if not f.name.startswith("_")
683
+ and not f.name.startswith("links_")
684
+ and not f.name.endswith("_id")
685
+ }
686
+ if cls.__name__ == "Artifact":
687
+ cls._available_fields.add("visibility")
688
+ cls._available_fields.add("transform")
689
+ return cls._available_fields
658
690
 
659
691
 
660
692
  class BasicRecord(models.Model, metaclass=Registry):
@@ -670,75 +702,86 @@ class BasicRecord(models.Model, metaclass=Registry):
670
702
 
671
703
  def __init__(self, *args, **kwargs):
672
704
  skip_validation = kwargs.pop("_skip_validation", False)
673
- if not args and skip_validation:
674
- super().__init__(**kwargs)
675
- elif not args and not skip_validation:
676
- from ..core._settings import settings
677
- from .can_curate import CanCurate
678
- from .collection import Collection
679
- from .schema import Schema
680
- from .transform import Transform
681
-
682
- validate_fields(self, kwargs)
683
-
684
- # do not search for names if an id is passed; this is important
685
- # e.g. when synching ids from the notebook store to lamindb
686
- has_consciously_provided_uid = False
687
- if "_has_consciously_provided_uid" in kwargs:
688
- has_consciously_provided_uid = kwargs.pop(
689
- "_has_consciously_provided_uid"
690
- )
705
+ if not args:
691
706
  if (
692
- isinstance(self, (CanCurate, Collection, Transform))
693
- and settings.creation.search_names
694
- and not has_consciously_provided_uid
707
+ issubclass(self.__class__, Record)
708
+ and not self.__class__.__name__ == "Storage"
709
+ # do not save bionty entities in restricted spaces by default
710
+ and self.__class__.__module__ != "bionty.models"
695
711
  ):
696
- name_field = getattr(self, "_name_field", "name")
697
- exact_match = suggest_records_with_similar_names(
698
- self, name_field, kwargs
699
- )
700
- if exact_match is not None:
701
- if "version" in kwargs:
702
- if kwargs["version"] is not None:
703
- version_comment = " and version"
704
- existing_record = self.__class__.filter(
705
- **{
706
- name_field: kwargs[name_field],
707
- "version": kwargs["version"],
708
- }
709
- ).one_or_none()
712
+ from lamindb import context as run_context
713
+
714
+ if run_context.space is not None:
715
+ kwargs["space"] = run_context.space
716
+ if skip_validation:
717
+ super().__init__(**kwargs)
718
+ else:
719
+ from ..core._settings import settings
720
+ from .can_curate import CanCurate
721
+ from .collection import Collection
722
+ from .schema import Schema
723
+ from .transform import Transform
724
+
725
+ validate_fields(self, kwargs)
726
+
727
+ # do not search for names if an id is passed; this is important
728
+ # e.g. when synching ids from the notebook store to lamindb
729
+ has_consciously_provided_uid = False
730
+ if "_has_consciously_provided_uid" in kwargs:
731
+ has_consciously_provided_uid = kwargs.pop(
732
+ "_has_consciously_provided_uid"
733
+ )
734
+ if (
735
+ isinstance(self, (CanCurate, Collection, Transform))
736
+ and settings.creation.search_names
737
+ and not has_consciously_provided_uid
738
+ ):
739
+ name_field = getattr(self, "_name_field", "name")
740
+ exact_match = suggest_records_with_similar_names(
741
+ self, name_field, kwargs
742
+ )
743
+ if exact_match is not None:
744
+ if "version" in kwargs:
745
+ if kwargs["version"] is not None:
746
+ version_comment = " and version"
747
+ existing_record = self.__class__.filter(
748
+ **{
749
+ name_field: kwargs[name_field],
750
+ "version": kwargs["version"],
751
+ }
752
+ ).one_or_none()
753
+ else:
754
+ # for a versioned record, an exact name match is not a criterion
755
+ # for retrieving a record in case `version` isn't passed -
756
+ # we'd always pull out many records with exactly the same name
757
+ existing_record = None
710
758
  else:
711
- # for a versioned record, an exact name match is not a criterion
712
- # for retrieving a record in case `version` isn't passed -
713
- # we'd always pull out many records with exactly the same name
714
- existing_record = None
715
- else:
716
- version_comment = ""
717
- existing_record = exact_match
718
- if existing_record is not None:
719
- logger.important(
720
- f"returning existing {self.__class__.__name__} record with same"
721
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
722
- )
723
- if isinstance(self, Schema):
724
- if existing_record.hash != kwargs["hash"]:
725
- raise ValueError(
726
- f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
727
- )
728
- init_self_from_db(self, existing_record)
729
- update_attributes(self, kwargs)
730
- return None
731
- super().__init__(**kwargs)
732
- if isinstance(self, ValidateFields):
733
- # this will trigger validation against django validators
734
- try:
735
- if hasattr(self, "clean_fields"):
736
- self.clean_fields()
737
- else:
738
- self._Model__clean_fields()
739
- except DjangoValidationError as e:
740
- message = _format_django_validation_error(self, e)
741
- raise FieldValidationError(message) from e
759
+ version_comment = ""
760
+ existing_record = exact_match
761
+ if existing_record is not None:
762
+ logger.important(
763
+ f"returning existing {self.__class__.__name__} record with same"
764
+ f" {name_field}{version_comment}: '{kwargs[name_field]}'"
765
+ )
766
+ if isinstance(self, Schema):
767
+ if existing_record.hash != kwargs["hash"]:
768
+ logger.warning(
769
+ f"You're updating schema {existing_record.uid}, which might already have been used to validate datasets. Be careful."
770
+ )
771
+ init_self_from_db(self, existing_record)
772
+ update_attributes(self, kwargs)
773
+ return None
774
+ super().__init__(**kwargs)
775
+ if isinstance(self, ValidateFields):
776
+ # this will trigger validation against django validators
777
+ try:
778
+ if hasattr(self, "clean_fields"):
779
+ self.clean_fields()
780
+ else:
781
+ self._Model__clean_fields()
782
+ except DjangoValidationError as e:
783
+ message = _format_django_validation_error(self, e)
784
+ raise FieldValidationError(message) from e
742
785
  elif len(args) != len(self._meta.concrete_fields):
743
786
  raise FieldValidationError(
744
787
  f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
@@ -840,16 +883,20 @@ class BasicRecord(models.Model, metaclass=Registry):
840
883
  if k != "run":
841
884
  logger.important(f"{k} records: {', '.join(v)}")
842
885
 
843
- if self.__class__.__name__ in {
844
- "Artifact",
845
- "Transform",
846
- "Run",
847
- "ULabel",
848
- "Feature",
849
- "Schema",
850
- "Collection",
851
- "Reference",
852
- }:
886
+ if (
887
+ self.__class__.__name__
888
+ in {
889
+ "Artifact",
890
+ "Transform",
891
+ "Run",
892
+ "ULabel",
893
+ "Feature",
894
+ "Schema",
895
+ "Collection",
896
+ "Reference",
897
+ }
898
+ and self._branch_code >= 1
899
+ ):
853
900
  import lamindb as ln
854
901
 
855
902
  if ln.context.project is not None:
@@ -1315,10 +1362,12 @@ def get_transfer_run(record) -> Run:
1315
1362
 
1316
1363
  slug = record._state.db
1317
1364
  owner, name = get_owner_name_from_identifier(slug)
1318
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1319
- if not cache_filepath.exists():
1365
+ cache_using_filepath = (
1366
+ ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1367
+ )
1368
+ if not cache_using_filepath.exists():
1320
1369
  raise SystemExit("Need to call .using() before")
1321
- instance_uid = cache_filepath.read_text().split("\n")[0]
1370
+ instance_uid = cache_using_filepath.read_text().split("\n")[0]
1322
1371
  key = f"transfers/{instance_uid}"
1323
1372
  uid = instance_uid + "0000"
1324
1373
  transform = Transform.filter(uid=uid).one_or_none()
@@ -1401,11 +1450,13 @@ def transfer_to_default_db(
1401
1450
  def track_current_key_and_name_values(record: Record):
1402
1451
  from lamindb.models import Artifact
1403
1452
 
1453
+ # below, we're using __dict__ to avoid triggering the refresh from the database
1454
+ # which can lead to a recursion
1404
1455
  if isinstance(record, Artifact):
1405
- record._old_key = record.key
1406
- record._old_suffix = record.suffix
1456
+ record._old_key = record.__dict__.get("key")
1457
+ record._old_suffix = record.__dict__.get("suffix")
1407
1458
  elif hasattr(record, "_name_field"):
1408
- record._old_name = getattr(record, record._name_field)
1459
+ record._old_name = record.__dict__.get(record._name_field)
1409
1460
 
1410
1461
 
1411
1462
  def check_name_change(record: Record):
@@ -1439,9 +1490,6 @@ def check_name_change(record: Record):
1439
1490
  label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
1440
1491
  )
1441
1492
  .exclude(feature_id=None) # must have a feature
1442
- .exclude(
1443
- feature_ref_is_name=None
1444
- ) # must be linked via Curator and therefore part of a schema
1445
1493
  .distinct()
1446
1494
  )
1447
1495
  artifact_ids = linked_records.list("artifact__uid")
lamindb/models/run.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Any, overload
4
4
 
5
+ import numpy as np
5
6
  from django.db import models
6
7
  from django.db.models import (
7
8
  CASCADE,
@@ -19,7 +20,7 @@ from lamindb.base.fields import (
19
20
  ForeignKey,
20
21
  )
21
22
  from lamindb.base.users import current_user_id
22
- from lamindb.errors import ValidationError
23
+ from lamindb.errors import InvalidArgument, ValidationError
23
24
 
24
25
  from ..base.ids import base62_20
25
26
  from .can_curate import CanCurate
@@ -33,6 +34,7 @@ if TYPE_CHECKING:
33
34
  from .artifact import Artifact
34
35
  from .collection import Collection
35
36
  from .project import Project
37
+ from .query_set import QuerySet
36
38
  from .schema import Schema
37
39
  from .transform import Transform
38
40
  from .ulabel import ULabel
@@ -538,6 +540,56 @@ class Run(Record):
538
540
  delete_run_artifacts(self)
539
541
  super().delete()
540
542
 
543
+ @classmethod
544
+ def filter(
545
+ cls,
546
+ *queries,
547
+ **expressions,
548
+ ) -> QuerySet:
549
+ """Query a set of runs.
550
+
551
+ Args:
552
+ *queries: `Q` expressions.
553
+ **expressions: Params, fields, and values passed via the Django query syntax.
554
+
555
+ See Also:
556
+ - Guide: :doc:`docs:registries`
557
+
558
+ Examples:
559
+
560
+ Query by fields::
561
+
562
+ ln.Run.filter(key="my_datasets/my_file.parquet")
563
+
564
+ Query by params::
565
+
566
+ ln.Run.filter(hyperparam_x=100)
567
+ """
568
+ from ._feature_manager import filter_base
569
+ from .query_set import QuerySet
570
+
571
+ if expressions:
572
+ keys_normalized = [key.split("__")[0] for key in expressions]
573
+ field_or_feature_or_param = keys_normalized[0].split("__")[0]
574
+ if field_or_feature_or_param in Run.__get_available_fields__():
575
+ return QuerySet(model=cls).filter(*queries, **expressions)
576
+ elif all(
577
+ params_validated := Param.validate(
578
+ keys_normalized, field="name", mute=True
579
+ )
580
+ ):
581
+ return filter_base(ParamManagerRun, **expressions)
582
+ else:
583
+ params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
584
+ message = f"param names: {params}"
585
+ fields = ", ".join(sorted(cls.__get_available_fields__()))
586
+ raise InvalidArgument(
587
+ f"You can query either by available fields: {fields}\n"
588
+ f"Or fix invalid {message}"
589
+ )
590
+ else:
591
+ return QuerySet(model=cls).filter(*queries, **expressions)
592
+
541
593
 
542
594
  def delete_run_artifacts(run: Run) -> None:
543
595
  environment = None
@@ -555,7 +607,9 @@ def delete_run_artifacts(run: Run) -> None:
555
607
  if environment._environment_of.count() == 0:
556
608
  environment.delete(permanent=True)
557
609
  if report is not None:
558
- report.delete(permanent=True)
610
+ # only delete if there are no other runs attached to this report
611
+ if report._report_of.count() == 0:
612
+ report.delete(permanent=True)
559
613
 
560
614
 
561
615
  class RunParamValue(BasicRecord, LinkORM):
lamindb/models/save.py CHANGED
@@ -192,9 +192,7 @@ def copy_or_move_to_cache(
192
192
  # non-local storage_path further
193
193
  if local_path != cache_path:
194
194
  if cache_path.exists():
195
- logger.warning(
196
- f"The cache path {cache_path.as_posix()} already exists, replacing it."
197
- )
195
+ logger.warning(f"replacing the existing cache path {cache_path.as_posix()}")
198
196
  if cache_path.is_dir():
199
197
  shutil.rmtree(cache_path)
200
198
  else: