lamindb 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -3
- lamindb/core/_context.py +64 -69
- lamindb/core/datasets/_small.py +2 -2
- lamindb/curators/__init__.py +683 -893
- lamindb/models/__init__.py +8 -1
- lamindb/models/_feature_manager.py +23 -19
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/artifact.py +210 -111
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +6 -4
- lamindb/models/feature.py +27 -30
- lamindb/models/has_parents.py +22 -7
- lamindb/models/project.py +2 -2
- lamindb/models/query_set.py +6 -35
- lamindb/models/record.py +164 -116
- lamindb/models/run.py +56 -2
- lamindb/models/save.py +1 -3
- lamindb/models/schema.py +277 -77
- lamindb/models/transform.py +4 -13
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/METADATA +6 -5
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/RECORD +24 -24
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py
CHANGED
@@ -58,11 +58,11 @@ from lamindb_setup._connect_instance import (
|
|
58
58
|
update_db_using_local,
|
59
59
|
)
|
60
60
|
from lamindb_setup.core._docs import doc_args
|
61
|
-
from lamindb_setup.core._hub_core import connect_instance_hub
|
61
|
+
from lamindb_setup.core._hub_core import access_db, connect_instance_hub
|
62
62
|
from lamindb_setup.core._settings_store import instance_settings_file
|
63
|
+
from lamindb_setup.core.django import db_token_manager
|
63
64
|
from lamindb_setup.core.upath import extract_suffix_from_path
|
64
65
|
|
65
|
-
from lamindb.base import deprecated
|
66
66
|
from lamindb.base.fields import (
|
67
67
|
CharField,
|
68
68
|
DateTimeField,
|
@@ -186,8 +186,7 @@ def update_attributes(record: Record, attributes: dict[str, str]):
|
|
186
186
|
if (
|
187
187
|
getattr(record, key) != value
|
188
188
|
and value is not None
|
189
|
-
and key
|
190
|
-
and key != "_aux"
|
189
|
+
and key not in {"dtype", "otype", "_aux"}
|
191
190
|
):
|
192
191
|
logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
|
193
192
|
setattr(record, key, value)
|
@@ -370,6 +369,8 @@ class Registry(ModelBase):
|
|
370
369
|
Note: `Registry` inherits from Django's `ModelBase`.
|
371
370
|
"""
|
372
371
|
|
372
|
+
_available_fields: set[str] = None
|
373
|
+
|
373
374
|
def __new__(cls, name, bases, attrs, **kwargs):
|
374
375
|
new_class = super().__new__(cls, name, bases, attrs, **kwargs)
|
375
376
|
return new_class
|
@@ -487,10 +488,12 @@ class Registry(ModelBase):
|
|
487
488
|
- Guide: :doc:`docs:registries`
|
488
489
|
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
489
490
|
|
490
|
-
Examples
|
491
|
+
Examples:
|
492
|
+
|
493
|
+
::
|
491
494
|
|
492
|
-
|
493
|
-
|
495
|
+
ulabel = ln.ULabel.get("FvtpPJLJ")
|
496
|
+
ulabel = ln.ULabel.get(name="my-label")
|
494
497
|
"""
|
495
498
|
from .query_set import QuerySet
|
496
499
|
|
@@ -594,17 +597,18 @@ class Registry(ModelBase):
|
|
594
597
|
"""
|
595
598
|
from .query_set import QuerySet
|
596
599
|
|
597
|
-
|
600
|
+
# we're in the default instance
|
601
|
+
if instance is None or instance == "default":
|
598
602
|
return QuerySet(model=cls, using=None)
|
599
|
-
|
600
603
|
owner, name = get_owner_name_from_identifier(instance)
|
601
|
-
if
|
604
|
+
if [owner, name] == setup_settings.instance.slug.split("/"):
|
602
605
|
return QuerySet(model=cls, using=None)
|
603
606
|
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
+
# move on to different instances
|
608
|
+
cache_using_filepath = (
|
609
|
+
setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
607
610
|
)
|
611
|
+
settings_file = instance_settings_file(name, owner)
|
608
612
|
if not settings_file.exists():
|
609
613
|
result = connect_instance_hub(owner=owner, name=name)
|
610
614
|
if isinstance(result, str):
|
@@ -613,24 +617,46 @@ class Registry(ModelBase):
|
|
613
617
|
)
|
614
618
|
iresult, _ = result
|
615
619
|
# do not use {} syntax below, it gives rise to a dict if the schema modules
|
616
|
-
# are empty and then triggers a TypeError in missing_members =
|
617
|
-
|
620
|
+
# are empty and then triggers a TypeError in missing_members = source_modules - target_modules
|
621
|
+
source_modules = set( # noqa
|
618
622
|
[mod for mod in iresult["schema_str"].split(",") if mod != ""]
|
619
623
|
)
|
620
|
-
|
621
|
-
if not source_module.issubset(target_module):
|
622
|
-
missing_members = source_module - target_module
|
623
|
-
logger.warning(
|
624
|
-
f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
|
625
|
-
)
|
626
|
-
cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
|
627
|
-
settings_file = instance_settings_file(name, owner)
|
624
|
+
# this just retrieves the full connection string from iresult
|
628
625
|
db = update_db_using_local(iresult, settings_file)
|
626
|
+
cache_using_filepath.write_text(
|
627
|
+
f"{iresult['lnid']}\n{iresult['schema_str']}"
|
628
|
+
)
|
629
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
630
|
+
is_fine_grained_access = (
|
631
|
+
iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
|
632
|
+
)
|
633
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
634
|
+
into_access_db = iresult
|
629
635
|
else:
|
630
636
|
isettings = load_instance_settings(settings_file)
|
637
|
+
source_modules = isettings.modules
|
631
638
|
db = isettings.db
|
632
|
-
|
639
|
+
cache_using_filepath.write_text(
|
640
|
+
f"{isettings.uid}\n{','.join(source_modules)}"
|
641
|
+
)
|
642
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
643
|
+
is_fine_grained_access = (
|
644
|
+
isettings._fine_grained_access and isettings._db_permissions == "jwt"
|
645
|
+
)
|
646
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
647
|
+
into_access_db = isettings
|
648
|
+
|
649
|
+
target_modules = setup_settings.instance.modules
|
650
|
+
if not (missing_members := source_modules - target_modules):
|
651
|
+
logger.warning(
|
652
|
+
f"source modules has additional modules: {missing_members}\n"
|
653
|
+
"consider mounting these registry modules to transfer all metadata"
|
654
|
+
)
|
655
|
+
|
633
656
|
add_db_connection(db, instance)
|
657
|
+
if is_fine_grained_access:
|
658
|
+
db_token = access_db(into_access_db)
|
659
|
+
db_token_manager.set(db_token, instance)
|
634
660
|
return QuerySet(model=cls, using=instance)
|
635
661
|
|
636
662
|
def __get_module_name__(cls) -> str:
|
@@ -640,10 +666,6 @@ class Registry(ModelBase):
|
|
640
666
|
module_name = "core"
|
641
667
|
return module_name
|
642
668
|
|
643
|
-
@deprecated("__get_module_name__")
|
644
|
-
def __get_schema_name__(cls) -> str:
|
645
|
-
return cls.__get_module_name__()
|
646
|
-
|
647
669
|
def __get_name_with_module__(cls) -> str:
|
648
670
|
module_name = cls.__get_module_name__()
|
649
671
|
if module_name == "core":
|
@@ -652,9 +674,19 @@ class Registry(ModelBase):
|
|
652
674
|
module_prefix = f"{module_name}."
|
653
675
|
return f"{module_prefix}{cls.__name__}"
|
654
676
|
|
655
|
-
|
656
|
-
|
657
|
-
|
677
|
+
def __get_available_fields__(cls) -> set[str]:
|
678
|
+
if cls._available_fields is None:
|
679
|
+
cls._available_fields = {
|
680
|
+
f.name
|
681
|
+
for f in cls._meta.get_fields()
|
682
|
+
if not f.name.startswith("_")
|
683
|
+
and not f.name.startswith("links_")
|
684
|
+
and not f.name.endswith("_id")
|
685
|
+
}
|
686
|
+
if cls.__name__ == "Artifact":
|
687
|
+
cls._available_fields.add("visibility")
|
688
|
+
cls._available_fields.add("transform")
|
689
|
+
return cls._available_fields
|
658
690
|
|
659
691
|
|
660
692
|
class BasicRecord(models.Model, metaclass=Registry):
|
@@ -670,75 +702,86 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
670
702
|
|
671
703
|
def __init__(self, *args, **kwargs):
|
672
704
|
skip_validation = kwargs.pop("_skip_validation", False)
|
673
|
-
if not args
|
674
|
-
super().__init__(**kwargs)
|
675
|
-
elif not args and not skip_validation:
|
676
|
-
from ..core._settings import settings
|
677
|
-
from .can_curate import CanCurate
|
678
|
-
from .collection import Collection
|
679
|
-
from .schema import Schema
|
680
|
-
from .transform import Transform
|
681
|
-
|
682
|
-
validate_fields(self, kwargs)
|
683
|
-
|
684
|
-
# do not search for names if an id is passed; this is important
|
685
|
-
# e.g. when synching ids from the notebook store to lamindb
|
686
|
-
has_consciously_provided_uid = False
|
687
|
-
if "_has_consciously_provided_uid" in kwargs:
|
688
|
-
has_consciously_provided_uid = kwargs.pop(
|
689
|
-
"_has_consciously_provided_uid"
|
690
|
-
)
|
705
|
+
if not args:
|
691
706
|
if (
|
692
|
-
|
693
|
-
and
|
694
|
-
|
707
|
+
issubclass(self.__class__, Record)
|
708
|
+
and not self.__class__.__name__ == "Storage"
|
709
|
+
# do not save bionty entities in restricted spaces by default
|
710
|
+
and self.__class__.__module__ != "bionty.models"
|
695
711
|
):
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
712
|
+
from lamindb import context as run_context
|
713
|
+
|
714
|
+
if run_context.space is not None:
|
715
|
+
kwargs["space"] = run_context.space
|
716
|
+
if skip_validation:
|
717
|
+
super().__init__(**kwargs)
|
718
|
+
else:
|
719
|
+
from ..core._settings import settings
|
720
|
+
from .can_curate import CanCurate
|
721
|
+
from .collection import Collection
|
722
|
+
from .schema import Schema
|
723
|
+
from .transform import Transform
|
724
|
+
|
725
|
+
validate_fields(self, kwargs)
|
726
|
+
|
727
|
+
# do not search for names if an id is passed; this is important
|
728
|
+
# e.g. when synching ids from the notebook store to lamindb
|
729
|
+
has_consciously_provided_uid = False
|
730
|
+
if "_has_consciously_provided_uid" in kwargs:
|
731
|
+
has_consciously_provided_uid = kwargs.pop(
|
732
|
+
"_has_consciously_provided_uid"
|
733
|
+
)
|
734
|
+
if (
|
735
|
+
isinstance(self, (CanCurate, Collection, Transform))
|
736
|
+
and settings.creation.search_names
|
737
|
+
and not has_consciously_provided_uid
|
738
|
+
):
|
739
|
+
name_field = getattr(self, "_name_field", "name")
|
740
|
+
exact_match = suggest_records_with_similar_names(
|
741
|
+
self, name_field, kwargs
|
742
|
+
)
|
743
|
+
if exact_match is not None:
|
744
|
+
if "version" in kwargs:
|
745
|
+
if kwargs["version"] is not None:
|
746
|
+
version_comment = " and version"
|
747
|
+
existing_record = self.__class__.filter(
|
748
|
+
**{
|
749
|
+
name_field: kwargs[name_field],
|
750
|
+
"version": kwargs["version"],
|
751
|
+
}
|
752
|
+
).one_or_none()
|
753
|
+
else:
|
754
|
+
# for a versioned record, an exact name match is not a criterion
|
755
|
+
# for retrieving a record in case `version` isn't passed -
|
756
|
+
# we'd always pull out many records with exactly the same name
|
757
|
+
existing_record = None
|
710
758
|
else:
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
else:
|
738
|
-
self._Model__clean_fields()
|
739
|
-
except DjangoValidationError as e:
|
740
|
-
message = _format_django_validation_error(self, e)
|
741
|
-
raise FieldValidationError(message) from e
|
759
|
+
version_comment = ""
|
760
|
+
existing_record = exact_match
|
761
|
+
if existing_record is not None:
|
762
|
+
logger.important(
|
763
|
+
f"returning existing {self.__class__.__name__} record with same"
|
764
|
+
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
765
|
+
)
|
766
|
+
if isinstance(self, Schema):
|
767
|
+
if existing_record.hash != kwargs["hash"]:
|
768
|
+
logger.warning(
|
769
|
+
f"You're updating schema {existing_record.uid}, which might already have been used to validate datasets. Be careful."
|
770
|
+
)
|
771
|
+
init_self_from_db(self, existing_record)
|
772
|
+
update_attributes(self, kwargs)
|
773
|
+
return None
|
774
|
+
super().__init__(**kwargs)
|
775
|
+
if isinstance(self, ValidateFields):
|
776
|
+
# this will trigger validation against django validators
|
777
|
+
try:
|
778
|
+
if hasattr(self, "clean_fields"):
|
779
|
+
self.clean_fields()
|
780
|
+
else:
|
781
|
+
self._Model__clean_fields()
|
782
|
+
except DjangoValidationError as e:
|
783
|
+
message = _format_django_validation_error(self, e)
|
784
|
+
raise FieldValidationError(message) from e
|
742
785
|
elif len(args) != len(self._meta.concrete_fields):
|
743
786
|
raise FieldValidationError(
|
744
787
|
f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
|
@@ -840,16 +883,20 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
840
883
|
if k != "run":
|
841
884
|
logger.important(f"{k} records: {', '.join(v)}")
|
842
885
|
|
843
|
-
if
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
886
|
+
if (
|
887
|
+
self.__class__.__name__
|
888
|
+
in {
|
889
|
+
"Artifact",
|
890
|
+
"Transform",
|
891
|
+
"Run",
|
892
|
+
"ULabel",
|
893
|
+
"Feature",
|
894
|
+
"Schema",
|
895
|
+
"Collection",
|
896
|
+
"Reference",
|
897
|
+
}
|
898
|
+
and self._branch_code >= 1
|
899
|
+
):
|
853
900
|
import lamindb as ln
|
854
901
|
|
855
902
|
if ln.context.project is not None:
|
@@ -1315,10 +1362,12 @@ def get_transfer_run(record) -> Run:
|
|
1315
1362
|
|
1316
1363
|
slug = record._state.db
|
1317
1364
|
owner, name = get_owner_name_from_identifier(slug)
|
1318
|
-
|
1319
|
-
|
1365
|
+
cache_using_filepath = (
|
1366
|
+
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
1367
|
+
)
|
1368
|
+
if not cache_using_filepath.exists():
|
1320
1369
|
raise SystemExit("Need to call .using() before")
|
1321
|
-
instance_uid =
|
1370
|
+
instance_uid = cache_using_filepath.read_text().split("\n")[0]
|
1322
1371
|
key = f"transfers/{instance_uid}"
|
1323
1372
|
uid = instance_uid + "0000"
|
1324
1373
|
transform = Transform.filter(uid=uid).one_or_none()
|
@@ -1401,11 +1450,13 @@ def transfer_to_default_db(
|
|
1401
1450
|
def track_current_key_and_name_values(record: Record):
|
1402
1451
|
from lamindb.models import Artifact
|
1403
1452
|
|
1453
|
+
# below, we're using __dict__ to avoid triggering the refresh from the database
|
1454
|
+
# which can lead to a recursion
|
1404
1455
|
if isinstance(record, Artifact):
|
1405
|
-
record._old_key = record.key
|
1406
|
-
record._old_suffix = record.suffix
|
1456
|
+
record._old_key = record.__dict__.get("key")
|
1457
|
+
record._old_suffix = record.__dict__.get("suffix")
|
1407
1458
|
elif hasattr(record, "_name_field"):
|
1408
|
-
record._old_name =
|
1459
|
+
record._old_name = record.__dict__.get(record._name_field)
|
1409
1460
|
|
1410
1461
|
|
1411
1462
|
def check_name_change(record: Record):
|
@@ -1439,9 +1490,6 @@ def check_name_change(record: Record):
|
|
1439
1490
|
label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
|
1440
1491
|
)
|
1441
1492
|
.exclude(feature_id=None) # must have a feature
|
1442
|
-
.exclude(
|
1443
|
-
feature_ref_is_name=None
|
1444
|
-
) # must be linked via Curator and therefore part of a schema
|
1445
1493
|
.distinct()
|
1446
1494
|
)
|
1447
1495
|
artifact_ids = linked_records.list("artifact__uid")
|
lamindb/models/run.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Any, overload
|
4
4
|
|
5
|
+
import numpy as np
|
5
6
|
from django.db import models
|
6
7
|
from django.db.models import (
|
7
8
|
CASCADE,
|
@@ -19,7 +20,7 @@ from lamindb.base.fields import (
|
|
19
20
|
ForeignKey,
|
20
21
|
)
|
21
22
|
from lamindb.base.users import current_user_id
|
22
|
-
from lamindb.errors import ValidationError
|
23
|
+
from lamindb.errors import InvalidArgument, ValidationError
|
23
24
|
|
24
25
|
from ..base.ids import base62_20
|
25
26
|
from .can_curate import CanCurate
|
@@ -33,6 +34,7 @@ if TYPE_CHECKING:
|
|
33
34
|
from .artifact import Artifact
|
34
35
|
from .collection import Collection
|
35
36
|
from .project import Project
|
37
|
+
from .query_set import QuerySet
|
36
38
|
from .schema import Schema
|
37
39
|
from .transform import Transform
|
38
40
|
from .ulabel import ULabel
|
@@ -538,6 +540,56 @@ class Run(Record):
|
|
538
540
|
delete_run_artifacts(self)
|
539
541
|
super().delete()
|
540
542
|
|
543
|
+
@classmethod
|
544
|
+
def filter(
|
545
|
+
cls,
|
546
|
+
*queries,
|
547
|
+
**expressions,
|
548
|
+
) -> QuerySet:
|
549
|
+
"""Query a set of runs.
|
550
|
+
|
551
|
+
Args:
|
552
|
+
*queries: `Q` expressions.
|
553
|
+
**expressions: Params, fields, and values passed via the Django query syntax.
|
554
|
+
|
555
|
+
See Also:
|
556
|
+
- Guide: :doc:`docs:registries`
|
557
|
+
|
558
|
+
Examples:
|
559
|
+
|
560
|
+
Query by fields::
|
561
|
+
|
562
|
+
ln.Run.filter(key="my_datasets/my_file.parquet")
|
563
|
+
|
564
|
+
Query by params::
|
565
|
+
|
566
|
+
ln.Run.filter(hyperparam_x=100)
|
567
|
+
"""
|
568
|
+
from ._feature_manager import filter_base
|
569
|
+
from .query_set import QuerySet
|
570
|
+
|
571
|
+
if expressions:
|
572
|
+
keys_normalized = [key.split("__")[0] for key in expressions]
|
573
|
+
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
574
|
+
if field_or_feature_or_param in Run.__get_available_fields__():
|
575
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
576
|
+
elif all(
|
577
|
+
params_validated := Param.validate(
|
578
|
+
keys_normalized, field="name", mute=True
|
579
|
+
)
|
580
|
+
):
|
581
|
+
return filter_base(ParamManagerRun, **expressions)
|
582
|
+
else:
|
583
|
+
params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
|
584
|
+
message = f"param names: {params}"
|
585
|
+
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
586
|
+
raise InvalidArgument(
|
587
|
+
f"You can query either by available fields: {fields}\n"
|
588
|
+
f"Or fix invalid {message}"
|
589
|
+
)
|
590
|
+
else:
|
591
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
592
|
+
|
541
593
|
|
542
594
|
def delete_run_artifacts(run: Run) -> None:
|
543
595
|
environment = None
|
@@ -555,7 +607,9 @@ def delete_run_artifacts(run: Run) -> None:
|
|
555
607
|
if environment._environment_of.count() == 0:
|
556
608
|
environment.delete(permanent=True)
|
557
609
|
if report is not None:
|
558
|
-
|
610
|
+
# only delete if there are no other runs attached to this report
|
611
|
+
if report._report_of.count() == 0:
|
612
|
+
report.delete(permanent=True)
|
559
613
|
|
560
614
|
|
561
615
|
class RunParamValue(BasicRecord, LinkORM):
|
lamindb/models/save.py
CHANGED
@@ -192,9 +192,7 @@ def copy_or_move_to_cache(
|
|
192
192
|
# non-local storage_path further
|
193
193
|
if local_path != cache_path:
|
194
194
|
if cache_path.exists():
|
195
|
-
logger.warning(
|
196
|
-
f"The cache path {cache_path.as_posix()} already exists, replacing it."
|
197
|
-
)
|
195
|
+
logger.warning(f"replacing the existing cache path {cache_path.as_posix()}")
|
198
196
|
if cache_path.is_dir():
|
199
197
|
shutil.rmtree(cache_path)
|
200
198
|
else:
|