lamindb 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -3
- lamindb/core/_context.py +64 -69
- lamindb/core/datasets/_small.py +2 -2
- lamindb/curators/__init__.py +683 -893
- lamindb/models/__init__.py +8 -1
- lamindb/models/_feature_manager.py +23 -19
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/artifact.py +210 -111
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +6 -4
- lamindb/models/feature.py +27 -30
- lamindb/models/has_parents.py +22 -7
- lamindb/models/project.py +2 -2
- lamindb/models/query_set.py +6 -35
- lamindb/models/record.py +167 -117
- lamindb/models/run.py +56 -2
- lamindb/models/save.py +1 -3
- lamindb/models/schema.py +277 -77
- lamindb/models/transform.py +4 -13
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/METADATA +6 -5
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/RECORD +24 -24
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py
CHANGED
@@ -58,11 +58,11 @@ from lamindb_setup._connect_instance import (
|
|
58
58
|
update_db_using_local,
|
59
59
|
)
|
60
60
|
from lamindb_setup.core._docs import doc_args
|
61
|
-
from lamindb_setup.core._hub_core import connect_instance_hub
|
61
|
+
from lamindb_setup.core._hub_core import access_db, connect_instance_hub
|
62
62
|
from lamindb_setup.core._settings_store import instance_settings_file
|
63
|
+
from lamindb_setup.core.django import db_token_manager
|
63
64
|
from lamindb_setup.core.upath import extract_suffix_from_path
|
64
65
|
|
65
|
-
from lamindb.base import deprecated
|
66
66
|
from lamindb.base.fields import (
|
67
67
|
CharField,
|
68
68
|
DateTimeField,
|
@@ -186,8 +186,7 @@ def update_attributes(record: Record, attributes: dict[str, str]):
|
|
186
186
|
if (
|
187
187
|
getattr(record, key) != value
|
188
188
|
and value is not None
|
189
|
-
and key
|
190
|
-
and key != "_aux"
|
189
|
+
and key not in {"dtype", "otype", "_aux"}
|
191
190
|
):
|
192
191
|
logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
|
193
192
|
setattr(record, key, value)
|
@@ -370,6 +369,8 @@ class Registry(ModelBase):
|
|
370
369
|
Note: `Registry` inherits from Django's `ModelBase`.
|
371
370
|
"""
|
372
371
|
|
372
|
+
_available_fields: set[str] = None
|
373
|
+
|
373
374
|
def __new__(cls, name, bases, attrs, **kwargs):
|
374
375
|
new_class = super().__new__(cls, name, bases, attrs, **kwargs)
|
375
376
|
return new_class
|
@@ -487,10 +488,12 @@ class Registry(ModelBase):
|
|
487
488
|
- Guide: :doc:`docs:registries`
|
488
489
|
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
489
490
|
|
490
|
-
Examples
|
491
|
+
Examples:
|
492
|
+
|
493
|
+
::
|
491
494
|
|
492
|
-
|
493
|
-
|
495
|
+
ulabel = ln.ULabel.get("FvtpPJLJ")
|
496
|
+
ulabel = ln.ULabel.get(name="my-label")
|
494
497
|
"""
|
495
498
|
from .query_set import QuerySet
|
496
499
|
|
@@ -594,17 +597,18 @@ class Registry(ModelBase):
|
|
594
597
|
"""
|
595
598
|
from .query_set import QuerySet
|
596
599
|
|
597
|
-
|
600
|
+
# we're in the default instance
|
601
|
+
if instance is None or instance == "default":
|
598
602
|
return QuerySet(model=cls, using=None)
|
599
|
-
|
600
603
|
owner, name = get_owner_name_from_identifier(instance)
|
601
|
-
if
|
604
|
+
if [owner, name] == setup_settings.instance.slug.split("/"):
|
602
605
|
return QuerySet(model=cls, using=None)
|
603
606
|
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
+
# move on to different instances
|
608
|
+
cache_using_filepath = (
|
609
|
+
setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
607
610
|
)
|
611
|
+
settings_file = instance_settings_file(name, owner)
|
608
612
|
if not settings_file.exists():
|
609
613
|
result = connect_instance_hub(owner=owner, name=name)
|
610
614
|
if isinstance(result, str):
|
@@ -612,23 +616,47 @@ class Registry(ModelBase):
|
|
612
616
|
f"Failed to load instance {instance}, please check your permissions!"
|
613
617
|
)
|
614
618
|
iresult, _ = result
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
logger.warning(
|
622
|
-
f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
|
623
|
-
)
|
624
|
-
cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
|
625
|
-
settings_file = instance_settings_file(name, owner)
|
619
|
+
# do not use {} syntax below, it gives rise to a dict if the schema modules
|
620
|
+
# are empty and then triggers a TypeError in missing_members = source_modules - target_modules
|
621
|
+
source_modules = set( # noqa
|
622
|
+
[mod for mod in iresult["schema_str"].split(",") if mod != ""]
|
623
|
+
)
|
624
|
+
# this just retrieves the full connection string from iresult
|
626
625
|
db = update_db_using_local(iresult, settings_file)
|
626
|
+
cache_using_filepath.write_text(
|
627
|
+
f"{iresult['lnid']}\n{iresult['schema_str']}"
|
628
|
+
)
|
629
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
630
|
+
is_fine_grained_access = (
|
631
|
+
iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
|
632
|
+
)
|
633
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
634
|
+
into_access_db = iresult
|
627
635
|
else:
|
628
636
|
isettings = load_instance_settings(settings_file)
|
637
|
+
source_modules = isettings.modules
|
629
638
|
db = isettings.db
|
630
|
-
|
639
|
+
cache_using_filepath.write_text(
|
640
|
+
f"{isettings.uid}\n{','.join(source_modules)}"
|
641
|
+
)
|
642
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
643
|
+
is_fine_grained_access = (
|
644
|
+
isettings._fine_grained_access and isettings._db_permissions == "jwt"
|
645
|
+
)
|
646
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
647
|
+
into_access_db = isettings
|
648
|
+
|
649
|
+
target_modules = setup_settings.instance.modules
|
650
|
+
if not (missing_members := source_modules - target_modules):
|
651
|
+
logger.warning(
|
652
|
+
f"source modules has additional modules: {missing_members}\n"
|
653
|
+
"consider mounting these registry modules to transfer all metadata"
|
654
|
+
)
|
655
|
+
|
631
656
|
add_db_connection(db, instance)
|
657
|
+
if is_fine_grained_access:
|
658
|
+
db_token = access_db(into_access_db)
|
659
|
+
db_token_manager.set(db_token, instance)
|
632
660
|
return QuerySet(model=cls, using=instance)
|
633
661
|
|
634
662
|
def __get_module_name__(cls) -> str:
|
@@ -638,10 +666,6 @@ class Registry(ModelBase):
|
|
638
666
|
module_name = "core"
|
639
667
|
return module_name
|
640
668
|
|
641
|
-
@deprecated("__get_module_name__")
|
642
|
-
def __get_schema_name__(cls) -> str:
|
643
|
-
return cls.__get_module_name__()
|
644
|
-
|
645
669
|
def __get_name_with_module__(cls) -> str:
|
646
670
|
module_name = cls.__get_module_name__()
|
647
671
|
if module_name == "core":
|
@@ -650,9 +674,19 @@ class Registry(ModelBase):
|
|
650
674
|
module_prefix = f"{module_name}."
|
651
675
|
return f"{module_prefix}{cls.__name__}"
|
652
676
|
|
653
|
-
|
654
|
-
|
655
|
-
|
677
|
+
def __get_available_fields__(cls) -> set[str]:
|
678
|
+
if cls._available_fields is None:
|
679
|
+
cls._available_fields = {
|
680
|
+
f.name
|
681
|
+
for f in cls._meta.get_fields()
|
682
|
+
if not f.name.startswith("_")
|
683
|
+
and not f.name.startswith("links_")
|
684
|
+
and not f.name.endswith("_id")
|
685
|
+
}
|
686
|
+
if cls.__name__ == "Artifact":
|
687
|
+
cls._available_fields.add("visibility")
|
688
|
+
cls._available_fields.add("transform")
|
689
|
+
return cls._available_fields
|
656
690
|
|
657
691
|
|
658
692
|
class BasicRecord(models.Model, metaclass=Registry):
|
@@ -668,75 +702,86 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
668
702
|
|
669
703
|
def __init__(self, *args, **kwargs):
|
670
704
|
skip_validation = kwargs.pop("_skip_validation", False)
|
671
|
-
if not args
|
672
|
-
super().__init__(**kwargs)
|
673
|
-
elif not args and not skip_validation:
|
674
|
-
from ..core._settings import settings
|
675
|
-
from .can_curate import CanCurate
|
676
|
-
from .collection import Collection
|
677
|
-
from .schema import Schema
|
678
|
-
from .transform import Transform
|
679
|
-
|
680
|
-
validate_fields(self, kwargs)
|
681
|
-
|
682
|
-
# do not search for names if an id is passed; this is important
|
683
|
-
# e.g. when synching ids from the notebook store to lamindb
|
684
|
-
has_consciously_provided_uid = False
|
685
|
-
if "_has_consciously_provided_uid" in kwargs:
|
686
|
-
has_consciously_provided_uid = kwargs.pop(
|
687
|
-
"_has_consciously_provided_uid"
|
688
|
-
)
|
705
|
+
if not args:
|
689
706
|
if (
|
690
|
-
|
691
|
-
and
|
692
|
-
|
707
|
+
issubclass(self.__class__, Record)
|
708
|
+
and not self.__class__.__name__ == "Storage"
|
709
|
+
# do not save bionty entities in restricted spaces by default
|
710
|
+
and self.__class__.__module__ != "bionty.models"
|
693
711
|
):
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
712
|
+
from lamindb import context as run_context
|
713
|
+
|
714
|
+
if run_context.space is not None:
|
715
|
+
kwargs["space"] = run_context.space
|
716
|
+
if skip_validation:
|
717
|
+
super().__init__(**kwargs)
|
718
|
+
else:
|
719
|
+
from ..core._settings import settings
|
720
|
+
from .can_curate import CanCurate
|
721
|
+
from .collection import Collection
|
722
|
+
from .schema import Schema
|
723
|
+
from .transform import Transform
|
724
|
+
|
725
|
+
validate_fields(self, kwargs)
|
726
|
+
|
727
|
+
# do not search for names if an id is passed; this is important
|
728
|
+
# e.g. when synching ids from the notebook store to lamindb
|
729
|
+
has_consciously_provided_uid = False
|
730
|
+
if "_has_consciously_provided_uid" in kwargs:
|
731
|
+
has_consciously_provided_uid = kwargs.pop(
|
732
|
+
"_has_consciously_provided_uid"
|
733
|
+
)
|
734
|
+
if (
|
735
|
+
isinstance(self, (CanCurate, Collection, Transform))
|
736
|
+
and settings.creation.search_names
|
737
|
+
and not has_consciously_provided_uid
|
738
|
+
):
|
739
|
+
name_field = getattr(self, "_name_field", "name")
|
740
|
+
exact_match = suggest_records_with_similar_names(
|
741
|
+
self, name_field, kwargs
|
742
|
+
)
|
743
|
+
if exact_match is not None:
|
744
|
+
if "version" in kwargs:
|
745
|
+
if kwargs["version"] is not None:
|
746
|
+
version_comment = " and version"
|
747
|
+
existing_record = self.__class__.filter(
|
748
|
+
**{
|
749
|
+
name_field: kwargs[name_field],
|
750
|
+
"version": kwargs["version"],
|
751
|
+
}
|
752
|
+
).one_or_none()
|
753
|
+
else:
|
754
|
+
# for a versioned record, an exact name match is not a criterion
|
755
|
+
# for retrieving a record in case `version` isn't passed -
|
756
|
+
# we'd always pull out many records with exactly the same name
|
757
|
+
existing_record = None
|
708
758
|
else:
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
else:
|
736
|
-
self._Model__clean_fields()
|
737
|
-
except DjangoValidationError as e:
|
738
|
-
message = _format_django_validation_error(self, e)
|
739
|
-
raise FieldValidationError(message) from e
|
759
|
+
version_comment = ""
|
760
|
+
existing_record = exact_match
|
761
|
+
if existing_record is not None:
|
762
|
+
logger.important(
|
763
|
+
f"returning existing {self.__class__.__name__} record with same"
|
764
|
+
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
765
|
+
)
|
766
|
+
if isinstance(self, Schema):
|
767
|
+
if existing_record.hash != kwargs["hash"]:
|
768
|
+
logger.warning(
|
769
|
+
f"You're updating schema {existing_record.uid}, which might already have been used to validate datasets. Be careful."
|
770
|
+
)
|
771
|
+
init_self_from_db(self, existing_record)
|
772
|
+
update_attributes(self, kwargs)
|
773
|
+
return None
|
774
|
+
super().__init__(**kwargs)
|
775
|
+
if isinstance(self, ValidateFields):
|
776
|
+
# this will trigger validation against django validators
|
777
|
+
try:
|
778
|
+
if hasattr(self, "clean_fields"):
|
779
|
+
self.clean_fields()
|
780
|
+
else:
|
781
|
+
self._Model__clean_fields()
|
782
|
+
except DjangoValidationError as e:
|
783
|
+
message = _format_django_validation_error(self, e)
|
784
|
+
raise FieldValidationError(message) from e
|
740
785
|
elif len(args) != len(self._meta.concrete_fields):
|
741
786
|
raise FieldValidationError(
|
742
787
|
f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
|
@@ -838,16 +883,20 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
838
883
|
if k != "run":
|
839
884
|
logger.important(f"{k} records: {', '.join(v)}")
|
840
885
|
|
841
|
-
if
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
886
|
+
if (
|
887
|
+
self.__class__.__name__
|
888
|
+
in {
|
889
|
+
"Artifact",
|
890
|
+
"Transform",
|
891
|
+
"Run",
|
892
|
+
"ULabel",
|
893
|
+
"Feature",
|
894
|
+
"Schema",
|
895
|
+
"Collection",
|
896
|
+
"Reference",
|
897
|
+
}
|
898
|
+
and self._branch_code >= 1
|
899
|
+
):
|
851
900
|
import lamindb as ln
|
852
901
|
|
853
902
|
if ln.context.project is not None:
|
@@ -1313,10 +1362,12 @@ def get_transfer_run(record) -> Run:
|
|
1313
1362
|
|
1314
1363
|
slug = record._state.db
|
1315
1364
|
owner, name = get_owner_name_from_identifier(slug)
|
1316
|
-
|
1317
|
-
|
1365
|
+
cache_using_filepath = (
|
1366
|
+
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
1367
|
+
)
|
1368
|
+
if not cache_using_filepath.exists():
|
1318
1369
|
raise SystemExit("Need to call .using() before")
|
1319
|
-
instance_uid =
|
1370
|
+
instance_uid = cache_using_filepath.read_text().split("\n")[0]
|
1320
1371
|
key = f"transfers/{instance_uid}"
|
1321
1372
|
uid = instance_uid + "0000"
|
1322
1373
|
transform = Transform.filter(uid=uid).one_or_none()
|
@@ -1399,11 +1450,13 @@ def transfer_to_default_db(
|
|
1399
1450
|
def track_current_key_and_name_values(record: Record):
|
1400
1451
|
from lamindb.models import Artifact
|
1401
1452
|
|
1453
|
+
# below, we're using __dict__ to avoid triggering the refresh from the database
|
1454
|
+
# which can lead to a recursion
|
1402
1455
|
if isinstance(record, Artifact):
|
1403
|
-
record._old_key = record.key
|
1404
|
-
record._old_suffix = record.suffix
|
1456
|
+
record._old_key = record.__dict__.get("key")
|
1457
|
+
record._old_suffix = record.__dict__.get("suffix")
|
1405
1458
|
elif hasattr(record, "_name_field"):
|
1406
|
-
record._old_name =
|
1459
|
+
record._old_name = record.__dict__.get(record._name_field)
|
1407
1460
|
|
1408
1461
|
|
1409
1462
|
def check_name_change(record: Record):
|
@@ -1437,9 +1490,6 @@ def check_name_change(record: Record):
|
|
1437
1490
|
label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
|
1438
1491
|
)
|
1439
1492
|
.exclude(feature_id=None) # must have a feature
|
1440
|
-
.exclude(
|
1441
|
-
feature_ref_is_name=None
|
1442
|
-
) # must be linked via Curator and therefore part of a schema
|
1443
1493
|
.distinct()
|
1444
1494
|
)
|
1445
1495
|
artifact_ids = linked_records.list("artifact__uid")
|
lamindb/models/run.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Any, overload
|
4
4
|
|
5
|
+
import numpy as np
|
5
6
|
from django.db import models
|
6
7
|
from django.db.models import (
|
7
8
|
CASCADE,
|
@@ -19,7 +20,7 @@ from lamindb.base.fields import (
|
|
19
20
|
ForeignKey,
|
20
21
|
)
|
21
22
|
from lamindb.base.users import current_user_id
|
22
|
-
from lamindb.errors import ValidationError
|
23
|
+
from lamindb.errors import InvalidArgument, ValidationError
|
23
24
|
|
24
25
|
from ..base.ids import base62_20
|
25
26
|
from .can_curate import CanCurate
|
@@ -33,6 +34,7 @@ if TYPE_CHECKING:
|
|
33
34
|
from .artifact import Artifact
|
34
35
|
from .collection import Collection
|
35
36
|
from .project import Project
|
37
|
+
from .query_set import QuerySet
|
36
38
|
from .schema import Schema
|
37
39
|
from .transform import Transform
|
38
40
|
from .ulabel import ULabel
|
@@ -538,6 +540,56 @@ class Run(Record):
|
|
538
540
|
delete_run_artifacts(self)
|
539
541
|
super().delete()
|
540
542
|
|
543
|
+
@classmethod
|
544
|
+
def filter(
|
545
|
+
cls,
|
546
|
+
*queries,
|
547
|
+
**expressions,
|
548
|
+
) -> QuerySet:
|
549
|
+
"""Query a set of runs.
|
550
|
+
|
551
|
+
Args:
|
552
|
+
*queries: `Q` expressions.
|
553
|
+
**expressions: Params, fields, and values passed via the Django query syntax.
|
554
|
+
|
555
|
+
See Also:
|
556
|
+
- Guide: :doc:`docs:registries`
|
557
|
+
|
558
|
+
Examples:
|
559
|
+
|
560
|
+
Query by fields::
|
561
|
+
|
562
|
+
ln.Run.filter(key="my_datasets/my_file.parquet")
|
563
|
+
|
564
|
+
Query by params::
|
565
|
+
|
566
|
+
ln.Run.filter(hyperparam_x=100)
|
567
|
+
"""
|
568
|
+
from ._feature_manager import filter_base
|
569
|
+
from .query_set import QuerySet
|
570
|
+
|
571
|
+
if expressions:
|
572
|
+
keys_normalized = [key.split("__")[0] for key in expressions]
|
573
|
+
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
574
|
+
if field_or_feature_or_param in Run.__get_available_fields__():
|
575
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
576
|
+
elif all(
|
577
|
+
params_validated := Param.validate(
|
578
|
+
keys_normalized, field="name", mute=True
|
579
|
+
)
|
580
|
+
):
|
581
|
+
return filter_base(ParamManagerRun, **expressions)
|
582
|
+
else:
|
583
|
+
params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
|
584
|
+
message = f"param names: {params}"
|
585
|
+
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
586
|
+
raise InvalidArgument(
|
587
|
+
f"You can query either by available fields: {fields}\n"
|
588
|
+
f"Or fix invalid {message}"
|
589
|
+
)
|
590
|
+
else:
|
591
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
592
|
+
|
541
593
|
|
542
594
|
def delete_run_artifacts(run: Run) -> None:
|
543
595
|
environment = None
|
@@ -555,7 +607,9 @@ def delete_run_artifacts(run: Run) -> None:
|
|
555
607
|
if environment._environment_of.count() == 0:
|
556
608
|
environment.delete(permanent=True)
|
557
609
|
if report is not None:
|
558
|
-
|
610
|
+
# only delete if there are no other runs attached to this report
|
611
|
+
if report._report_of.count() == 0:
|
612
|
+
report.delete(permanent=True)
|
559
613
|
|
560
614
|
|
561
615
|
class RunParamValue(BasicRecord, LinkORM):
|
lamindb/models/save.py
CHANGED
@@ -192,9 +192,7 @@ def copy_or_move_to_cache(
|
|
192
192
|
# non-local storage_path further
|
193
193
|
if local_path != cache_path:
|
194
194
|
if cache_path.exists():
|
195
|
-
logger.warning(
|
196
|
-
f"The cache path {cache_path.as_posix()} already exists, replacing it."
|
197
|
-
)
|
195
|
+
logger.warning(f"replacing the existing cache path {cache_path.as_posix()}")
|
198
196
|
if cache_path.is_dir():
|
199
197
|
shutil.rmtree(cache_path)
|
200
198
|
else:
|