lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +216 -133
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3559
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1546 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/models/__init__.py +12 -2
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +384 -301
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +354 -177
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +79 -56
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +78 -47
- lamindb/models/has_parents.py +24 -9
- lamindb/models/project.py +3 -3
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +251 -206
- lamindb/models/record.py +211 -344
- lamindb/models/run.py +59 -5
- lamindb/models/save.py +9 -5
- lamindb/models/schema.py +673 -196
- lamindb/models/transform.py +5 -14
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
- lamindb-1.5.0.dist-info/RECORD +108 -0
- lamindb-1.3.2.dist-info/RECORD +0 -95
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py
CHANGED
@@ -5,7 +5,6 @@ import inspect
|
|
5
5
|
import re
|
6
6
|
import sys
|
7
7
|
from collections import defaultdict
|
8
|
-
from functools import reduce
|
9
8
|
from itertools import chain
|
10
9
|
from pathlib import PurePosixPath
|
11
10
|
from typing import (
|
@@ -21,36 +20,15 @@ from typing import (
|
|
21
20
|
import dj_database_url
|
22
21
|
import lamindb_setup as ln_setup
|
23
22
|
from django.core.exceptions import ValidationError as DjangoValidationError
|
24
|
-
from django.db import IntegrityError, connections, models, transaction
|
25
|
-
from django.db.models import
|
26
|
-
CASCADE,
|
27
|
-
PROTECT,
|
28
|
-
Field,
|
29
|
-
IntegerField,
|
30
|
-
Manager,
|
31
|
-
Q,
|
32
|
-
QuerySet,
|
33
|
-
Value,
|
34
|
-
)
|
23
|
+
from django.db import IntegrityError, ProgrammingError, connections, models, transaction
|
24
|
+
from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
|
35
25
|
from django.db.models.base import ModelBase
|
36
26
|
from django.db.models.fields.related import (
|
37
27
|
ManyToManyField,
|
38
28
|
ManyToManyRel,
|
39
29
|
ManyToOneRel,
|
40
30
|
)
|
41
|
-
from django.db.models.functions import Cast, Coalesce
|
42
|
-
from django.db.models.lookups import (
|
43
|
-
Contains,
|
44
|
-
Exact,
|
45
|
-
IContains,
|
46
|
-
IExact,
|
47
|
-
IRegex,
|
48
|
-
IStartsWith,
|
49
|
-
Regex,
|
50
|
-
StartsWith,
|
51
|
-
)
|
52
31
|
from lamin_utils import colors, logger
|
53
|
-
from lamin_utils._lookup import Lookup
|
54
32
|
from lamindb_setup import settings as setup_settings
|
55
33
|
from lamindb_setup._connect_instance import (
|
56
34
|
get_owner_name_from_identifier,
|
@@ -60,25 +38,25 @@ from lamindb_setup._connect_instance import (
|
|
60
38
|
from lamindb_setup.core._docs import doc_args
|
61
39
|
from lamindb_setup.core._hub_core import connect_instance_hub
|
62
40
|
from lamindb_setup.core._settings_store import instance_settings_file
|
41
|
+
from lamindb_setup.core.django import DBToken, db_token_manager
|
63
42
|
from lamindb_setup.core.upath import extract_suffix_from_path
|
64
43
|
|
65
|
-
from
|
66
|
-
from lamindb.base.fields import (
|
44
|
+
from ..base.fields import (
|
67
45
|
CharField,
|
68
46
|
DateTimeField,
|
69
47
|
ForeignKey,
|
70
48
|
JSONField,
|
71
|
-
TextField,
|
72
49
|
)
|
73
|
-
from
|
74
|
-
from lamindb.errors import FieldValidationError
|
75
|
-
|
50
|
+
from ..base.types import FieldAttr, StrField
|
76
51
|
from ..errors import (
|
52
|
+
FieldValidationError,
|
77
53
|
InvalidArgument,
|
54
|
+
NoWriteAccess,
|
78
55
|
RecordNameChangeIntegrityError,
|
79
56
|
ValidationError,
|
80
57
|
)
|
81
58
|
from ._is_versioned import IsVersioned
|
59
|
+
from .query_manager import QueryManager, _lookup, _search
|
82
60
|
|
83
61
|
if TYPE_CHECKING:
|
84
62
|
from datetime import datetime
|
@@ -183,14 +161,19 @@ def init_self_from_db(self: Record, existing_record: Record):
|
|
183
161
|
|
184
162
|
def update_attributes(record: Record, attributes: dict[str, str]):
|
185
163
|
for key, value in attributes.items():
|
186
|
-
if (
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
164
|
+
if getattr(record, key) != value and value is not None:
|
165
|
+
if key not in {"uid", "dtype", "otype", "hash"}:
|
166
|
+
logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
|
167
|
+
setattr(record, key, value)
|
168
|
+
else:
|
169
|
+
hash_message = (
|
170
|
+
"recomputing on .save()"
|
171
|
+
if key == "hash"
|
172
|
+
else f"keeping {getattr(record, key)}"
|
173
|
+
)
|
174
|
+
logger.warning(
|
175
|
+
f"ignoring tentative value {value} for {key}, {hash_message}"
|
176
|
+
)
|
194
177
|
|
195
178
|
|
196
179
|
def validate_literal_fields(record: Record, kwargs) -> None:
|
@@ -278,9 +261,12 @@ def validate_fields(record: Record, kwargs):
|
|
278
261
|
"uid"
|
279
262
|
).max_length # triggers FieldDoesNotExist
|
280
263
|
if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
|
281
|
-
|
282
|
-
|
283
|
-
)
|
264
|
+
if not (
|
265
|
+
record.__class__ is Schema and len(kwargs["uid"]) == 16
|
266
|
+
): # no error for schema
|
267
|
+
raise ValidationError(
|
268
|
+
f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
|
269
|
+
)
|
284
270
|
# validate is_type
|
285
271
|
if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
|
286
272
|
if kwargs["name"].endswith("s"):
|
@@ -370,6 +356,8 @@ class Registry(ModelBase):
|
|
370
356
|
Note: `Registry` inherits from Django's `ModelBase`.
|
371
357
|
"""
|
372
358
|
|
359
|
+
_available_fields: set[str] = None
|
360
|
+
|
373
361
|
def __new__(cls, name, bases, attrs, **kwargs):
|
374
362
|
new_class = super().__new__(cls, name, bases, attrs, **kwargs)
|
375
363
|
return new_class
|
@@ -411,36 +399,13 @@ class Registry(ModelBase):
|
|
411
399
|
def __repr__(cls) -> str:
|
412
400
|
return registry_repr(cls)
|
413
401
|
|
402
|
+
@doc_args(_lookup.__doc__)
|
414
403
|
def lookup(
|
415
404
|
cls,
|
416
405
|
field: StrField | None = None,
|
417
406
|
return_field: StrField | None = None,
|
418
407
|
) -> NamedTuple:
|
419
|
-
"""
|
420
|
-
|
421
|
-
Args:
|
422
|
-
field: The field to look up the values for. Defaults to first string field.
|
423
|
-
return_field: The field to return. If `None`, returns the whole record.
|
424
|
-
|
425
|
-
Returns:
|
426
|
-
A `NamedTuple` of lookup information of the field values with a
|
427
|
-
dictionary converter.
|
428
|
-
|
429
|
-
See Also:
|
430
|
-
:meth:`~lamindb.models.Record.search`
|
431
|
-
|
432
|
-
Examples:
|
433
|
-
>>> import bionty as bt
|
434
|
-
>>> bt.settings.organism = "human"
|
435
|
-
>>> bt.Gene.from_source(symbol="ADGB-DT").save()
|
436
|
-
>>> lookup = bt.Gene.lookup()
|
437
|
-
>>> lookup.adgb_dt
|
438
|
-
>>> lookup_dict = lookup.dict()
|
439
|
-
>>> lookup_dict['ADGB-DT']
|
440
|
-
>>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
|
441
|
-
>>> genes.ensg00000002745
|
442
|
-
>>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
|
443
|
-
"""
|
408
|
+
"""{}""" # noqa: D415
|
444
409
|
return _lookup(cls=cls, field=field, return_field=return_field)
|
445
410
|
|
446
411
|
def filter(cls, *queries, **expressions) -> QuerySet:
|
@@ -487,10 +452,12 @@ class Registry(ModelBase):
|
|
487
452
|
- Guide: :doc:`docs:registries`
|
488
453
|
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
489
454
|
|
490
|
-
Examples
|
455
|
+
Examples:
|
456
|
+
|
457
|
+
::
|
491
458
|
|
492
|
-
|
493
|
-
|
459
|
+
ulabel = ln.ULabel.get("FvtpPJLJ")
|
460
|
+
ulabel = ln.ULabel.get(name="my-label")
|
494
461
|
"""
|
495
462
|
from .query_set import QuerySet
|
496
463
|
|
@@ -538,6 +505,7 @@ class Registry(ModelBase):
|
|
538
505
|
query_set = query_set.order_by("-updated_at")
|
539
506
|
return query_set[:limit].df(include=include, features=features)
|
540
507
|
|
508
|
+
@doc_args(_search.__doc__)
|
541
509
|
def search(
|
542
510
|
cls,
|
543
511
|
string: str,
|
@@ -546,27 +514,7 @@ class Registry(ModelBase):
|
|
546
514
|
limit: int | None = 20,
|
547
515
|
case_sensitive: bool = False,
|
548
516
|
) -> QuerySet:
|
549
|
-
"""
|
550
|
-
|
551
|
-
Args:
|
552
|
-
string: The input string to match against the field ontology values.
|
553
|
-
field: The field or fields to search. Search all string fields by default.
|
554
|
-
limit: Maximum amount of top results to return.
|
555
|
-
case_sensitive: Whether the match is case sensitive.
|
556
|
-
|
557
|
-
Returns:
|
558
|
-
A sorted `DataFrame` of search results with a score in column `score`.
|
559
|
-
If `return_queryset` is `True`. `QuerySet`.
|
560
|
-
|
561
|
-
See Also:
|
562
|
-
:meth:`~lamindb.models.Record.filter`
|
563
|
-
:meth:`~lamindb.models.Record.lookup`
|
564
|
-
|
565
|
-
Examples:
|
566
|
-
>>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
|
567
|
-
>>> ln.save(ulabels)
|
568
|
-
>>> ln.ULabel.search("ULabel2")
|
569
|
-
"""
|
517
|
+
"""{}""" # noqa: D415
|
570
518
|
return _search(
|
571
519
|
cls=cls,
|
572
520
|
string=string,
|
@@ -594,17 +542,21 @@ class Registry(ModelBase):
|
|
594
542
|
"""
|
595
543
|
from .query_set import QuerySet
|
596
544
|
|
597
|
-
|
545
|
+
# connection already established
|
546
|
+
if instance in connections:
|
547
|
+
return QuerySet(model=cls, using=instance)
|
548
|
+
# we're in the default instance
|
549
|
+
if instance is None or instance == "default":
|
598
550
|
return QuerySet(model=cls, using=None)
|
599
|
-
|
600
551
|
owner, name = get_owner_name_from_identifier(instance)
|
601
|
-
if
|
552
|
+
if [owner, name] == setup_settings.instance.slug.split("/"):
|
602
553
|
return QuerySet(model=cls, using=None)
|
603
554
|
|
604
|
-
|
605
|
-
|
606
|
-
|
555
|
+
# move on to different instances
|
556
|
+
cache_using_filepath = (
|
557
|
+
setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
607
558
|
)
|
559
|
+
settings_file = instance_settings_file(name, owner)
|
608
560
|
if not settings_file.exists():
|
609
561
|
result = connect_instance_hub(owner=owner, name=name)
|
610
562
|
if isinstance(result, str):
|
@@ -613,24 +565,46 @@ class Registry(ModelBase):
|
|
613
565
|
)
|
614
566
|
iresult, _ = result
|
615
567
|
# do not use {} syntax below, it gives rise to a dict if the schema modules
|
616
|
-
# are empty and then triggers a TypeError in missing_members =
|
617
|
-
|
568
|
+
# are empty and then triggers a TypeError in missing_members = source_modules - target_modules
|
569
|
+
source_modules = set( # noqa
|
618
570
|
[mod for mod in iresult["schema_str"].split(",") if mod != ""]
|
619
571
|
)
|
620
|
-
|
621
|
-
if not source_module.issubset(target_module):
|
622
|
-
missing_members = source_module - target_module
|
623
|
-
logger.warning(
|
624
|
-
f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
|
625
|
-
)
|
626
|
-
cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
|
627
|
-
settings_file = instance_settings_file(name, owner)
|
572
|
+
# this just retrieves the full connection string from iresult
|
628
573
|
db = update_db_using_local(iresult, settings_file)
|
574
|
+
cache_using_filepath.write_text(
|
575
|
+
f"{iresult['lnid']}\n{iresult['schema_str']}"
|
576
|
+
)
|
577
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
578
|
+
is_fine_grained_access = (
|
579
|
+
iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
|
580
|
+
)
|
581
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
582
|
+
into_db_token = iresult
|
629
583
|
else:
|
630
584
|
isettings = load_instance_settings(settings_file)
|
585
|
+
source_modules = isettings.modules
|
631
586
|
db = isettings.db
|
632
|
-
|
587
|
+
cache_using_filepath.write_text(
|
588
|
+
f"{isettings.uid}\n{','.join(source_modules)}"
|
589
|
+
)
|
590
|
+
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
591
|
+
is_fine_grained_access = (
|
592
|
+
isettings._fine_grained_access and isettings._db_permissions == "jwt"
|
593
|
+
)
|
594
|
+
# access_db can take both: the dict from connect_instance_hub and isettings
|
595
|
+
into_db_token = isettings
|
596
|
+
|
597
|
+
target_modules = setup_settings.instance.modules
|
598
|
+
if missing_members := source_modules - target_modules:
|
599
|
+
logger.warning(
|
600
|
+
f"source modules has additional modules: {missing_members}\n"
|
601
|
+
"consider mounting these registry modules to transfer all metadata"
|
602
|
+
)
|
603
|
+
|
633
604
|
add_db_connection(db, instance)
|
605
|
+
if is_fine_grained_access:
|
606
|
+
db_token = DBToken(into_db_token)
|
607
|
+
db_token_manager.set(db_token, instance)
|
634
608
|
return QuerySet(model=cls, using=instance)
|
635
609
|
|
636
610
|
def __get_module_name__(cls) -> str:
|
@@ -640,10 +614,6 @@ class Registry(ModelBase):
|
|
640
614
|
module_name = "core"
|
641
615
|
return module_name
|
642
616
|
|
643
|
-
@deprecated("__get_module_name__")
|
644
|
-
def __get_schema_name__(cls) -> str:
|
645
|
-
return cls.__get_module_name__()
|
646
|
-
|
647
617
|
def __get_name_with_module__(cls) -> str:
|
648
618
|
module_name = cls.__get_module_name__()
|
649
619
|
if module_name == "core":
|
@@ -652,9 +622,19 @@ class Registry(ModelBase):
|
|
652
622
|
module_prefix = f"{module_name}."
|
653
623
|
return f"{module_prefix}{cls.__name__}"
|
654
624
|
|
655
|
-
|
656
|
-
|
657
|
-
|
625
|
+
def __get_available_fields__(cls) -> set[str]:
|
626
|
+
if cls._available_fields is None:
|
627
|
+
cls._available_fields = {
|
628
|
+
f.name
|
629
|
+
for f in cls._meta.get_fields()
|
630
|
+
if not f.name.startswith("_")
|
631
|
+
and not f.name.startswith("links_")
|
632
|
+
and not f.name.endswith("_id")
|
633
|
+
}
|
634
|
+
if cls.__name__ == "Artifact":
|
635
|
+
cls._available_fields.add("visibility")
|
636
|
+
cls._available_fields.add("transform")
|
637
|
+
return cls._available_fields
|
658
638
|
|
659
639
|
|
660
640
|
class BasicRecord(models.Model, metaclass=Registry):
|
@@ -665,80 +645,89 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
665
645
|
It's mainly used for LinkORMs and similar.
|
666
646
|
"""
|
667
647
|
|
648
|
+
objects = QueryManager()
|
649
|
+
|
668
650
|
class Meta:
|
669
651
|
abstract = True
|
652
|
+
base_manager_name = "objects"
|
670
653
|
|
671
654
|
def __init__(self, *args, **kwargs):
|
672
655
|
skip_validation = kwargs.pop("_skip_validation", False)
|
673
|
-
if not args
|
674
|
-
super().__init__(**kwargs)
|
675
|
-
elif not args and not skip_validation:
|
676
|
-
from ..core._settings import settings
|
677
|
-
from .can_curate import CanCurate
|
678
|
-
from .collection import Collection
|
679
|
-
from .schema import Schema
|
680
|
-
from .transform import Transform
|
681
|
-
|
682
|
-
validate_fields(self, kwargs)
|
683
|
-
|
684
|
-
# do not search for names if an id is passed; this is important
|
685
|
-
# e.g. when synching ids from the notebook store to lamindb
|
686
|
-
has_consciously_provided_uid = False
|
687
|
-
if "_has_consciously_provided_uid" in kwargs:
|
688
|
-
has_consciously_provided_uid = kwargs.pop(
|
689
|
-
"_has_consciously_provided_uid"
|
690
|
-
)
|
656
|
+
if not args:
|
691
657
|
if (
|
692
|
-
|
693
|
-
and
|
694
|
-
|
658
|
+
issubclass(self.__class__, Record)
|
659
|
+
and self.__class__.__name__
|
660
|
+
not in {"Storage", "ULabel", "Feature", "Schema", "Param"}
|
661
|
+
# do not save bionty entities in restricted spaces by default
|
662
|
+
and self.__class__.__module__ != "bionty.models"
|
695
663
|
):
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
664
|
+
from lamindb import context as run_context
|
665
|
+
|
666
|
+
if run_context.space is not None:
|
667
|
+
kwargs["space"] = run_context.space
|
668
|
+
if skip_validation:
|
669
|
+
super().__init__(**kwargs)
|
670
|
+
else:
|
671
|
+
from ..core._settings import settings
|
672
|
+
from .can_curate import CanCurate
|
673
|
+
from .collection import Collection
|
674
|
+
from .transform import Transform
|
675
|
+
|
676
|
+
validate_fields(self, kwargs)
|
677
|
+
|
678
|
+
# do not search for names if an id is passed; this is important
|
679
|
+
# e.g. when synching ids from the notebook store to lamindb
|
680
|
+
has_consciously_provided_uid = False
|
681
|
+
if "_has_consciously_provided_uid" in kwargs:
|
682
|
+
has_consciously_provided_uid = kwargs.pop(
|
683
|
+
"_has_consciously_provided_uid"
|
684
|
+
)
|
685
|
+
if (
|
686
|
+
isinstance(self, (CanCurate, Collection, Transform))
|
687
|
+
and settings.creation.search_names
|
688
|
+
and not has_consciously_provided_uid
|
689
|
+
):
|
690
|
+
name_field = getattr(self, "_name_field", "name")
|
691
|
+
exact_match = suggest_records_with_similar_names(
|
692
|
+
self, name_field, kwargs
|
693
|
+
)
|
694
|
+
if exact_match is not None:
|
695
|
+
if "version" in kwargs:
|
696
|
+
if kwargs["version"] is not None:
|
697
|
+
version_comment = " and version"
|
698
|
+
existing_record = self.__class__.filter(
|
699
|
+
**{
|
700
|
+
name_field: kwargs[name_field],
|
701
|
+
"version": kwargs["version"],
|
702
|
+
}
|
703
|
+
).one_or_none()
|
704
|
+
else:
|
705
|
+
# for a versioned record, an exact name match is not a criterion
|
706
|
+
# for retrieving a record in case `version` isn't passed -
|
707
|
+
# we'd always pull out many records with exactly the same name
|
708
|
+
existing_record = None
|
710
709
|
else:
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
if isinstance(self, ValidateFields):
|
733
|
-
# this will trigger validation against django validators
|
734
|
-
try:
|
735
|
-
if hasattr(self, "clean_fields"):
|
736
|
-
self.clean_fields()
|
737
|
-
else:
|
738
|
-
self._Model__clean_fields()
|
739
|
-
except DjangoValidationError as e:
|
740
|
-
message = _format_django_validation_error(self, e)
|
741
|
-
raise FieldValidationError(message) from e
|
710
|
+
version_comment = ""
|
711
|
+
existing_record = exact_match
|
712
|
+
if existing_record is not None:
|
713
|
+
logger.important(
|
714
|
+
f"returning existing {self.__class__.__name__} record with same"
|
715
|
+
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
716
|
+
)
|
717
|
+
init_self_from_db(self, existing_record)
|
718
|
+
update_attributes(self, kwargs)
|
719
|
+
return None
|
720
|
+
super().__init__(**kwargs)
|
721
|
+
if isinstance(self, ValidateFields):
|
722
|
+
# this will trigger validation against django validators
|
723
|
+
try:
|
724
|
+
if hasattr(self, "clean_fields"):
|
725
|
+
self.clean_fields()
|
726
|
+
else:
|
727
|
+
self._Model__clean_fields()
|
728
|
+
except DjangoValidationError as e:
|
729
|
+
message = _format_django_validation_error(self, e)
|
730
|
+
raise FieldValidationError(message) from e
|
742
731
|
elif len(args) != len(self._meta.concrete_fields):
|
743
732
|
raise FieldValidationError(
|
744
733
|
f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
|
@@ -797,20 +786,33 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
797
786
|
# save unversioned record
|
798
787
|
else:
|
799
788
|
super().save(*args, **kwargs)
|
800
|
-
except IntegrityError as e:
|
789
|
+
except (IntegrityError, ProgrammingError) as e:
|
801
790
|
error_msg = str(e)
|
802
791
|
# two possible error messages for hash duplication
|
803
792
|
# "duplicate key value violates unique constraint"
|
804
793
|
# "UNIQUE constraint failed"
|
805
794
|
if (
|
806
|
-
|
807
|
-
|
808
|
-
|
795
|
+
isinstance(e, IntegrityError)
|
796
|
+
and "hash" in error_msg
|
797
|
+
and (
|
798
|
+
"UNIQUE constraint failed" in error_msg
|
799
|
+
or "duplicate key value violates unique constraint" in error_msg
|
800
|
+
)
|
801
|
+
):
|
809
802
|
pre_existing_record = self.__class__.get(hash=self.hash)
|
810
803
|
logger.warning(
|
811
804
|
f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
|
812
805
|
)
|
813
806
|
init_self_from_db(self, pre_existing_record)
|
807
|
+
elif (
|
808
|
+
isinstance(e, ProgrammingError)
|
809
|
+
and hasattr(self, "space")
|
810
|
+
and "new row violates row-level security policy" in error_msg
|
811
|
+
):
|
812
|
+
raise NoWriteAccess(
|
813
|
+
f"You’re not allowed to write to the space '{self.space.name}'.\n"
|
814
|
+
"Please contact an administrator of the space if you need write access."
|
815
|
+
) from None
|
814
816
|
else:
|
815
817
|
raise
|
816
818
|
# call the below in case a user makes more updates to the record
|
@@ -840,16 +842,20 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
840
842
|
if k != "run":
|
841
843
|
logger.important(f"{k} records: {', '.join(v)}")
|
842
844
|
|
843
|
-
if
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
845
|
+
if (
|
846
|
+
self.__class__.__name__
|
847
|
+
in {
|
848
|
+
"Artifact",
|
849
|
+
"Transform",
|
850
|
+
"Run",
|
851
|
+
"ULabel",
|
852
|
+
"Feature",
|
853
|
+
"Schema",
|
854
|
+
"Collection",
|
855
|
+
"Reference",
|
856
|
+
}
|
857
|
+
and self._branch_code >= 1
|
858
|
+
):
|
853
859
|
import lamindb as ln
|
854
860
|
|
855
861
|
if ln.context.project is not None:
|
@@ -882,7 +888,7 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
882
888
|
|
883
889
|
|
884
890
|
class Space(BasicRecord):
|
885
|
-
"""Spaces.
|
891
|
+
"""Spaces to restrict access to records to specific users or teams.
|
886
892
|
|
887
893
|
You can use spaces to restrict access to records within an instance.
|
888
894
|
|
@@ -1063,146 +1069,6 @@ def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
|
|
1063
1069
|
return []
|
1064
1070
|
|
1065
1071
|
|
1066
|
-
def _search(
|
1067
|
-
cls,
|
1068
|
-
string: str,
|
1069
|
-
*,
|
1070
|
-
field: StrField | list[StrField] | None = None,
|
1071
|
-
limit: int | None = 20,
|
1072
|
-
case_sensitive: bool = False,
|
1073
|
-
truncate_string: bool = False,
|
1074
|
-
) -> QuerySet:
|
1075
|
-
if string is None:
|
1076
|
-
raise ValueError("Cannot search for None value! Please pass a valid string.")
|
1077
|
-
|
1078
|
-
input_queryset = (
|
1079
|
-
cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1080
|
-
)
|
1081
|
-
registry = input_queryset.model
|
1082
|
-
name_field = getattr(registry, "_name_field", "name")
|
1083
|
-
if field is None:
|
1084
|
-
fields = [
|
1085
|
-
field.name
|
1086
|
-
for field in registry._meta.fields
|
1087
|
-
if field.get_internal_type() in {"CharField", "TextField"}
|
1088
|
-
]
|
1089
|
-
else:
|
1090
|
-
if not isinstance(field, list):
|
1091
|
-
fields_input = [field]
|
1092
|
-
else:
|
1093
|
-
fields_input = field
|
1094
|
-
fields = []
|
1095
|
-
for field in fields_input:
|
1096
|
-
if not isinstance(field, str):
|
1097
|
-
try:
|
1098
|
-
fields.append(field.field.name)
|
1099
|
-
except AttributeError as error:
|
1100
|
-
raise TypeError(
|
1101
|
-
"Please pass a Record string field, e.g., `CellType.name`!"
|
1102
|
-
) from error
|
1103
|
-
else:
|
1104
|
-
fields.append(field)
|
1105
|
-
|
1106
|
-
if truncate_string:
|
1107
|
-
if (len_string := len(string)) > 5:
|
1108
|
-
n_80_pct = int(len_string * 0.8)
|
1109
|
-
string = string[:n_80_pct]
|
1110
|
-
|
1111
|
-
string = string.strip()
|
1112
|
-
string_escape = re.escape(string)
|
1113
|
-
|
1114
|
-
exact_lookup = Exact if case_sensitive else IExact
|
1115
|
-
regex_lookup = Regex if case_sensitive else IRegex
|
1116
|
-
contains_lookup = Contains if case_sensitive else IContains
|
1117
|
-
|
1118
|
-
ranks = []
|
1119
|
-
contains_filters = []
|
1120
|
-
for field in fields:
|
1121
|
-
field_expr = Coalesce(
|
1122
|
-
Cast(field, output_field=TextField()),
|
1123
|
-
Value(""),
|
1124
|
-
output_field=TextField(),
|
1125
|
-
)
|
1126
|
-
# exact rank
|
1127
|
-
exact_expr = exact_lookup(field_expr, string)
|
1128
|
-
exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
|
1129
|
-
ranks.append(exact_rank)
|
1130
|
-
# exact synonym
|
1131
|
-
synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
|
1132
|
-
synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
|
1133
|
-
ranks.append(synonym_rank)
|
1134
|
-
# match as sub-phrase
|
1135
|
-
sub_expr = regex_lookup(
|
1136
|
-
field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
|
1137
|
-
)
|
1138
|
-
sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
|
1139
|
-
ranks.append(sub_rank)
|
1140
|
-
# startswith and avoid matching string with " " on the right
|
1141
|
-
# mostly for truncated
|
1142
|
-
startswith_expr = regex_lookup(
|
1143
|
-
field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
|
1144
|
-
)
|
1145
|
-
startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
|
1146
|
-
ranks.append(startswith_rank)
|
1147
|
-
# match as sub-phrase from the left, mostly for truncated
|
1148
|
-
right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
|
1149
|
-
right_rank = Cast(right_expr, output_field=IntegerField()) * 2
|
1150
|
-
ranks.append(right_rank)
|
1151
|
-
# match as sub-phrase from the right
|
1152
|
-
left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
|
1153
|
-
left_rank = Cast(left_expr, output_field=IntegerField()) * 2
|
1154
|
-
ranks.append(left_rank)
|
1155
|
-
# simple contains filter
|
1156
|
-
contains_expr = contains_lookup(field_expr, string)
|
1157
|
-
contains_filter = Q(contains_expr)
|
1158
|
-
contains_filters.append(contains_filter)
|
1159
|
-
# also rank by contains
|
1160
|
-
contains_rank = Cast(contains_expr, output_field=IntegerField())
|
1161
|
-
ranks.append(contains_rank)
|
1162
|
-
# additional rule for truncated strings
|
1163
|
-
# weight matches from the beginning of the string higher
|
1164
|
-
# sometimes whole words get truncated and startswith_expr is not enough
|
1165
|
-
if truncate_string and field == name_field:
|
1166
|
-
startswith_lookup = StartsWith if case_sensitive else IStartsWith
|
1167
|
-
name_startswith_expr = startswith_lookup(field_expr, string)
|
1168
|
-
name_startswith_rank = (
|
1169
|
-
Cast(name_startswith_expr, output_field=IntegerField()) * 2
|
1170
|
-
)
|
1171
|
-
ranks.append(name_startswith_rank)
|
1172
|
-
|
1173
|
-
ranked_queryset = (
|
1174
|
-
input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
|
1175
|
-
.alias(rank=sum(ranks))
|
1176
|
-
.order_by("-rank")
|
1177
|
-
)
|
1178
|
-
|
1179
|
-
return ranked_queryset[:limit]
|
1180
|
-
|
1181
|
-
|
1182
|
-
def _lookup(
|
1183
|
-
cls,
|
1184
|
-
field: StrField | None = None,
|
1185
|
-
return_field: StrField | None = None,
|
1186
|
-
using_key: str | None = None,
|
1187
|
-
) -> NamedTuple:
|
1188
|
-
"""{}""" # noqa: D415
|
1189
|
-
queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1190
|
-
field = get_name_field(registry=queryset.model, field=field)
|
1191
|
-
|
1192
|
-
return Lookup(
|
1193
|
-
records=queryset,
|
1194
|
-
values=[i.get(field) for i in queryset.values()],
|
1195
|
-
tuple_name=cls.__class__.__name__,
|
1196
|
-
prefix="ln",
|
1197
|
-
).lookup(
|
1198
|
-
return_field=(
|
1199
|
-
get_name_field(registry=queryset.model, field=return_field)
|
1200
|
-
if return_field is not None
|
1201
|
-
else None
|
1202
|
-
)
|
1203
|
-
)
|
1204
|
-
|
1205
|
-
|
1206
1072
|
def get_name_field(
|
1207
1073
|
registry: type[Record] | QuerySet | Manager,
|
1208
1074
|
*,
|
@@ -1315,10 +1181,12 @@ def get_transfer_run(record) -> Run:
|
|
1315
1181
|
|
1316
1182
|
slug = record._state.db
|
1317
1183
|
owner, name = get_owner_name_from_identifier(slug)
|
1318
|
-
|
1319
|
-
|
1184
|
+
cache_using_filepath = (
|
1185
|
+
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
1186
|
+
)
|
1187
|
+
if not cache_using_filepath.exists():
|
1320
1188
|
raise SystemExit("Need to call .using() before")
|
1321
|
-
instance_uid =
|
1189
|
+
instance_uid = cache_using_filepath.read_text().split("\n")[0]
|
1322
1190
|
key = f"transfers/{instance_uid}"
|
1323
1191
|
uid = instance_uid + "0000"
|
1324
1192
|
transform = Transform.filter(uid=uid).one_or_none()
|
@@ -1401,11 +1269,13 @@ def transfer_to_default_db(
|
|
1401
1269
|
def track_current_key_and_name_values(record: Record):
|
1402
1270
|
from lamindb.models import Artifact
|
1403
1271
|
|
1272
|
+
# below, we're using __dict__ to avoid triggering the refresh from the database
|
1273
|
+
# which can lead to a recursion
|
1404
1274
|
if isinstance(record, Artifact):
|
1405
|
-
record._old_key = record.key
|
1406
|
-
record._old_suffix = record.suffix
|
1275
|
+
record._old_key = record.__dict__.get("key")
|
1276
|
+
record._old_suffix = record.__dict__.get("suffix")
|
1407
1277
|
elif hasattr(record, "_name_field"):
|
1408
|
-
record._old_name =
|
1278
|
+
record._old_name = record.__dict__.get(record._name_field)
|
1409
1279
|
|
1410
1280
|
|
1411
1281
|
def check_name_change(record: Record):
|
@@ -1439,9 +1309,6 @@ def check_name_change(record: Record):
|
|
1439
1309
|
label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
|
1440
1310
|
)
|
1441
1311
|
.exclude(feature_id=None) # must have a feature
|
1442
|
-
.exclude(
|
1443
|
-
feature_ref_is_name=None
|
1444
|
-
) # must be linked via Curator and therefore part of a schema
|
1445
1312
|
.distinct()
|
1446
1313
|
)
|
1447
1314
|
artifact_ids = linked_records.list("artifact__uid")
|