lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +203 -102
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/_tiledbsoma.py +29 -13
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3349
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1534 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +382 -287
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +177 -106
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/collection.py +73 -52
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +51 -17
- lamindb/models/has_parents.py +69 -14
- lamindb/models/project.py +1 -1
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +247 -172
- lamindb/models/record.py +65 -247
- lamindb/models/run.py +4 -4
- lamindb/models/save.py +8 -2
- lamindb/models/schema.py +456 -184
- lamindb/models/transform.py +2 -2
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/models/record.py
CHANGED
@@ -5,7 +5,6 @@ import inspect
|
|
5
5
|
import re
|
6
6
|
import sys
|
7
7
|
from collections import defaultdict
|
8
|
-
from functools import reduce
|
9
8
|
from itertools import chain
|
10
9
|
from pathlib import PurePosixPath
|
11
10
|
from typing import (
|
@@ -21,36 +20,15 @@ from typing import (
|
|
21
20
|
import dj_database_url
|
22
21
|
import lamindb_setup as ln_setup
|
23
22
|
from django.core.exceptions import ValidationError as DjangoValidationError
|
24
|
-
from django.db import IntegrityError, connections, models, transaction
|
25
|
-
from django.db.models import
|
26
|
-
CASCADE,
|
27
|
-
PROTECT,
|
28
|
-
Field,
|
29
|
-
IntegerField,
|
30
|
-
Manager,
|
31
|
-
Q,
|
32
|
-
QuerySet,
|
33
|
-
Value,
|
34
|
-
)
|
23
|
+
from django.db import IntegrityError, ProgrammingError, connections, models, transaction
|
24
|
+
from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
|
35
25
|
from django.db.models.base import ModelBase
|
36
26
|
from django.db.models.fields.related import (
|
37
27
|
ManyToManyField,
|
38
28
|
ManyToManyRel,
|
39
29
|
ManyToOneRel,
|
40
30
|
)
|
41
|
-
from django.db.models.functions import Cast, Coalesce
|
42
|
-
from django.db.models.lookups import (
|
43
|
-
Contains,
|
44
|
-
Exact,
|
45
|
-
IContains,
|
46
|
-
IExact,
|
47
|
-
IRegex,
|
48
|
-
IStartsWith,
|
49
|
-
Regex,
|
50
|
-
StartsWith,
|
51
|
-
)
|
52
31
|
from lamin_utils import colors, logger
|
53
|
-
from lamin_utils._lookup import Lookup
|
54
32
|
from lamindb_setup import settings as setup_settings
|
55
33
|
from lamindb_setup._connect_instance import (
|
56
34
|
get_owner_name_from_identifier,
|
@@ -58,27 +36,27 @@ from lamindb_setup._connect_instance import (
|
|
58
36
|
update_db_using_local,
|
59
37
|
)
|
60
38
|
from lamindb_setup.core._docs import doc_args
|
61
|
-
from lamindb_setup.core._hub_core import
|
39
|
+
from lamindb_setup.core._hub_core import connect_instance_hub
|
62
40
|
from lamindb_setup.core._settings_store import instance_settings_file
|
63
|
-
from lamindb_setup.core.django import db_token_manager
|
41
|
+
from lamindb_setup.core.django import DBToken, db_token_manager
|
64
42
|
from lamindb_setup.core.upath import extract_suffix_from_path
|
65
43
|
|
66
|
-
from
|
44
|
+
from ..base.fields import (
|
67
45
|
CharField,
|
68
46
|
DateTimeField,
|
69
47
|
ForeignKey,
|
70
48
|
JSONField,
|
71
|
-
TextField,
|
72
49
|
)
|
73
|
-
from
|
74
|
-
from lamindb.errors import FieldValidationError
|
75
|
-
|
50
|
+
from ..base.types import FieldAttr, StrField
|
76
51
|
from ..errors import (
|
52
|
+
FieldValidationError,
|
77
53
|
InvalidArgument,
|
54
|
+
NoWriteAccess,
|
78
55
|
RecordNameChangeIntegrityError,
|
79
56
|
ValidationError,
|
80
57
|
)
|
81
58
|
from ._is_versioned import IsVersioned
|
59
|
+
from .query_manager import QueryManager, _lookup, _search
|
82
60
|
|
83
61
|
if TYPE_CHECKING:
|
84
62
|
from datetime import datetime
|
@@ -183,13 +161,19 @@ def init_self_from_db(self: Record, existing_record: Record):
|
|
183
161
|
|
184
162
|
def update_attributes(record: Record, attributes: dict[str, str]):
|
185
163
|
for key, value in attributes.items():
|
186
|
-
if (
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
164
|
+
if getattr(record, key) != value and value is not None:
|
165
|
+
if key not in {"uid", "dtype", "otype", "hash"}:
|
166
|
+
logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
|
167
|
+
setattr(record, key, value)
|
168
|
+
else:
|
169
|
+
hash_message = (
|
170
|
+
"recomputing on .save()"
|
171
|
+
if key == "hash"
|
172
|
+
else f"keeping {getattr(record, key)}"
|
173
|
+
)
|
174
|
+
logger.warning(
|
175
|
+
f"ignoring tentative value {value} for {key}, {hash_message}"
|
176
|
+
)
|
193
177
|
|
194
178
|
|
195
179
|
def validate_literal_fields(record: Record, kwargs) -> None:
|
@@ -277,9 +261,12 @@ def validate_fields(record: Record, kwargs):
|
|
277
261
|
"uid"
|
278
262
|
).max_length # triggers FieldDoesNotExist
|
279
263
|
if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
|
280
|
-
|
281
|
-
|
282
|
-
)
|
264
|
+
if not (
|
265
|
+
record.__class__ is Schema and len(kwargs["uid"]) == 16
|
266
|
+
): # no error for schema
|
267
|
+
raise ValidationError(
|
268
|
+
f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
|
269
|
+
)
|
283
270
|
# validate is_type
|
284
271
|
if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
|
285
272
|
if kwargs["name"].endswith("s"):
|
@@ -412,36 +399,13 @@ class Registry(ModelBase):
|
|
412
399
|
def __repr__(cls) -> str:
|
413
400
|
return registry_repr(cls)
|
414
401
|
|
402
|
+
@doc_args(_lookup.__doc__)
|
415
403
|
def lookup(
|
416
404
|
cls,
|
417
405
|
field: StrField | None = None,
|
418
406
|
return_field: StrField | None = None,
|
419
407
|
) -> NamedTuple:
|
420
|
-
"""
|
421
|
-
|
422
|
-
Args:
|
423
|
-
field: The field to look up the values for. Defaults to first string field.
|
424
|
-
return_field: The field to return. If `None`, returns the whole record.
|
425
|
-
|
426
|
-
Returns:
|
427
|
-
A `NamedTuple` of lookup information of the field values with a
|
428
|
-
dictionary converter.
|
429
|
-
|
430
|
-
See Also:
|
431
|
-
:meth:`~lamindb.models.Record.search`
|
432
|
-
|
433
|
-
Examples:
|
434
|
-
>>> import bionty as bt
|
435
|
-
>>> bt.settings.organism = "human"
|
436
|
-
>>> bt.Gene.from_source(symbol="ADGB-DT").save()
|
437
|
-
>>> lookup = bt.Gene.lookup()
|
438
|
-
>>> lookup.adgb_dt
|
439
|
-
>>> lookup_dict = lookup.dict()
|
440
|
-
>>> lookup_dict['ADGB-DT']
|
441
|
-
>>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
|
442
|
-
>>> genes.ensg00000002745
|
443
|
-
>>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
|
444
|
-
"""
|
408
|
+
"""{}""" # noqa: D415
|
445
409
|
return _lookup(cls=cls, field=field, return_field=return_field)
|
446
410
|
|
447
411
|
def filter(cls, *queries, **expressions) -> QuerySet:
|
@@ -541,6 +505,7 @@ class Registry(ModelBase):
|
|
541
505
|
query_set = query_set.order_by("-updated_at")
|
542
506
|
return query_set[:limit].df(include=include, features=features)
|
543
507
|
|
508
|
+
@doc_args(_search.__doc__)
|
544
509
|
def search(
|
545
510
|
cls,
|
546
511
|
string: str,
|
@@ -549,27 +514,7 @@ class Registry(ModelBase):
|
|
549
514
|
limit: int | None = 20,
|
550
515
|
case_sensitive: bool = False,
|
551
516
|
) -> QuerySet:
|
552
|
-
"""
|
553
|
-
|
554
|
-
Args:
|
555
|
-
string: The input string to match against the field ontology values.
|
556
|
-
field: The field or fields to search. Search all string fields by default.
|
557
|
-
limit: Maximum amount of top results to return.
|
558
|
-
case_sensitive: Whether the match is case sensitive.
|
559
|
-
|
560
|
-
Returns:
|
561
|
-
A sorted `DataFrame` of search results with a score in column `score`.
|
562
|
-
If `return_queryset` is `True`. `QuerySet`.
|
563
|
-
|
564
|
-
See Also:
|
565
|
-
:meth:`~lamindb.models.Record.filter`
|
566
|
-
:meth:`~lamindb.models.Record.lookup`
|
567
|
-
|
568
|
-
Examples:
|
569
|
-
>>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
|
570
|
-
>>> ln.save(ulabels)
|
571
|
-
>>> ln.ULabel.search("ULabel2")
|
572
|
-
"""
|
517
|
+
"""{}""" # noqa: D415
|
573
518
|
return _search(
|
574
519
|
cls=cls,
|
575
520
|
string=string,
|
@@ -597,6 +542,9 @@ class Registry(ModelBase):
|
|
597
542
|
"""
|
598
543
|
from .query_set import QuerySet
|
599
544
|
|
545
|
+
# connection already established
|
546
|
+
if instance in connections:
|
547
|
+
return QuerySet(model=cls, using=instance)
|
600
548
|
# we're in the default instance
|
601
549
|
if instance is None or instance == "default":
|
602
550
|
return QuerySet(model=cls, using=None)
|
@@ -631,7 +579,7 @@ class Registry(ModelBase):
|
|
631
579
|
iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
|
632
580
|
)
|
633
581
|
# access_db can take both: the dict from connect_instance_hub and isettings
|
634
|
-
|
582
|
+
into_db_token = iresult
|
635
583
|
else:
|
636
584
|
isettings = load_instance_settings(settings_file)
|
637
585
|
source_modules = isettings.modules
|
@@ -644,18 +592,17 @@ class Registry(ModelBase):
|
|
644
592
|
isettings._fine_grained_access and isettings._db_permissions == "jwt"
|
645
593
|
)
|
646
594
|
# access_db can take both: the dict from connect_instance_hub and isettings
|
647
|
-
|
595
|
+
into_db_token = isettings
|
648
596
|
|
649
597
|
target_modules = setup_settings.instance.modules
|
650
|
-
if
|
651
|
-
logger.
|
652
|
-
f"source
|
653
|
-
"consider mounting these registry modules to transfer all metadata"
|
598
|
+
if missing_members := source_modules - target_modules:
|
599
|
+
logger.info(
|
600
|
+
f"in transfer, source lamindb instance has additional modules: {', '.join(missing_members)}"
|
654
601
|
)
|
655
602
|
|
656
603
|
add_db_connection(db, instance)
|
657
604
|
if is_fine_grained_access:
|
658
|
-
db_token =
|
605
|
+
db_token = DBToken(into_db_token)
|
659
606
|
db_token_manager.set(db_token, instance)
|
660
607
|
return QuerySet(model=cls, using=instance)
|
661
608
|
|
@@ -697,15 +644,19 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
697
644
|
It's mainly used for LinkORMs and similar.
|
698
645
|
"""
|
699
646
|
|
647
|
+
objects = QueryManager()
|
648
|
+
|
700
649
|
class Meta:
|
701
650
|
abstract = True
|
651
|
+
base_manager_name = "objects"
|
702
652
|
|
703
653
|
def __init__(self, *args, **kwargs):
|
704
654
|
skip_validation = kwargs.pop("_skip_validation", False)
|
705
655
|
if not args:
|
706
656
|
if (
|
707
657
|
issubclass(self.__class__, Record)
|
708
|
-
and
|
658
|
+
and self.__class__.__name__
|
659
|
+
not in {"Storage", "ULabel", "Feature", "Schema", "Param"}
|
709
660
|
# do not save bionty entities in restricted spaces by default
|
710
661
|
and self.__class__.__module__ != "bionty.models"
|
711
662
|
):
|
@@ -719,7 +670,6 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
719
670
|
from ..core._settings import settings
|
720
671
|
from .can_curate import CanCurate
|
721
672
|
from .collection import Collection
|
722
|
-
from .schema import Schema
|
723
673
|
from .transform import Transform
|
724
674
|
|
725
675
|
validate_fields(self, kwargs)
|
@@ -763,11 +713,6 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
763
713
|
f"returning existing {self.__class__.__name__} record with same"
|
764
714
|
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
765
715
|
)
|
766
|
-
if isinstance(self, Schema):
|
767
|
-
if existing_record.hash != kwargs["hash"]:
|
768
|
-
logger.warning(
|
769
|
-
f"You're updating schema {existing_record.uid}, which might already have been used to validate datasets. Be careful."
|
770
|
-
)
|
771
716
|
init_self_from_db(self, existing_record)
|
772
717
|
update_attributes(self, kwargs)
|
773
718
|
return None
|
@@ -840,20 +785,33 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
840
785
|
# save unversioned record
|
841
786
|
else:
|
842
787
|
super().save(*args, **kwargs)
|
843
|
-
except IntegrityError as e:
|
788
|
+
except (IntegrityError, ProgrammingError) as e:
|
844
789
|
error_msg = str(e)
|
845
790
|
# two possible error messages for hash duplication
|
846
791
|
# "duplicate key value violates unique constraint"
|
847
792
|
# "UNIQUE constraint failed"
|
848
793
|
if (
|
849
|
-
|
850
|
-
|
851
|
-
|
794
|
+
isinstance(e, IntegrityError)
|
795
|
+
and "hash" in error_msg
|
796
|
+
and (
|
797
|
+
"UNIQUE constraint failed" in error_msg
|
798
|
+
or "duplicate key value violates unique constraint" in error_msg
|
799
|
+
)
|
800
|
+
):
|
852
801
|
pre_existing_record = self.__class__.get(hash=self.hash)
|
853
802
|
logger.warning(
|
854
803
|
f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
|
855
804
|
)
|
856
805
|
init_self_from_db(self, pre_existing_record)
|
806
|
+
elif (
|
807
|
+
isinstance(e, ProgrammingError)
|
808
|
+
and hasattr(self, "space")
|
809
|
+
and "new row violates row-level security policy" in error_msg
|
810
|
+
):
|
811
|
+
raise NoWriteAccess(
|
812
|
+
f"You’re not allowed to write to the space '{self.space.name}'.\n"
|
813
|
+
"Please contact an administrator of the space if you need write access."
|
814
|
+
) from None
|
857
815
|
else:
|
858
816
|
raise
|
859
817
|
# call the below in case a user makes more updates to the record
|
@@ -880,7 +838,7 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
880
838
|
self.features._add_from(self_on_db, transfer_logs=transfer_logs)
|
881
839
|
self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
|
882
840
|
for k, v in transfer_logs.items():
|
883
|
-
if k != "run":
|
841
|
+
if k != "run" and len(v) > 0:
|
884
842
|
logger.important(f"{k} records: {', '.join(v)}")
|
885
843
|
|
886
844
|
if (
|
@@ -929,7 +887,7 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
929
887
|
|
930
888
|
|
931
889
|
class Space(BasicRecord):
|
932
|
-
"""Spaces.
|
890
|
+
"""Spaces to restrict access to records to specific users or teams.
|
933
891
|
|
934
892
|
You can use spaces to restrict access to records within an instance.
|
935
893
|
|
@@ -1110,146 +1068,6 @@ def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
|
|
1110
1068
|
return []
|
1111
1069
|
|
1112
1070
|
|
1113
|
-
def _search(
|
1114
|
-
cls,
|
1115
|
-
string: str,
|
1116
|
-
*,
|
1117
|
-
field: StrField | list[StrField] | None = None,
|
1118
|
-
limit: int | None = 20,
|
1119
|
-
case_sensitive: bool = False,
|
1120
|
-
truncate_string: bool = False,
|
1121
|
-
) -> QuerySet:
|
1122
|
-
if string is None:
|
1123
|
-
raise ValueError("Cannot search for None value! Please pass a valid string.")
|
1124
|
-
|
1125
|
-
input_queryset = (
|
1126
|
-
cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1127
|
-
)
|
1128
|
-
registry = input_queryset.model
|
1129
|
-
name_field = getattr(registry, "_name_field", "name")
|
1130
|
-
if field is None:
|
1131
|
-
fields = [
|
1132
|
-
field.name
|
1133
|
-
for field in registry._meta.fields
|
1134
|
-
if field.get_internal_type() in {"CharField", "TextField"}
|
1135
|
-
]
|
1136
|
-
else:
|
1137
|
-
if not isinstance(field, list):
|
1138
|
-
fields_input = [field]
|
1139
|
-
else:
|
1140
|
-
fields_input = field
|
1141
|
-
fields = []
|
1142
|
-
for field in fields_input:
|
1143
|
-
if not isinstance(field, str):
|
1144
|
-
try:
|
1145
|
-
fields.append(field.field.name)
|
1146
|
-
except AttributeError as error:
|
1147
|
-
raise TypeError(
|
1148
|
-
"Please pass a Record string field, e.g., `CellType.name`!"
|
1149
|
-
) from error
|
1150
|
-
else:
|
1151
|
-
fields.append(field)
|
1152
|
-
|
1153
|
-
if truncate_string:
|
1154
|
-
if (len_string := len(string)) > 5:
|
1155
|
-
n_80_pct = int(len_string * 0.8)
|
1156
|
-
string = string[:n_80_pct]
|
1157
|
-
|
1158
|
-
string = string.strip()
|
1159
|
-
string_escape = re.escape(string)
|
1160
|
-
|
1161
|
-
exact_lookup = Exact if case_sensitive else IExact
|
1162
|
-
regex_lookup = Regex if case_sensitive else IRegex
|
1163
|
-
contains_lookup = Contains if case_sensitive else IContains
|
1164
|
-
|
1165
|
-
ranks = []
|
1166
|
-
contains_filters = []
|
1167
|
-
for field in fields:
|
1168
|
-
field_expr = Coalesce(
|
1169
|
-
Cast(field, output_field=TextField()),
|
1170
|
-
Value(""),
|
1171
|
-
output_field=TextField(),
|
1172
|
-
)
|
1173
|
-
# exact rank
|
1174
|
-
exact_expr = exact_lookup(field_expr, string)
|
1175
|
-
exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
|
1176
|
-
ranks.append(exact_rank)
|
1177
|
-
# exact synonym
|
1178
|
-
synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
|
1179
|
-
synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
|
1180
|
-
ranks.append(synonym_rank)
|
1181
|
-
# match as sub-phrase
|
1182
|
-
sub_expr = regex_lookup(
|
1183
|
-
field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
|
1184
|
-
)
|
1185
|
-
sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
|
1186
|
-
ranks.append(sub_rank)
|
1187
|
-
# startswith and avoid matching string with " " on the right
|
1188
|
-
# mostly for truncated
|
1189
|
-
startswith_expr = regex_lookup(
|
1190
|
-
field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
|
1191
|
-
)
|
1192
|
-
startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
|
1193
|
-
ranks.append(startswith_rank)
|
1194
|
-
# match as sub-phrase from the left, mostly for truncated
|
1195
|
-
right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
|
1196
|
-
right_rank = Cast(right_expr, output_field=IntegerField()) * 2
|
1197
|
-
ranks.append(right_rank)
|
1198
|
-
# match as sub-phrase from the right
|
1199
|
-
left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
|
1200
|
-
left_rank = Cast(left_expr, output_field=IntegerField()) * 2
|
1201
|
-
ranks.append(left_rank)
|
1202
|
-
# simple contains filter
|
1203
|
-
contains_expr = contains_lookup(field_expr, string)
|
1204
|
-
contains_filter = Q(contains_expr)
|
1205
|
-
contains_filters.append(contains_filter)
|
1206
|
-
# also rank by contains
|
1207
|
-
contains_rank = Cast(contains_expr, output_field=IntegerField())
|
1208
|
-
ranks.append(contains_rank)
|
1209
|
-
# additional rule for truncated strings
|
1210
|
-
# weight matches from the beginning of the string higher
|
1211
|
-
# sometimes whole words get truncated and startswith_expr is not enough
|
1212
|
-
if truncate_string and field == name_field:
|
1213
|
-
startswith_lookup = StartsWith if case_sensitive else IStartsWith
|
1214
|
-
name_startswith_expr = startswith_lookup(field_expr, string)
|
1215
|
-
name_startswith_rank = (
|
1216
|
-
Cast(name_startswith_expr, output_field=IntegerField()) * 2
|
1217
|
-
)
|
1218
|
-
ranks.append(name_startswith_rank)
|
1219
|
-
|
1220
|
-
ranked_queryset = (
|
1221
|
-
input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
|
1222
|
-
.alias(rank=sum(ranks))
|
1223
|
-
.order_by("-rank")
|
1224
|
-
)
|
1225
|
-
|
1226
|
-
return ranked_queryset[:limit]
|
1227
|
-
|
1228
|
-
|
1229
|
-
def _lookup(
|
1230
|
-
cls,
|
1231
|
-
field: StrField | None = None,
|
1232
|
-
return_field: StrField | None = None,
|
1233
|
-
using_key: str | None = None,
|
1234
|
-
) -> NamedTuple:
|
1235
|
-
"""{}""" # noqa: D415
|
1236
|
-
queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1237
|
-
field = get_name_field(registry=queryset.model, field=field)
|
1238
|
-
|
1239
|
-
return Lookup(
|
1240
|
-
records=queryset,
|
1241
|
-
values=[i.get(field) for i in queryset.values()],
|
1242
|
-
tuple_name=cls.__class__.__name__,
|
1243
|
-
prefix="ln",
|
1244
|
-
).lookup(
|
1245
|
-
return_field=(
|
1246
|
-
get_name_field(registry=queryset.model, field=return_field)
|
1247
|
-
if return_field is not None
|
1248
|
-
else None
|
1249
|
-
)
|
1250
|
-
)
|
1251
|
-
|
1252
|
-
|
1253
1071
|
def get_name_field(
|
1254
1072
|
registry: type[Record] | QuerySet | Manager,
|
1255
1073
|
*,
|
lamindb/models/run.py
CHANGED
@@ -347,7 +347,7 @@ class ParamValue(Record):
|
|
347
347
|
|
348
348
|
|
349
349
|
class Run(Record):
|
350
|
-
"""Runs.
|
350
|
+
"""Runs of transforms such as the execution of a script.
|
351
351
|
|
352
352
|
A registry to store runs of transforms, such as an execution of a script.
|
353
353
|
|
@@ -559,7 +559,7 @@ class Run(Record):
|
|
559
559
|
|
560
560
|
Query by fields::
|
561
561
|
|
562
|
-
ln.Run.filter(key="
|
562
|
+
ln.Run.filter(key="examples/my_file.parquet")
|
563
563
|
|
564
564
|
Query by params::
|
565
565
|
|
@@ -614,9 +614,9 @@ def delete_run_artifacts(run: Run) -> None:
|
|
614
614
|
|
615
615
|
class RunParamValue(BasicRecord, LinkORM):
|
616
616
|
id: int = models.BigAutoField(primary_key=True)
|
617
|
-
run: Run = ForeignKey(Run, CASCADE, related_name="
|
617
|
+
run: Run = ForeignKey(Run, CASCADE, related_name="links_paramvalue")
|
618
618
|
# we follow the lower() case convention rather than snake case for link models
|
619
|
-
paramvalue: ParamValue = ForeignKey(ParamValue, PROTECT, related_name="
|
619
|
+
paramvalue: ParamValue = ForeignKey(ParamValue, PROTECT, related_name="links_run")
|
620
620
|
created_at: datetime = DateTimeField(
|
621
621
|
editable=False, db_default=models.functions.Now(), db_index=True
|
622
622
|
)
|
lamindb/models/save.py
CHANGED
@@ -30,7 +30,7 @@ if TYPE_CHECKING:
|
|
30
30
|
|
31
31
|
|
32
32
|
def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
|
33
|
-
"""Bulk save
|
33
|
+
"""Bulk save records.
|
34
34
|
|
35
35
|
Note:
|
36
36
|
|
@@ -157,7 +157,13 @@ def check_and_attempt_upload(
|
|
157
157
|
return exception
|
158
158
|
# copies (if on-disk) or moves the temporary file (if in-memory) to the cache
|
159
159
|
if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
|
160
|
-
|
160
|
+
# this happens only after the actual upload was performed
|
161
|
+
# we avoid failing here in case any problems happen in copy_or_move_to_cache
|
162
|
+
# because the cache copying or cleanup is not absolutely necessary
|
163
|
+
try:
|
164
|
+
copy_or_move_to_cache(artifact, storage_path, cache_path)
|
165
|
+
except Exception as e:
|
166
|
+
logger.warning(f"A problem with cache on saving: {e}")
|
161
167
|
# after successful upload, we should remove the attribute so that another call
|
162
168
|
# to save won't upload again, the user should call replace() then
|
163
169
|
del artifact._local_filepath
|