lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_finish.py +3 -3
- lamindb/core/_context.py +4 -2
- lamindb/curators/core.py +13 -5
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/_cellxgene.py +1 -1
- lamindb/examples/croissant/__init__.py +20 -4
- lamindb/examples/datasets/_core.py +8 -1
- lamindb/examples/datasets/mini_immuno.py +0 -1
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0121_recorduser.py +7 -0
- lamindb/models/__init__.py +1 -0
- lamindb/models/_feature_manager.py +78 -18
- lamindb/models/artifact.py +71 -65
- lamindb/models/artifact_set.py +12 -3
- lamindb/models/query_set.py +170 -74
- lamindb/models/record.py +5 -1
- lamindb/models/run.py +2 -27
- lamindb/models/save.py +18 -10
- lamindb/models/sqlrecord.py +47 -33
- lamindb-1.11.2.dist-info/METADATA +180 -0
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/RECORD +25 -25
- lamindb-1.11a1.dist-info/METADATA +0 -144
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/LICENSE +0 -0
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -8,10 +8,9 @@ from typing import TYPE_CHECKING, Any, Literal, Union, overload
|
|
8
8
|
|
9
9
|
import fsspec
|
10
10
|
import lamindb_setup as ln_setup
|
11
|
-
import numpy as np
|
12
11
|
import pandas as pd
|
13
12
|
from anndata import AnnData
|
14
|
-
from django.db import connections, models
|
13
|
+
from django.db import ProgrammingError, connections, models
|
15
14
|
from django.db.models import CASCADE, PROTECT, Q
|
16
15
|
from django.db.models.functions import Length
|
17
16
|
from lamin_utils import colors, logger
|
@@ -33,7 +32,7 @@ from lamindb.base.fields import (
|
|
33
32
|
CharField,
|
34
33
|
ForeignKey,
|
35
34
|
)
|
36
|
-
from lamindb.errors import FieldValidationError, UnknownStorageLocation
|
35
|
+
from lamindb.errors import FieldValidationError, NoWriteAccess, UnknownStorageLocation
|
37
36
|
from lamindb.models.query_set import QuerySet
|
38
37
|
|
39
38
|
from ..base.users import current_user_id
|
@@ -69,7 +68,6 @@ from ..models._is_versioned import (
|
|
69
68
|
from ._django import get_artifact_with_related, get_collection_with_related
|
70
69
|
from ._feature_manager import (
|
71
70
|
FeatureManager,
|
72
|
-
filter_base,
|
73
71
|
get_label_links,
|
74
72
|
)
|
75
73
|
from ._is_versioned import IsVersioned
|
@@ -296,6 +294,7 @@ def process_data(
|
|
296
294
|
|
297
295
|
def get_stat_or_artifact(
|
298
296
|
path: UPath,
|
297
|
+
storage: Record,
|
299
298
|
key: str | None = None,
|
300
299
|
check_hash: bool = True,
|
301
300
|
is_replace: bool = False,
|
@@ -333,14 +332,14 @@ def get_stat_or_artifact(
|
|
333
332
|
else:
|
334
333
|
result = (
|
335
334
|
Artifact.objects.using(instance)
|
336
|
-
.filter(Q(hash=hash) | Q(key=key, storage=
|
335
|
+
.filter(Q(hash=hash) | Q(key=key, storage=storage))
|
337
336
|
.order_by("-created_at")
|
338
337
|
.all()
|
339
338
|
)
|
340
339
|
artifact_with_same_hash_exists = result.filter(hash=hash).count() > 0
|
341
340
|
if not artifact_with_same_hash_exists and len(result) > 0:
|
342
341
|
logger.important(
|
343
|
-
f"creating new artifact version for key='{key}' (storage: '{
|
342
|
+
f"creating new artifact version for key='{key}' (storage: '{storage.root}')"
|
344
343
|
)
|
345
344
|
previous_artifact_version = result[0]
|
346
345
|
if artifact_with_same_hash_exists:
|
@@ -418,24 +417,6 @@ def get_artifact_kwargs_from_data(
|
|
418
417
|
skip_check_exists,
|
419
418
|
is_replace=is_replace,
|
420
419
|
)
|
421
|
-
stat_or_artifact = get_stat_or_artifact(
|
422
|
-
path=path,
|
423
|
-
key=key,
|
424
|
-
instance=using_key,
|
425
|
-
is_replace=is_replace,
|
426
|
-
)
|
427
|
-
if isinstance(stat_or_artifact, Artifact):
|
428
|
-
existing_artifact = stat_or_artifact
|
429
|
-
if run is not None:
|
430
|
-
existing_artifact._populate_subsequent_runs(run)
|
431
|
-
return existing_artifact, None
|
432
|
-
else:
|
433
|
-
size, hash, hash_type, n_files, revises = stat_or_artifact
|
434
|
-
|
435
|
-
if revises is not None: # update provisional_uid
|
436
|
-
provisional_uid, revises = create_uid(revises=revises, version=version)
|
437
|
-
if settings.cache_dir in path.parents:
|
438
|
-
path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
|
439
420
|
|
440
421
|
check_path_in_storage = False
|
441
422
|
if use_existing_storage_key:
|
@@ -456,6 +437,26 @@ def get_artifact_kwargs_from_data(
|
|
456
437
|
else:
|
457
438
|
storage = storage
|
458
439
|
|
440
|
+
stat_or_artifact = get_stat_or_artifact(
|
441
|
+
path=path,
|
442
|
+
storage=storage,
|
443
|
+
key=key,
|
444
|
+
instance=using_key,
|
445
|
+
is_replace=is_replace,
|
446
|
+
)
|
447
|
+
if isinstance(stat_or_artifact, Artifact):
|
448
|
+
existing_artifact = stat_or_artifact
|
449
|
+
if run is not None:
|
450
|
+
existing_artifact._populate_subsequent_runs(run)
|
451
|
+
return existing_artifact, None
|
452
|
+
else:
|
453
|
+
size, hash, hash_type, n_files, revises = stat_or_artifact
|
454
|
+
|
455
|
+
if revises is not None: # update provisional_uid
|
456
|
+
provisional_uid, revises = create_uid(revises=revises, version=version)
|
457
|
+
if settings.cache_dir in path.parents:
|
458
|
+
path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
|
459
|
+
|
459
460
|
log_storage_hint(
|
460
461
|
check_path_in_storage=check_path_in_storage,
|
461
462
|
storage=storage,
|
@@ -1033,7 +1034,7 @@ def delete_permanently(artifact: Artifact, storage: bool, using_key: str):
|
|
1033
1034
|
delete_in_storage = storage is None or storage
|
1034
1035
|
else:
|
1035
1036
|
# for artifacts with non-virtual semantic storage keys (key is not None)
|
1036
|
-
# ask for extra-confirmation
|
1037
|
+
# ask for extra-confirmation if storage is None
|
1037
1038
|
if storage is None:
|
1038
1039
|
response = input(
|
1039
1040
|
f"Are you sure to want to delete {path}? (y/n) You can't undo"
|
@@ -1887,42 +1888,8 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
1887
1888
|
ln.Artifact.filter(cell_type_by_model__name="T cell")
|
1888
1889
|
|
1889
1890
|
"""
|
1890
|
-
from
|
1891
|
-
|
1892
|
-
if expressions:
|
1893
|
-
keys_normalized = [key.split("__")[0] for key in expressions]
|
1894
|
-
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
1895
|
-
if field_or_feature_or_param in Artifact.__get_available_fields__():
|
1896
|
-
qs = QuerySet(model=cls).filter(*queries, **expressions)
|
1897
|
-
if not any(e.startswith("kind") for e in expressions):
|
1898
|
-
return qs.exclude(kind="__lamindb_run__")
|
1899
|
-
else:
|
1900
|
-
return qs
|
1901
|
-
elif all(
|
1902
|
-
features_validated := Feature.validate(
|
1903
|
-
keys_normalized, field="name", mute=True
|
1904
|
-
)
|
1905
|
-
):
|
1906
|
-
return filter_base(Artifact, **expressions)
|
1907
|
-
else:
|
1908
|
-
features = ", ".join(
|
1909
|
-
sorted(np.array(keys_normalized)[~features_validated])
|
1910
|
-
)
|
1911
|
-
message = f"feature names: {features}"
|
1912
|
-
avail_fields = cls.__get_available_fields__()
|
1913
|
-
if "_branch_code" in avail_fields:
|
1914
|
-
avail_fields.remove("_branch_code") # backward compat
|
1915
|
-
fields = ", ".join(sorted(avail_fields))
|
1916
|
-
raise InvalidArgument(
|
1917
|
-
f"You can query either by available fields: {fields}\n"
|
1918
|
-
f"Or fix invalid {message}"
|
1919
|
-
)
|
1920
|
-
else:
|
1921
|
-
return (
|
1922
|
-
QuerySet(model=cls)
|
1923
|
-
.filter(*queries, **expressions)
|
1924
|
-
.exclude(kind="__lamindb_run__")
|
1925
|
-
)
|
1891
|
+
# from Registry metaclass
|
1892
|
+
return type(cls).filter(cls, *queries, **expressions)
|
1926
1893
|
|
1927
1894
|
@classmethod
|
1928
1895
|
def from_dataframe(
|
@@ -3107,8 +3074,12 @@ def _track_run_input(
|
|
3107
3074
|
)
|
3108
3075
|
data.save()
|
3109
3076
|
is_valid = True
|
3077
|
+
data_run_id, run_id = data.run_id, run.id
|
3078
|
+
different_runs = (data_run_id != run_id) or (
|
3079
|
+
data_run_id is None and run_id is None
|
3080
|
+
)
|
3110
3081
|
return (
|
3111
|
-
|
3082
|
+
different_runs
|
3112
3083
|
and not data._state.adding # this seems duplicated with data._state.db is None
|
3113
3084
|
and is_valid
|
3114
3085
|
)
|
@@ -3153,8 +3124,9 @@ def _track_run_input(
|
|
3153
3124
|
if track_run_input:
|
3154
3125
|
if run is None:
|
3155
3126
|
raise ValueError("No run context set. Call `ln.track()`.")
|
3156
|
-
|
3157
|
-
|
3127
|
+
if run._state.adding:
|
3128
|
+
# avoid adding the same run twice
|
3129
|
+
run.save()
|
3158
3130
|
if data_class_name == "artifact":
|
3159
3131
|
IsLink = run.input_artifacts.through
|
3160
3132
|
links = [
|
@@ -3166,7 +3138,41 @@ def _track_run_input(
|
|
3166
3138
|
IsLink(run_id=run.id, collection_id=data_id)
|
3167
3139
|
for data_id in input_data_ids
|
3168
3140
|
]
|
3169
|
-
|
3141
|
+
try:
|
3142
|
+
IsLink.objects.bulk_create(links, ignore_conflicts=True)
|
3143
|
+
except ProgrammingError as e:
|
3144
|
+
if "new row violates row-level security policy" in str(e):
|
3145
|
+
instance = setup_settings.instance
|
3146
|
+
available_spaces = instance.available_spaces
|
3147
|
+
if available_spaces is None:
|
3148
|
+
raise NoWriteAccess(
|
3149
|
+
f"You’re not allowed to write to the instance {instance.slug}.\n"
|
3150
|
+
"Please contact administrators of the instance if you need write access."
|
3151
|
+
) from None
|
3152
|
+
write_access_spaces = (
|
3153
|
+
available_spaces["admin"] + available_spaces["write"]
|
3154
|
+
)
|
3155
|
+
no_write_access_spaces = {
|
3156
|
+
data_space
|
3157
|
+
for data in input_data
|
3158
|
+
if (data_space := data.space) not in write_access_spaces
|
3159
|
+
}
|
3160
|
+
if (run_space := run.space) not in write_access_spaces:
|
3161
|
+
no_write_access_spaces.add(run_space)
|
3162
|
+
if len(no_write_access_spaces) > 1:
|
3163
|
+
name_msg = ", ".join(
|
3164
|
+
f"'{space.name}'" for space in no_write_access_spaces
|
3165
|
+
)
|
3166
|
+
space_msg = "spaces"
|
3167
|
+
else:
|
3168
|
+
name_msg = f"'{no_write_access_spaces.pop().name}'"
|
3169
|
+
space_msg = "space"
|
3170
|
+
raise NoWriteAccess(
|
3171
|
+
f"You’re not allowed to write to the {space_msg} {name_msg}.\n"
|
3172
|
+
f"Please contact administrators of the {space_msg} if you need write access."
|
3173
|
+
) from None
|
3174
|
+
else:
|
3175
|
+
raise e
|
3170
3176
|
|
3171
3177
|
|
3172
3178
|
# privates currently dealt with separately
|
lamindb/models/artifact_set.py
CHANGED
@@ -28,6 +28,7 @@ UNORDERED_WARNING = (
|
|
28
28
|
)
|
29
29
|
|
30
30
|
|
31
|
+
# maybe make this abstract
|
31
32
|
class ArtifactSet(Iterable):
|
32
33
|
"""Abstract class representing sets of artifacts returned by queries.
|
33
34
|
|
@@ -127,6 +128,11 @@ class ArtifactSet(Iterable):
|
|
127
128
|
|
128
129
|
def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
129
130
|
"""Returns artifacts in the query set that are registered for the provided path."""
|
131
|
+
from lamindb.models import BasicQuerySet, QuerySet
|
132
|
+
|
133
|
+
# not QuerySet but only BasicQuerySet
|
134
|
+
assert isinstance(artifacts, BasicQuerySet) and not isinstance(artifacts, QuerySet) # noqa: S101
|
135
|
+
|
130
136
|
upath = UPath(path)
|
131
137
|
|
132
138
|
path_str = upath.as_posix()
|
@@ -135,12 +141,15 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
|
135
141
|
stem_len = len(stem)
|
136
142
|
|
137
143
|
if stem_len == 16:
|
138
|
-
qs = artifacts.filter(
|
144
|
+
qs = artifacts.filter(
|
139
145
|
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
140
146
|
uid__startswith=stem,
|
141
147
|
)
|
142
148
|
elif stem_len == 20:
|
143
|
-
qs = artifacts.filter(
|
149
|
+
qs = artifacts.filter(
|
150
|
+
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
151
|
+
uid=stem,
|
152
|
+
)
|
144
153
|
else:
|
145
154
|
qs = None
|
146
155
|
|
@@ -148,7 +157,7 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
|
148
157
|
return qs
|
149
158
|
|
150
159
|
qs = (
|
151
|
-
artifacts.filter(_key_is_virtual=False)
|
160
|
+
artifacts.filter(_key_is_virtual=False)
|
152
161
|
.alias(
|
153
162
|
db_path=Concat("storage__root", Value("/"), "key", output_field=TextField())
|
154
163
|
)
|
lamindb/models/query_set.py
CHANGED
@@ -5,7 +5,7 @@ from collections import UserList
|
|
5
5
|
from collections.abc import Iterable
|
6
6
|
from collections.abc import Iterable as IterableType
|
7
7
|
from datetime import datetime, timezone
|
8
|
-
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
8
|
+
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
9
9
|
|
10
10
|
import pandas as pd
|
11
11
|
from django.core.exceptions import FieldError
|
@@ -16,7 +16,7 @@ from lamin_utils import logger
|
|
16
16
|
from lamindb_setup.core import deprecated
|
17
17
|
from lamindb_setup.core._docs import doc_args
|
18
18
|
|
19
|
-
from ..errors import DoesNotExist
|
19
|
+
from ..errors import DoesNotExist, MultipleResultsFound
|
20
20
|
from ._is_versioned import IsVersioned
|
21
21
|
from .can_curate import CanCurate, _inspect, _standardize, _validate
|
22
22
|
from .query_manager import _lookup, _search
|
@@ -28,10 +28,6 @@ if TYPE_CHECKING:
|
|
28
28
|
T = TypeVar("T")
|
29
29
|
|
30
30
|
|
31
|
-
class MultipleResultsFound(Exception):
|
32
|
-
pass
|
33
|
-
|
34
|
-
|
35
31
|
pd.set_option("display.max_columns", 200)
|
36
32
|
|
37
33
|
|
@@ -63,15 +59,28 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
|
|
63
59
|
return keys
|
64
60
|
|
65
61
|
|
66
|
-
def one_helper(
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
62
|
+
def one_helper(
|
63
|
+
self: QuerySet | SQLRecordList,
|
64
|
+
does_not_exist_msg: str | None = None,
|
65
|
+
raise_doesnotexist: bool = True,
|
66
|
+
not_exists: bool | None = None,
|
67
|
+
raise_multipleresultsfound: bool = True,
|
68
|
+
):
|
69
|
+
if not_exists is None:
|
70
|
+
if isinstance(self, SQLRecordList):
|
71
|
+
not_exists = len(self) == 0
|
72
|
+
else:
|
73
|
+
not_exists = not self.exists() # type: ignore
|
71
74
|
if not_exists:
|
72
|
-
|
75
|
+
if raise_doesnotexist:
|
76
|
+
raise DoesNotExist(does_not_exist_msg)
|
77
|
+
else:
|
78
|
+
return None
|
73
79
|
elif len(self) > 1:
|
74
|
-
|
80
|
+
if raise_multipleresultsfound:
|
81
|
+
raise MultipleResultsFound(self)
|
82
|
+
else:
|
83
|
+
return self[0]
|
75
84
|
else:
|
76
85
|
return self[0]
|
77
86
|
|
@@ -88,7 +97,7 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
88
97
|
"visibility": "branch_id",
|
89
98
|
"_branch_code": "branch_id",
|
90
99
|
}
|
91
|
-
elif queryset.model
|
100
|
+
elif queryset.model is Artifact:
|
92
101
|
name_mappings = {
|
93
102
|
"visibility": "branch_id",
|
94
103
|
"_branch_code": "branch_id",
|
@@ -146,19 +155,29 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
146
155
|
expressions,
|
147
156
|
)
|
148
157
|
if issubclass(queryset.model, SQLRecord):
|
149
|
-
# branch_id is set to 1 unless expressions contains id or
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
158
|
+
# branch_id is set to 1 unless expressions contains id, uid or hash
|
159
|
+
id_uid_hash = {"id", "uid", "hash", "id__in", "uid__in", "hash__in"}
|
160
|
+
if not any(expression in id_uid_hash for expression in expressions):
|
161
|
+
expressions_have_branch = False
|
162
|
+
branch_branch_id = {"branch", "branch_id"}
|
163
|
+
branch_branch_id__ = ("branch__", "branch_id__")
|
164
|
+
for expression in expressions:
|
165
|
+
if expression in branch_branch_id or expression.startswith(
|
166
|
+
branch_branch_id__
|
167
|
+
):
|
168
|
+
expressions_have_branch = True
|
169
|
+
break
|
170
|
+
if not expressions_have_branch:
|
171
|
+
# TODO: should be set to the current default branch
|
172
|
+
expressions["branch_id"] = 1
|
173
|
+
else:
|
174
|
+
# if branch_id is None, do not apply a filter
|
175
|
+
# otherwise, it would mean filtering for NULL values, which doesn't make
|
176
|
+
# sense for a non-NULLABLE column
|
177
|
+
if "branch_id" in expressions and expressions["branch_id"] is None:
|
178
|
+
expressions.pop("branch_id")
|
179
|
+
if "branch" in expressions and expressions["branch"] is None:
|
180
|
+
expressions.pop("branch")
|
162
181
|
if queryset._db is not None:
|
163
182
|
# only check for database mismatch if there is a defined database on the
|
164
183
|
# queryset
|
@@ -173,52 +192,64 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
173
192
|
|
174
193
|
|
175
194
|
def get(
|
176
|
-
registry_or_queryset:
|
195
|
+
registry_or_queryset: Registry | BasicQuerySet,
|
177
196
|
idlike: int | str | None = None,
|
178
197
|
**expressions,
|
179
198
|
) -> SQLRecord:
|
180
199
|
if isinstance(registry_or_queryset, BasicQuerySet):
|
200
|
+
# not QuerySet but only BasicQuerySet
|
201
|
+
assert not isinstance(registry_or_queryset, QuerySet) # noqa: S101
|
202
|
+
|
181
203
|
qs = registry_or_queryset
|
182
204
|
registry = qs.model
|
183
205
|
else:
|
184
206
|
qs = BasicQuerySet(model=registry_or_queryset)
|
185
207
|
registry = registry_or_queryset
|
208
|
+
|
186
209
|
if isinstance(idlike, int):
|
187
|
-
return
|
210
|
+
return qs.get(id=idlike)
|
188
211
|
elif isinstance(idlike, str):
|
189
212
|
NAME_FIELD = (
|
190
213
|
registry._name_field if hasattr(registry, "_name_field") else "name"
|
191
214
|
)
|
192
215
|
DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
|
216
|
+
# this is the case in which the user passes an under-specified uid
|
193
217
|
if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
|
194
|
-
|
195
|
-
|
218
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=True)
|
219
|
+
not_exists = None
|
220
|
+
if not new_qs.exists():
|
221
|
+
# nothing found with is_latest=True, so also try is_latest=False
|
222
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=False)
|
223
|
+
else:
|
224
|
+
not_exists = False
|
225
|
+
# it doesn't make sense to raise MultipleResultsFound when querying with an
|
226
|
+
# underspecified uid
|
227
|
+
return one_helper(
|
228
|
+
new_qs,
|
229
|
+
DOESNOTEXIST_MSG,
|
230
|
+
not_exists=not_exists,
|
231
|
+
raise_multipleresultsfound=False,
|
232
|
+
)
|
196
233
|
else:
|
197
|
-
qs =
|
234
|
+
qs = qs.filter(uid__startswith=idlike)
|
198
235
|
return one_helper(qs, DOESNOTEXIST_MSG)
|
199
236
|
else:
|
200
237
|
assert idlike is None # noqa: S101
|
201
238
|
expressions = process_expressions(qs, expressions)
|
202
|
-
# don't want branch_id here in .get(), only in .filter()
|
203
|
-
expressions.pop("branch_id", None)
|
204
239
|
# inject is_latest for consistency with idlike
|
205
240
|
is_latest_was_not_in_expressions = "is_latest" not in expressions
|
206
241
|
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
207
242
|
expressions["is_latest"] = True
|
208
243
|
try:
|
209
|
-
return
|
244
|
+
return qs.get(**expressions)
|
210
245
|
except registry.DoesNotExist as e:
|
211
246
|
# handle the case in which the is_latest injection led to a missed query
|
212
247
|
if "is_latest" in expressions and is_latest_was_not_in_expressions:
|
213
248
|
expressions.pop("is_latest")
|
214
|
-
result = (
|
215
|
-
BasicQuerySet.filter(qs, **expressions)
|
216
|
-
.order_by("-created_at")
|
217
|
-
.first()
|
218
|
-
)
|
249
|
+
result = qs.filter(**expressions).order_by("-created_at").first()
|
219
250
|
if result is not None:
|
220
251
|
return result
|
221
|
-
raise
|
252
|
+
raise e
|
222
253
|
|
223
254
|
|
224
255
|
class SQLRecordList(UserList, Generic[T]):
|
@@ -390,6 +421,9 @@ def get_feature_annotate_kwargs(
|
|
390
421
|
# Prepare Django's annotate for features
|
391
422
|
annotate_kwargs = {}
|
392
423
|
for link_attr, feature_type in link_attributes_on_models.items():
|
424
|
+
if link_attr == "links_project" and registry is Record:
|
425
|
+
# we're only interested in values_project when "annotating" records
|
426
|
+
continue
|
393
427
|
annotate_kwargs[f"{link_attr}__feature__name"] = F(
|
394
428
|
f"{link_attr}__feature__name"
|
395
429
|
)
|
@@ -645,6 +679,27 @@ def process_cols_from_include(
|
|
645
679
|
return result
|
646
680
|
|
647
681
|
|
682
|
+
def _queryset_class_factory(
|
683
|
+
registry: Registry, queryset_cls: type[models.QuerySet]
|
684
|
+
) -> type[models.QuerySet]:
|
685
|
+
from lamindb.models import Artifact, ArtifactSet
|
686
|
+
|
687
|
+
# If the model is Artifact, create a new class
|
688
|
+
# for BasicQuerySet or QuerySet that inherits from ArtifactSet.
|
689
|
+
# This allows adding artifact-specific functionality to all classes
|
690
|
+
# inheriting from BasicQuerySet.
|
691
|
+
# Thus all query sets of artifacts (and only of artifacts)
|
692
|
+
# will have functions from ArtifactSet.
|
693
|
+
if registry is Artifact and not issubclass(queryset_cls, ArtifactSet):
|
694
|
+
new_cls = type(
|
695
|
+
"Artifact" + queryset_cls.__name__, (queryset_cls, ArtifactSet), {}
|
696
|
+
)
|
697
|
+
else:
|
698
|
+
new_cls = queryset_cls
|
699
|
+
|
700
|
+
return new_cls
|
701
|
+
|
702
|
+
|
648
703
|
class BasicQuerySet(models.QuerySet):
|
649
704
|
"""Sets of records returned by queries.
|
650
705
|
|
@@ -660,19 +715,23 @@ class BasicQuerySet(models.QuerySet):
|
|
660
715
|
"""
|
661
716
|
|
662
717
|
def __new__(cls, model=None, query=None, using=None, hints=None):
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
return
|
718
|
+
# see comments in _queryset_class_factory
|
719
|
+
return object.__new__(_queryset_class_factory(model, cls))
|
720
|
+
|
721
|
+
def _to_class(
|
722
|
+
self, cls: type[models.QuerySet], copy: bool = True
|
723
|
+
) -> models.QuerySet:
|
724
|
+
qs = self.all() if copy else self
|
725
|
+
qs.__class__ = cls
|
726
|
+
return qs
|
727
|
+
|
728
|
+
def _to_basic(self, copy: bool = True) -> BasicQuerySet:
|
729
|
+
cls = _queryset_class_factory(self.model, BasicQuerySet)
|
730
|
+
return self._to_class(cls, copy)
|
731
|
+
|
732
|
+
def _to_non_basic(self, copy: bool = True) -> QuerySet:
|
733
|
+
cls = _queryset_class_factory(self.model, QuerySet)
|
734
|
+
return self._to_class(cls, copy)
|
676
735
|
|
677
736
|
@doc_args(SQLRecord.to_dataframe.__doc__)
|
678
737
|
def to_dataframe(
|
@@ -750,17 +809,46 @@ class BasicQuerySet(models.QuerySet):
|
|
750
809
|
) -> pd.DataFrame:
|
751
810
|
return self.to_dataframe(include, features)
|
752
811
|
|
753
|
-
def delete(self, *args, **kwargs):
|
754
|
-
"""Delete all records in the query set.
|
812
|
+
def delete(self, *args, permanent: bool | None = None, **kwargs):
|
813
|
+
"""Delete all records in the query set.
|
814
|
+
|
815
|
+
Args:
|
816
|
+
permanent: Whether to permanently delete the record (skips trash).
|
817
|
+
Is only relevant for records that have the `branch` field.
|
818
|
+
|
819
|
+
Note:
|
820
|
+
Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
|
821
|
+
Use `permanent=True` for actual deletion.
|
822
|
+
|
823
|
+
Examples:
|
824
|
+
|
825
|
+
For any `QuerySet` object `qs`, call:
|
826
|
+
|
827
|
+
>>> qs.delete()
|
828
|
+
"""
|
755
829
|
from lamindb.models import Artifact, Collection, Run, Storage, Transform
|
756
830
|
|
757
|
-
#
|
758
|
-
if self.model in {Artifact, Collection, Transform, Run
|
831
|
+
# all these models have non-trivial delete behavior, hence we need to handle in a loop
|
832
|
+
if self.model in {Artifact, Collection, Transform, Run}:
|
833
|
+
for record in self:
|
834
|
+
record.delete(*args, permanent=permanent, **kwargs)
|
835
|
+
elif self.model is Storage: # storage does not have soft delete
|
836
|
+
if permanent is False:
|
837
|
+
logger.warning(
|
838
|
+
"the Storage registry doesn't support soft delete, hard deleting"
|
839
|
+
)
|
759
840
|
for record in self:
|
760
|
-
|
761
|
-
record.delete(*args, **kwargs)
|
841
|
+
record.delete()
|
762
842
|
else:
|
763
|
-
|
843
|
+
if not permanent and hasattr(self.model, "branch_id"):
|
844
|
+
logger.warning("moved records to trash (branch_id = -1)")
|
845
|
+
self.update(branch_id=-1)
|
846
|
+
else:
|
847
|
+
if permanent is False:
|
848
|
+
logger.warning(
|
849
|
+
f"model {self.model.__name__} doesn't support soft delete, hard deleting"
|
850
|
+
)
|
851
|
+
super().delete(*args, **kwargs)
|
764
852
|
|
765
853
|
def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
766
854
|
"""Populate an (unordered) list with the results.
|
@@ -802,12 +890,7 @@ class BasicQuerySet(models.QuerySet):
|
|
802
890
|
>>> ULabel.filter(name="benchmark").one_or_none()
|
803
891
|
>>> ULabel.filter(name="non existing label").one_or_none()
|
804
892
|
"""
|
805
|
-
|
806
|
-
return None
|
807
|
-
elif len(self) == 1:
|
808
|
-
return self[0]
|
809
|
-
else:
|
810
|
-
raise MultipleResultsFound(self.all())
|
893
|
+
return one_helper(self, raise_doesnotexist=False)
|
811
894
|
|
812
895
|
def latest_version(self) -> QuerySet:
|
813
896
|
"""Filter every version family by latest version."""
|
@@ -884,18 +967,18 @@ class QuerySet(BasicQuerySet):
|
|
884
967
|
"""Query a single record. Raises error if there are more or none."""
|
885
968
|
is_run_input = expressions.pop("is_run_input", False)
|
886
969
|
|
970
|
+
# artifacts_from_path and get accept only BasicQuerySet
|
971
|
+
qs = self._to_class(BasicQuerySet, copy=True)
|
972
|
+
|
887
973
|
if path := expressions.pop("path", None):
|
888
974
|
from .artifact_set import ArtifactSet, artifacts_from_path
|
889
975
|
|
890
976
|
if not isinstance(self, ArtifactSet):
|
891
977
|
raise ValueError("Querying by path is only possible for artifacts.")
|
892
|
-
|
893
|
-
qs = artifacts_from_path(self, path)
|
894
|
-
else:
|
895
|
-
qs = self
|
978
|
+
qs = artifacts_from_path(qs, path)
|
896
979
|
|
897
980
|
try:
|
898
|
-
record = get(qs, idlike, **expressions)
|
981
|
+
record = get(qs, idlike, **expressions)
|
899
982
|
except ValueError as e:
|
900
983
|
# Pass through original error for explicit id lookups
|
901
984
|
if "Field 'id' expected a number" in str(e):
|
@@ -921,15 +1004,28 @@ class QuerySet(BasicQuerySet):
|
|
921
1004
|
|
922
1005
|
def filter(self, *queries, **expressions) -> QuerySet:
|
923
1006
|
"""Query a set of records."""
|
1007
|
+
from lamindb.models import Artifact, Record, Run
|
1008
|
+
|
1009
|
+
registry = self.model
|
1010
|
+
|
1011
|
+
if not expressions.pop("_skip_filter_with_features", False) and registry in {
|
1012
|
+
Artifact,
|
1013
|
+
Run,
|
1014
|
+
Record,
|
1015
|
+
}:
|
1016
|
+
from ._feature_manager import filter_with_features
|
1017
|
+
|
1018
|
+
return filter_with_features(self, *queries, **expressions)
|
1019
|
+
|
924
1020
|
# Suggest to use __name for related fields such as id when not passed
|
925
1021
|
for field, value in expressions.items():
|
926
1022
|
if (
|
927
1023
|
isinstance(value, str)
|
928
1024
|
and value.strip("-").isalpha()
|
929
1025
|
and "__" not in field
|
930
|
-
and hasattr(
|
1026
|
+
and hasattr(registry, field)
|
931
1027
|
):
|
932
|
-
field_attr = getattr(
|
1028
|
+
field_attr = getattr(registry, field)
|
933
1029
|
if hasattr(field_attr, "field") and field_attr.field.related_model:
|
934
1030
|
raise FieldError(
|
935
1031
|
f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
|
lamindb/models/record.py
CHANGED
@@ -108,6 +108,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
108
108
|
Run, through="RecordRun", related_name="records"
|
109
109
|
)
|
110
110
|
"""Linked runs."""
|
111
|
+
linked_users: User = models.ManyToManyField(
|
112
|
+
User, through="RecordUser", related_name="records"
|
113
|
+
)
|
114
|
+
"""Linked users."""
|
111
115
|
run: Run | None = ForeignKey(
|
112
116
|
Run,
|
113
117
|
PROTECT,
|
@@ -122,7 +126,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
122
126
|
ulabels: ULabel = models.ManyToManyField(
|
123
127
|
ULabel,
|
124
128
|
through="RecordULabel",
|
125
|
-
related_name="_records", # in transition period
|
129
|
+
related_name="_records", # in transition period with underscore prefix
|
126
130
|
)
|
127
131
|
"""Linked ulabels."""
|
128
132
|
linked_projects: Project
|