lamindb 1.11a1__py3-none-any.whl → 1.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_finish.py +3 -3
- lamindb/core/_context.py +4 -2
- lamindb/curators/core.py +13 -5
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/_cellxgene.py +1 -1
- lamindb/examples/croissant/__init__.py +20 -4
- lamindb/examples/datasets/_core.py +8 -1
- lamindb/examples/datasets/mini_immuno.py +0 -1
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0121_recorduser.py +7 -0
- lamindb/models/__init__.py +1 -0
- lamindb/models/_feature_manager.py +78 -18
- lamindb/models/artifact.py +71 -65
- lamindb/models/artifact_set.py +12 -3
- lamindb/models/query_set.py +146 -58
- lamindb/models/record.py +5 -1
- lamindb/models/run.py +2 -27
- lamindb/models/save.py +6 -8
- lamindb/models/sqlrecord.py +47 -33
- {lamindb-1.11a1.dist-info → lamindb-1.11.1.dist-info}/METADATA +12 -17
- {lamindb-1.11a1.dist-info → lamindb-1.11.1.dist-info}/RECORD +25 -25
- {lamindb-1.11a1.dist-info → lamindb-1.11.1.dist-info}/LICENSE +0 -0
- {lamindb-1.11a1.dist-info → lamindb-1.11.1.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -8,10 +8,9 @@ from typing import TYPE_CHECKING, Any, Literal, Union, overload
|
|
8
8
|
|
9
9
|
import fsspec
|
10
10
|
import lamindb_setup as ln_setup
|
11
|
-
import numpy as np
|
12
11
|
import pandas as pd
|
13
12
|
from anndata import AnnData
|
14
|
-
from django.db import connections, models
|
13
|
+
from django.db import ProgrammingError, connections, models
|
15
14
|
from django.db.models import CASCADE, PROTECT, Q
|
16
15
|
from django.db.models.functions import Length
|
17
16
|
from lamin_utils import colors, logger
|
@@ -33,7 +32,7 @@ from lamindb.base.fields import (
|
|
33
32
|
CharField,
|
34
33
|
ForeignKey,
|
35
34
|
)
|
36
|
-
from lamindb.errors import FieldValidationError, UnknownStorageLocation
|
35
|
+
from lamindb.errors import FieldValidationError, NoWriteAccess, UnknownStorageLocation
|
37
36
|
from lamindb.models.query_set import QuerySet
|
38
37
|
|
39
38
|
from ..base.users import current_user_id
|
@@ -69,7 +68,6 @@ from ..models._is_versioned import (
|
|
69
68
|
from ._django import get_artifact_with_related, get_collection_with_related
|
70
69
|
from ._feature_manager import (
|
71
70
|
FeatureManager,
|
72
|
-
filter_base,
|
73
71
|
get_label_links,
|
74
72
|
)
|
75
73
|
from ._is_versioned import IsVersioned
|
@@ -296,6 +294,7 @@ def process_data(
|
|
296
294
|
|
297
295
|
def get_stat_or_artifact(
|
298
296
|
path: UPath,
|
297
|
+
storage: Record,
|
299
298
|
key: str | None = None,
|
300
299
|
check_hash: bool = True,
|
301
300
|
is_replace: bool = False,
|
@@ -333,14 +332,14 @@ def get_stat_or_artifact(
|
|
333
332
|
else:
|
334
333
|
result = (
|
335
334
|
Artifact.objects.using(instance)
|
336
|
-
.filter(Q(hash=hash) | Q(key=key, storage=
|
335
|
+
.filter(Q(hash=hash) | Q(key=key, storage=storage))
|
337
336
|
.order_by("-created_at")
|
338
337
|
.all()
|
339
338
|
)
|
340
339
|
artifact_with_same_hash_exists = result.filter(hash=hash).count() > 0
|
341
340
|
if not artifact_with_same_hash_exists and len(result) > 0:
|
342
341
|
logger.important(
|
343
|
-
f"creating new artifact version for key='{key}' (storage: '{
|
342
|
+
f"creating new artifact version for key='{key}' (storage: '{storage.root}')"
|
344
343
|
)
|
345
344
|
previous_artifact_version = result[0]
|
346
345
|
if artifact_with_same_hash_exists:
|
@@ -418,24 +417,6 @@ def get_artifact_kwargs_from_data(
|
|
418
417
|
skip_check_exists,
|
419
418
|
is_replace=is_replace,
|
420
419
|
)
|
421
|
-
stat_or_artifact = get_stat_or_artifact(
|
422
|
-
path=path,
|
423
|
-
key=key,
|
424
|
-
instance=using_key,
|
425
|
-
is_replace=is_replace,
|
426
|
-
)
|
427
|
-
if isinstance(stat_or_artifact, Artifact):
|
428
|
-
existing_artifact = stat_or_artifact
|
429
|
-
if run is not None:
|
430
|
-
existing_artifact._populate_subsequent_runs(run)
|
431
|
-
return existing_artifact, None
|
432
|
-
else:
|
433
|
-
size, hash, hash_type, n_files, revises = stat_or_artifact
|
434
|
-
|
435
|
-
if revises is not None: # update provisional_uid
|
436
|
-
provisional_uid, revises = create_uid(revises=revises, version=version)
|
437
|
-
if settings.cache_dir in path.parents:
|
438
|
-
path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
|
439
420
|
|
440
421
|
check_path_in_storage = False
|
441
422
|
if use_existing_storage_key:
|
@@ -456,6 +437,26 @@ def get_artifact_kwargs_from_data(
|
|
456
437
|
else:
|
457
438
|
storage = storage
|
458
439
|
|
440
|
+
stat_or_artifact = get_stat_or_artifact(
|
441
|
+
path=path,
|
442
|
+
storage=storage,
|
443
|
+
key=key,
|
444
|
+
instance=using_key,
|
445
|
+
is_replace=is_replace,
|
446
|
+
)
|
447
|
+
if isinstance(stat_or_artifact, Artifact):
|
448
|
+
existing_artifact = stat_or_artifact
|
449
|
+
if run is not None:
|
450
|
+
existing_artifact._populate_subsequent_runs(run)
|
451
|
+
return existing_artifact, None
|
452
|
+
else:
|
453
|
+
size, hash, hash_type, n_files, revises = stat_or_artifact
|
454
|
+
|
455
|
+
if revises is not None: # update provisional_uid
|
456
|
+
provisional_uid, revises = create_uid(revises=revises, version=version)
|
457
|
+
if settings.cache_dir in path.parents:
|
458
|
+
path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
|
459
|
+
|
459
460
|
log_storage_hint(
|
460
461
|
check_path_in_storage=check_path_in_storage,
|
461
462
|
storage=storage,
|
@@ -1033,7 +1034,7 @@ def delete_permanently(artifact: Artifact, storage: bool, using_key: str):
|
|
1033
1034
|
delete_in_storage = storage is None or storage
|
1034
1035
|
else:
|
1035
1036
|
# for artifacts with non-virtual semantic storage keys (key is not None)
|
1036
|
-
# ask for extra-confirmation
|
1037
|
+
# ask for extra-confirmation if storage is None
|
1037
1038
|
if storage is None:
|
1038
1039
|
response = input(
|
1039
1040
|
f"Are you sure to want to delete {path}? (y/n) You can't undo"
|
@@ -1887,42 +1888,8 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
1887
1888
|
ln.Artifact.filter(cell_type_by_model__name="T cell")
|
1888
1889
|
|
1889
1890
|
"""
|
1890
|
-
from
|
1891
|
-
|
1892
|
-
if expressions:
|
1893
|
-
keys_normalized = [key.split("__")[0] for key in expressions]
|
1894
|
-
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
1895
|
-
if field_or_feature_or_param in Artifact.__get_available_fields__():
|
1896
|
-
qs = QuerySet(model=cls).filter(*queries, **expressions)
|
1897
|
-
if not any(e.startswith("kind") for e in expressions):
|
1898
|
-
return qs.exclude(kind="__lamindb_run__")
|
1899
|
-
else:
|
1900
|
-
return qs
|
1901
|
-
elif all(
|
1902
|
-
features_validated := Feature.validate(
|
1903
|
-
keys_normalized, field="name", mute=True
|
1904
|
-
)
|
1905
|
-
):
|
1906
|
-
return filter_base(Artifact, **expressions)
|
1907
|
-
else:
|
1908
|
-
features = ", ".join(
|
1909
|
-
sorted(np.array(keys_normalized)[~features_validated])
|
1910
|
-
)
|
1911
|
-
message = f"feature names: {features}"
|
1912
|
-
avail_fields = cls.__get_available_fields__()
|
1913
|
-
if "_branch_code" in avail_fields:
|
1914
|
-
avail_fields.remove("_branch_code") # backward compat
|
1915
|
-
fields = ", ".join(sorted(avail_fields))
|
1916
|
-
raise InvalidArgument(
|
1917
|
-
f"You can query either by available fields: {fields}\n"
|
1918
|
-
f"Or fix invalid {message}"
|
1919
|
-
)
|
1920
|
-
else:
|
1921
|
-
return (
|
1922
|
-
QuerySet(model=cls)
|
1923
|
-
.filter(*queries, **expressions)
|
1924
|
-
.exclude(kind="__lamindb_run__")
|
1925
|
-
)
|
1891
|
+
# from Registry metaclass
|
1892
|
+
return type(cls).filter(cls, *queries, **expressions)
|
1926
1893
|
|
1927
1894
|
@classmethod
|
1928
1895
|
def from_dataframe(
|
@@ -3107,8 +3074,12 @@ def _track_run_input(
|
|
3107
3074
|
)
|
3108
3075
|
data.save()
|
3109
3076
|
is_valid = True
|
3077
|
+
data_run_id, run_id = data.run_id, run.id
|
3078
|
+
different_runs = (data_run_id != run_id) or (
|
3079
|
+
data_run_id is None and run_id is None
|
3080
|
+
)
|
3110
3081
|
return (
|
3111
|
-
|
3082
|
+
different_runs
|
3112
3083
|
and not data._state.adding # this seems duplicated with data._state.db is None
|
3113
3084
|
and is_valid
|
3114
3085
|
)
|
@@ -3153,8 +3124,9 @@ def _track_run_input(
|
|
3153
3124
|
if track_run_input:
|
3154
3125
|
if run is None:
|
3155
3126
|
raise ValueError("No run context set. Call `ln.track()`.")
|
3156
|
-
|
3157
|
-
|
3127
|
+
if run._state.adding:
|
3128
|
+
# avoid adding the same run twice
|
3129
|
+
run.save()
|
3158
3130
|
if data_class_name == "artifact":
|
3159
3131
|
IsLink = run.input_artifacts.through
|
3160
3132
|
links = [
|
@@ -3166,7 +3138,41 @@ def _track_run_input(
|
|
3166
3138
|
IsLink(run_id=run.id, collection_id=data_id)
|
3167
3139
|
for data_id in input_data_ids
|
3168
3140
|
]
|
3169
|
-
|
3141
|
+
try:
|
3142
|
+
IsLink.objects.bulk_create(links, ignore_conflicts=True)
|
3143
|
+
except ProgrammingError as e:
|
3144
|
+
if "new row violates row-level security policy" in str(e):
|
3145
|
+
instance = setup_settings.instance
|
3146
|
+
available_spaces = instance.available_spaces
|
3147
|
+
if available_spaces is None:
|
3148
|
+
raise NoWriteAccess(
|
3149
|
+
f"You’re not allowed to write to the instance {instance.slug}.\n"
|
3150
|
+
"Please contact administrators of the instance if you need write access."
|
3151
|
+
) from None
|
3152
|
+
write_access_spaces = (
|
3153
|
+
available_spaces["admin"] + available_spaces["write"]
|
3154
|
+
)
|
3155
|
+
no_write_access_spaces = {
|
3156
|
+
data_space
|
3157
|
+
for data in input_data
|
3158
|
+
if (data_space := data.space) not in write_access_spaces
|
3159
|
+
}
|
3160
|
+
if (run_space := run.space) not in write_access_spaces:
|
3161
|
+
no_write_access_spaces.add(run_space)
|
3162
|
+
if len(no_write_access_spaces) > 1:
|
3163
|
+
name_msg = ", ".join(
|
3164
|
+
f"'{space.name}'" for space in no_write_access_spaces
|
3165
|
+
)
|
3166
|
+
space_msg = "spaces"
|
3167
|
+
else:
|
3168
|
+
name_msg = f"'{no_write_access_spaces.pop().name}'"
|
3169
|
+
space_msg = "space"
|
3170
|
+
raise NoWriteAccess(
|
3171
|
+
f"You’re not allowed to write to the {space_msg} {name_msg}.\n"
|
3172
|
+
f"Please contact administrators of the {space_msg} if you need write access."
|
3173
|
+
) from None
|
3174
|
+
else:
|
3175
|
+
raise e
|
3170
3176
|
|
3171
3177
|
|
3172
3178
|
# privates currently dealt with separately
|
lamindb/models/artifact_set.py
CHANGED
@@ -28,6 +28,7 @@ UNORDERED_WARNING = (
|
|
28
28
|
)
|
29
29
|
|
30
30
|
|
31
|
+
# maybe make this abstract
|
31
32
|
class ArtifactSet(Iterable):
|
32
33
|
"""Abstract class representing sets of artifacts returned by queries.
|
33
34
|
|
@@ -127,6 +128,11 @@ class ArtifactSet(Iterable):
|
|
127
128
|
|
128
129
|
def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
129
130
|
"""Returns artifacts in the query set that are registered for the provided path."""
|
131
|
+
from lamindb.models import BasicQuerySet, QuerySet
|
132
|
+
|
133
|
+
# not QuerySet but only BasicQuerySet
|
134
|
+
assert isinstance(artifacts, BasicQuerySet) and not isinstance(artifacts, QuerySet) # noqa: S101
|
135
|
+
|
130
136
|
upath = UPath(path)
|
131
137
|
|
132
138
|
path_str = upath.as_posix()
|
@@ -135,12 +141,15 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
|
135
141
|
stem_len = len(stem)
|
136
142
|
|
137
143
|
if stem_len == 16:
|
138
|
-
qs = artifacts.filter(
|
144
|
+
qs = artifacts.filter(
|
139
145
|
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
140
146
|
uid__startswith=stem,
|
141
147
|
)
|
142
148
|
elif stem_len == 20:
|
143
|
-
qs = artifacts.filter(
|
149
|
+
qs = artifacts.filter(
|
150
|
+
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
151
|
+
uid=stem,
|
152
|
+
)
|
144
153
|
else:
|
145
154
|
qs = None
|
146
155
|
|
@@ -148,7 +157,7 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
|
148
157
|
return qs
|
149
158
|
|
150
159
|
qs = (
|
151
|
-
artifacts.filter(_key_is_virtual=False)
|
160
|
+
artifacts.filter(_key_is_virtual=False)
|
152
161
|
.alias(
|
153
162
|
db_path=Concat("storage__root", Value("/"), "key", output_field=TextField())
|
154
163
|
)
|
lamindb/models/query_set.py
CHANGED
@@ -5,7 +5,7 @@ from collections import UserList
|
|
5
5
|
from collections.abc import Iterable
|
6
6
|
from collections.abc import Iterable as IterableType
|
7
7
|
from datetime import datetime, timezone
|
8
|
-
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
8
|
+
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
9
9
|
|
10
10
|
import pandas as pd
|
11
11
|
from django.core.exceptions import FieldError
|
@@ -16,7 +16,7 @@ from lamin_utils import logger
|
|
16
16
|
from lamindb_setup.core import deprecated
|
17
17
|
from lamindb_setup.core._docs import doc_args
|
18
18
|
|
19
|
-
from ..errors import DoesNotExist
|
19
|
+
from ..errors import DoesNotExist, MultipleResultsFound
|
20
20
|
from ._is_versioned import IsVersioned
|
21
21
|
from .can_curate import CanCurate, _inspect, _standardize, _validate
|
22
22
|
from .query_manager import _lookup, _search
|
@@ -28,10 +28,6 @@ if TYPE_CHECKING:
|
|
28
28
|
T = TypeVar("T")
|
29
29
|
|
30
30
|
|
31
|
-
class MultipleResultsFound(Exception):
|
32
|
-
pass
|
33
|
-
|
34
|
-
|
35
31
|
pd.set_option("display.max_columns", 200)
|
36
32
|
|
37
33
|
|
@@ -63,15 +59,28 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
|
|
63
59
|
return keys
|
64
60
|
|
65
61
|
|
66
|
-
def one_helper(
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
62
|
+
def one_helper(
|
63
|
+
self: QuerySet | SQLRecordList,
|
64
|
+
does_not_exist_msg: str | None = None,
|
65
|
+
raise_doesnotexist: bool = True,
|
66
|
+
not_exists: bool | None = None,
|
67
|
+
raise_multipleresultsfound: bool = True,
|
68
|
+
):
|
69
|
+
if not_exists is None:
|
70
|
+
if isinstance(self, SQLRecordList):
|
71
|
+
not_exists = len(self) == 0
|
72
|
+
else:
|
73
|
+
not_exists = not self.exists() # type: ignore
|
71
74
|
if not_exists:
|
72
|
-
|
75
|
+
if raise_doesnotexist:
|
76
|
+
raise DoesNotExist(does_not_exist_msg)
|
77
|
+
else:
|
78
|
+
return None
|
73
79
|
elif len(self) > 1:
|
74
|
-
|
80
|
+
if raise_multipleresultsfound:
|
81
|
+
raise MultipleResultsFound(self)
|
82
|
+
else:
|
83
|
+
return self[0]
|
75
84
|
else:
|
76
85
|
return self[0]
|
77
86
|
|
@@ -88,7 +97,7 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
88
97
|
"visibility": "branch_id",
|
89
98
|
"_branch_code": "branch_id",
|
90
99
|
}
|
91
|
-
elif queryset.model
|
100
|
+
elif queryset.model is Artifact:
|
92
101
|
name_mappings = {
|
93
102
|
"visibility": "branch_id",
|
94
103
|
"_branch_code": "branch_id",
|
@@ -173,28 +182,46 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
173
182
|
|
174
183
|
|
175
184
|
def get(
|
176
|
-
registry_or_queryset:
|
185
|
+
registry_or_queryset: Registry | BasicQuerySet,
|
177
186
|
idlike: int | str | None = None,
|
178
187
|
**expressions,
|
179
188
|
) -> SQLRecord:
|
180
189
|
if isinstance(registry_or_queryset, BasicQuerySet):
|
190
|
+
# not QuerySet but only BasicQuerySet
|
191
|
+
assert not isinstance(registry_or_queryset, QuerySet) # noqa: S101
|
192
|
+
|
181
193
|
qs = registry_or_queryset
|
182
194
|
registry = qs.model
|
183
195
|
else:
|
184
196
|
qs = BasicQuerySet(model=registry_or_queryset)
|
185
197
|
registry = registry_or_queryset
|
198
|
+
|
186
199
|
if isinstance(idlike, int):
|
187
|
-
return
|
200
|
+
return qs.get(id=idlike)
|
188
201
|
elif isinstance(idlike, str):
|
189
202
|
NAME_FIELD = (
|
190
203
|
registry._name_field if hasattr(registry, "_name_field") else "name"
|
191
204
|
)
|
192
205
|
DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
|
206
|
+
# this is the case in which the user passes an under-specified uid
|
193
207
|
if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
|
194
|
-
|
195
|
-
|
208
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=True)
|
209
|
+
not_exists = None
|
210
|
+
if not new_qs.exists():
|
211
|
+
# also try is_latest is False due to nothing found
|
212
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=False)
|
213
|
+
else:
|
214
|
+
not_exists = False
|
215
|
+
# it doesn't make sense to raise MultipleResultsFound when querying with an
|
216
|
+
# underspecified uid
|
217
|
+
return one_helper(
|
218
|
+
new_qs,
|
219
|
+
DOESNOTEXIST_MSG,
|
220
|
+
not_exists=not_exists,
|
221
|
+
raise_multipleresultsfound=False,
|
222
|
+
)
|
196
223
|
else:
|
197
|
-
qs =
|
224
|
+
qs = qs.filter(uid__startswith=idlike)
|
198
225
|
return one_helper(qs, DOESNOTEXIST_MSG)
|
199
226
|
else:
|
200
227
|
assert idlike is None # noqa: S101
|
@@ -206,16 +233,12 @@ def get(
|
|
206
233
|
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
207
234
|
expressions["is_latest"] = True
|
208
235
|
try:
|
209
|
-
return
|
236
|
+
return qs.get(**expressions)
|
210
237
|
except registry.DoesNotExist as e:
|
211
238
|
# handle the case in which the is_latest injection led to a missed query
|
212
239
|
if "is_latest" in expressions and is_latest_was_not_in_expressions:
|
213
240
|
expressions.pop("is_latest")
|
214
|
-
result = (
|
215
|
-
BasicQuerySet.filter(qs, **expressions)
|
216
|
-
.order_by("-created_at")
|
217
|
-
.first()
|
218
|
-
)
|
241
|
+
result = qs.filter(**expressions).order_by("-created_at").first()
|
219
242
|
if result is not None:
|
220
243
|
return result
|
221
244
|
raise registry.DoesNotExist from e
|
@@ -390,6 +413,9 @@ def get_feature_annotate_kwargs(
|
|
390
413
|
# Prepare Django's annotate for features
|
391
414
|
annotate_kwargs = {}
|
392
415
|
for link_attr, feature_type in link_attributes_on_models.items():
|
416
|
+
if link_attr == "links_project" and registry is Record:
|
417
|
+
# we're only interested in values_project when "annotating" records
|
418
|
+
continue
|
393
419
|
annotate_kwargs[f"{link_attr}__feature__name"] = F(
|
394
420
|
f"{link_attr}__feature__name"
|
395
421
|
)
|
@@ -645,6 +671,27 @@ def process_cols_from_include(
|
|
645
671
|
return result
|
646
672
|
|
647
673
|
|
674
|
+
def _queryset_class_factory(
|
675
|
+
registry: Registry, queryset_cls: type[models.QuerySet]
|
676
|
+
) -> type[models.QuerySet]:
|
677
|
+
from lamindb.models import Artifact, ArtifactSet
|
678
|
+
|
679
|
+
# If the model is Artifact, create a new class
|
680
|
+
# for BasicQuerySet or QuerySet that inherits from ArtifactSet.
|
681
|
+
# This allows to add artifact specific functionality to all classes
|
682
|
+
# inheriting from BasicQuerySet.
|
683
|
+
# Thus all query sets of artifacts (and only of artifacts)
|
684
|
+
# will have functions from ArtifactSet.
|
685
|
+
if registry is Artifact and not issubclass(queryset_cls, ArtifactSet):
|
686
|
+
new_cls = type(
|
687
|
+
"Artifact" + queryset_cls.__name__, (queryset_cls, ArtifactSet), {}
|
688
|
+
)
|
689
|
+
else:
|
690
|
+
new_cls = queryset_cls
|
691
|
+
|
692
|
+
return new_cls
|
693
|
+
|
694
|
+
|
648
695
|
class BasicQuerySet(models.QuerySet):
|
649
696
|
"""Sets of records returned by queries.
|
650
697
|
|
@@ -660,19 +707,23 @@ class BasicQuerySet(models.QuerySet):
|
|
660
707
|
"""
|
661
708
|
|
662
709
|
def __new__(cls, model=None, query=None, using=None, hints=None):
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
return
|
710
|
+
# see comments in _queryset_class_factory
|
711
|
+
return object.__new__(_queryset_class_factory(model, cls))
|
712
|
+
|
713
|
+
def _to_class(
|
714
|
+
self, cls: type[models.QuerySet], copy: bool = True
|
715
|
+
) -> models.QuerySet:
|
716
|
+
qs = self.all() if copy else self
|
717
|
+
qs.__class__ = cls
|
718
|
+
return qs
|
719
|
+
|
720
|
+
def _to_basic(self, copy: bool = True) -> BasicQuerySet:
|
721
|
+
cls = _queryset_class_factory(self.model, BasicQuerySet)
|
722
|
+
return self._to_class(cls, copy)
|
723
|
+
|
724
|
+
def _to_non_basic(self, copy: bool = True) -> QuerySet:
|
725
|
+
cls = _queryset_class_factory(self.model, QuerySet)
|
726
|
+
return self._to_class(cls, copy)
|
676
727
|
|
677
728
|
@doc_args(SQLRecord.to_dataframe.__doc__)
|
678
729
|
def to_dataframe(
|
@@ -750,17 +801,46 @@ class BasicQuerySet(models.QuerySet):
|
|
750
801
|
) -> pd.DataFrame:
|
751
802
|
return self.to_dataframe(include, features)
|
752
803
|
|
753
|
-
def delete(self, *args, **kwargs):
|
754
|
-
"""Delete all records in the query set.
|
804
|
+
def delete(self, *args, permanent: bool | None = None, **kwargs):
|
805
|
+
"""Delete all records in the query set.
|
806
|
+
|
807
|
+
Args:
|
808
|
+
permanent: Whether to permanently delete the record (skips trash).
|
809
|
+
Is only relevant for records that have the `branch` field.
|
810
|
+
|
811
|
+
Note:
|
812
|
+
Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
|
813
|
+
Use `permanent=True` for actual deletion.
|
814
|
+
|
815
|
+
Examples:
|
816
|
+
|
817
|
+
For any `QuerySet` object `qs`, call:
|
818
|
+
|
819
|
+
>>> qs.delete()
|
820
|
+
"""
|
755
821
|
from lamindb.models import Artifact, Collection, Run, Storage, Transform
|
756
822
|
|
757
|
-
#
|
758
|
-
if self.model in {Artifact, Collection, Transform, Run
|
823
|
+
# all these models have non-trivial delete behavior, hence we need to handle in a loop
|
824
|
+
if self.model in {Artifact, Collection, Transform, Run}:
|
825
|
+
for record in self:
|
826
|
+
record.delete(*args, permanent=permanent, **kwargs)
|
827
|
+
elif self.model is Storage: # storage does not have soft delete
|
828
|
+
if permanent is False:
|
829
|
+
logger.warning(
|
830
|
+
"the Storage registry doesn't support soft delete, hard deleting"
|
831
|
+
)
|
759
832
|
for record in self:
|
760
|
-
|
761
|
-
record.delete(*args, **kwargs)
|
833
|
+
record.delete()
|
762
834
|
else:
|
763
|
-
|
835
|
+
if not permanent and hasattr(self.model, "branch_id"):
|
836
|
+
logger.warning("moved records to trash (branch_id = -1)")
|
837
|
+
self.update(branch_id=-1)
|
838
|
+
else:
|
839
|
+
if permanent is False:
|
840
|
+
logger.warning(
|
841
|
+
f"model {self.model.__name__} doesn't support soft delete, hard deleting"
|
842
|
+
)
|
843
|
+
super().delete(*args, **kwargs)
|
764
844
|
|
765
845
|
def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
766
846
|
"""Populate an (unordered) list with the results.
|
@@ -802,12 +882,7 @@ class BasicQuerySet(models.QuerySet):
|
|
802
882
|
>>> ULabel.filter(name="benchmark").one_or_none()
|
803
883
|
>>> ULabel.filter(name="non existing label").one_or_none()
|
804
884
|
"""
|
805
|
-
|
806
|
-
return None
|
807
|
-
elif len(self) == 1:
|
808
|
-
return self[0]
|
809
|
-
else:
|
810
|
-
raise MultipleResultsFound(self.all())
|
885
|
+
return one_helper(self, raise_doesnotexist=False)
|
811
886
|
|
812
887
|
def latest_version(self) -> QuerySet:
|
813
888
|
"""Filter every version family by latest version."""
|
@@ -884,18 +959,18 @@ class QuerySet(BasicQuerySet):
|
|
884
959
|
"""Query a single record. Raises error if there are more or none."""
|
885
960
|
is_run_input = expressions.pop("is_run_input", False)
|
886
961
|
|
962
|
+
# artifacts_from_path and get accept only BasicQuerySet
|
963
|
+
qs = self._to_class(BasicQuerySet, copy=True)
|
964
|
+
|
887
965
|
if path := expressions.pop("path", None):
|
888
966
|
from .artifact_set import ArtifactSet, artifacts_from_path
|
889
967
|
|
890
968
|
if not isinstance(self, ArtifactSet):
|
891
969
|
raise ValueError("Querying by path is only possible for artifacts.")
|
892
|
-
|
893
|
-
qs = artifacts_from_path(self, path)
|
894
|
-
else:
|
895
|
-
qs = self
|
970
|
+
qs = artifacts_from_path(qs, path)
|
896
971
|
|
897
972
|
try:
|
898
|
-
record = get(qs, idlike, **expressions)
|
973
|
+
record = get(qs, idlike, **expressions)
|
899
974
|
except ValueError as e:
|
900
975
|
# Pass through original error for explicit id lookups
|
901
976
|
if "Field 'id' expected a number" in str(e):
|
@@ -921,15 +996,28 @@ class QuerySet(BasicQuerySet):
|
|
921
996
|
|
922
997
|
def filter(self, *queries, **expressions) -> QuerySet:
|
923
998
|
"""Query a set of records."""
|
999
|
+
from lamindb.models import Artifact, Record, Run
|
1000
|
+
|
1001
|
+
registry = self.model
|
1002
|
+
|
1003
|
+
if not expressions.pop("_skip_filter_with_features", False) and registry in {
|
1004
|
+
Artifact,
|
1005
|
+
Run,
|
1006
|
+
Record,
|
1007
|
+
}:
|
1008
|
+
from ._feature_manager import filter_with_features
|
1009
|
+
|
1010
|
+
return filter_with_features(self, *queries, **expressions)
|
1011
|
+
|
924
1012
|
# Suggest to use __name for related fields such as id when not passed
|
925
1013
|
for field, value in expressions.items():
|
926
1014
|
if (
|
927
1015
|
isinstance(value, str)
|
928
1016
|
and value.strip("-").isalpha()
|
929
1017
|
and "__" not in field
|
930
|
-
and hasattr(
|
1018
|
+
and hasattr(registry, field)
|
931
1019
|
):
|
932
|
-
field_attr = getattr(
|
1020
|
+
field_attr = getattr(registry, field)
|
933
1021
|
if hasattr(field_attr, "field") and field_attr.field.related_model:
|
934
1022
|
raise FieldError(
|
935
1023
|
f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
|
lamindb/models/record.py
CHANGED
@@ -108,6 +108,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
108
108
|
Run, through="RecordRun", related_name="records"
|
109
109
|
)
|
110
110
|
"""Linked runs."""
|
111
|
+
linked_users: User = models.ManyToManyField(
|
112
|
+
User, through="RecordUser", related_name="records"
|
113
|
+
)
|
114
|
+
"""Linked users."""
|
111
115
|
run: Run | None = ForeignKey(
|
112
116
|
Run,
|
113
117
|
PROTECT,
|
@@ -122,7 +126,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
122
126
|
ulabels: ULabel = models.ManyToManyField(
|
123
127
|
ULabel,
|
124
128
|
through="RecordULabel",
|
125
|
-
related_name="_records", # in transition period
|
129
|
+
related_name="_records", # in transition period with underscore prefix
|
126
130
|
)
|
127
131
|
"""Linked ulabels."""
|
128
132
|
linked_projects: Project
|
lamindb/models/run.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, overload
|
4
4
|
|
5
|
-
import numpy as np
|
6
5
|
from django.db import models
|
7
6
|
from django.db.models import (
|
8
7
|
CASCADE,
|
@@ -18,7 +17,6 @@ from lamindb.base.fields import (
|
|
18
17
|
ForeignKey,
|
19
18
|
)
|
20
19
|
from lamindb.base.users import current_user_id
|
21
|
-
from lamindb.errors import InvalidArgument
|
22
20
|
|
23
21
|
from ..base.ids import base62_16
|
24
22
|
from .can_curate import CanCurate
|
@@ -428,31 +426,8 @@ class Run(SQLRecord):
|
|
428
426
|
|
429
427
|
ln.Run.filter(hyperparam_x=100)
|
430
428
|
"""
|
431
|
-
from
|
432
|
-
|
433
|
-
from .query_set import QuerySet
|
434
|
-
|
435
|
-
if expressions:
|
436
|
-
keys_normalized = [key.split("__")[0] for key in expressions]
|
437
|
-
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
438
|
-
if field_or_feature_or_param in Run.__get_available_fields__():
|
439
|
-
return QuerySet(model=cls).filter(*queries, **expressions)
|
440
|
-
elif all(
|
441
|
-
params_validated := Feature.validate(
|
442
|
-
keys_normalized, field="name", mute=True
|
443
|
-
)
|
444
|
-
):
|
445
|
-
return filter_base(Run, **expressions)
|
446
|
-
else:
|
447
|
-
params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
|
448
|
-
message = f"feature names: {params}"
|
449
|
-
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
450
|
-
raise InvalidArgument(
|
451
|
-
f"You can query either by available fields: {fields}\n"
|
452
|
-
f"Or fix invalid {message}"
|
453
|
-
)
|
454
|
-
else:
|
455
|
-
return QuerySet(model=cls).filter(*queries, **expressions)
|
429
|
+
# from Registry metaclass
|
430
|
+
return type(cls).filter(cls, *queries, **expressions)
|
456
431
|
|
457
432
|
|
458
433
|
def delete_run_artifacts(run: Run) -> None:
|