lamindb 0.76.8__py3-none-any.whl → 0.76.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1205
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +389 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/METADATA +4 -4
- lamindb-0.76.9.dist-info/RECORD +60 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_save.py
CHANGED
@@ -1,308 +1,308 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import os
|
4
|
-
import shutil
|
5
|
-
import traceback
|
6
|
-
from collections import defaultdict
|
7
|
-
from datetime import datetime
|
8
|
-
from functools import partial
|
9
|
-
from typing import TYPE_CHECKING, Iterable, overload
|
10
|
-
|
11
|
-
import lamindb_setup
|
12
|
-
from django.db import IntegrityError, transaction
|
13
|
-
from django.utils.functional import partition
|
14
|
-
from lamin_utils import logger
|
15
|
-
from lamindb_setup.core.upath import LocalPathClasses
|
16
|
-
from lnschema_core.models import Artifact, Record
|
17
|
-
|
18
|
-
from lamindb.core._settings import settings
|
19
|
-
from lamindb.core.storage.paths import (
|
20
|
-
_cache_key_from_artifact_storage,
|
21
|
-
attempt_accessing_path,
|
22
|
-
auto_storage_key_from_artifact,
|
23
|
-
delete_storage_using_key,
|
24
|
-
store_file_or_folder,
|
25
|
-
)
|
26
|
-
|
27
|
-
if TYPE_CHECKING:
|
28
|
-
from lamindb_setup.core.upath import UPath
|
29
|
-
|
30
|
-
|
31
|
-
def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
    """Bulk save to registries & storage.

    Note:

        This is much faster than saving records using ``record.save()``.

    Warning:

        Bulk saving neither automatically creates related records nor updates
        existing records! Use ``record.save()`` for these use cases.

    Args:
        records: Multiple :class:`~lamindb.core.Record` objects.
        ignore_conflicts: If ``True``, do not error if some records violate a
            unique or another constraint. However, it won't inplace update the id
            fields of records. If you need records with ids, you need to query
            them from the database.

    Raises:
        ValueError: If a single record is passed instead of an iterable of records.

    Examples:

        Save a list of records:

        >>> labels = [ln.ULabel(f"Label {i}") for i in range(10)]
        >>> ln.save(labels)

        For a single record, use ``record.save()``:

        >>> transform = ln.Transform(name="My pipeline")
        >>> transform.save()

        Update a single existing record:

        >>> transform = ln.Transform.get("0Cb86EZj")
        >>> transform.name = "New name"
        >>> transform.save()

    """
    if isinstance(records, Record):
        raise ValueError("Please use record.save() if saving a single record.")

    # previously, this was all set based,
    # but models without primary keys aren't hashable
    # we distinguish between artifacts and non-artifacts
    # for artifacts, we want to bulk-upload rather than upload one-by-one
    # partition() returns (items failing the predicate, items passing it)
    non_artifacts, artifacts = partition(lambda r: isinstance(r, Artifact), records)
    if non_artifacts:
        # "new" = still flagged as being added or without a primary key
        non_artifacts_old, non_artifacts_new = partition(
            lambda r: r._state.adding or r.pk is None, non_artifacts
        )
        bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
        if non_artifacts_old:
            bulk_update(non_artifacts_old)
        non_artifacts_with_parents = [
            r for r in non_artifacts_new if hasattr(r, "_parents")
        ]
        if non_artifacts_with_parents:
            # this can only happen within bionty right now!!
            # we might extend to core lamindb later
            from bionty.core import add_ontology

            add_ontology(non_artifacts_with_parents)

    if artifacts:
        # commit all artifact rows in one transaction, then upload their data
        with transaction.atomic():
            for record in artifacts:
                record._save_skip_storage()
        using_key = settings._using_key
        store_artifacts(artifacts, using_key=using_key)

    # this function returns None as potentially 10k records might be saved
    # refreshing all of them from the DB would mean a severe performance penalty
    # 2nd reason: consistency with Django Model.save(), which also returns None
    return None
|
105
|
-
|
106
|
-
|
107
|
-
def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False):
    """Create records in bulk, issuing one ``bulk_create`` call per registry class.

    Args:
        records: Records to create; they may belong to different registries.
        ignore_conflicts: Passed through unchanged to each registry's
            ``objects.bulk_create``.
    """
    # group by concrete class, keeping the input order within each group
    records_by_orm = defaultdict(list)
    for record in records:
        records_by_orm[record.__class__].append(record)
    # a distinct loop name avoids rebinding the ``records`` parameter
    for registry, registry_records in records_by_orm.items():
        registry.objects.bulk_create(
            registry_records, ignore_conflicts=ignore_conflicts
        )
|
113
|
-
|
114
|
-
|
115
|
-
def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
    """Update records in bulk, issuing one ``bulk_update`` call per registry class.

    Args:
        records: Already persisted records to update; they may belong to
            different registries.
        ignore_conflicts: Accepted for signature parity with :func:`bulk_create`;
            it is not used by this function.
    """
    # group by concrete class, keeping the input order within each group
    records_by_orm = defaultdict(list)
    for record in records:
        records_by_orm[record.__class__].append(record)
    excluded_names = {"created_at", "id"}
    # a distinct loop name avoids rebinding the ``records`` parameter
    for registry, registry_records in records_by_orm.items():
        # Django's bulk_update refuses primary-key fields, so skip the primary
        # key whatever its name, in addition to "id" and "created_at"
        field_names = [
            field.name
            for field in registry._meta.fields
            if field.name not in excluded_names and not field.primary_key
        ]
        registry.objects.bulk_update(registry_records, field_names)
|
126
|
-
|
127
|
-
|
128
|
-
# This is also used within Artifact.save()
def check_and_attempt_upload(
    artifact: Artifact,
    using_key: str | None = None,
    access_token: str | None = None,
    print_progress: bool = True,
) -> Exception | None:
    """Upload the artifact's local data if it carries a ``_local_filepath``.

    Args:
        artifact: The artifact to upload.
        using_key: Forwarded to :func:`upload_artifact`.
        access_token: Forwarded to :func:`upload_artifact`.
        print_progress: Forwarded to :func:`upload_artifact`.

    Returns:
        The exception raised during the upload, or ``None`` to signal "proceed"
        (either the upload succeeded or nothing had to be uploaded).
    """
    # only a newly instantiated artifact, or one on which replace() was called
    # in a local env, has a _local_filepath and therefore needs an upload
    if not hasattr(artifact, "_local_filepath"):
        return None

    try:
        storage_path, cache_path = upload_artifact(
            artifact,
            using_key,
            access_token=access_token,
            print_progress=print_progress,
        )
    except Exception as exception:
        # errors are handed back to the caller rather than raised
        logger.warning(f"could not upload artifact: {artifact}")
        return exception

    # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
    multi_instance = os.getenv("LAMINDB_MULTI_INSTANCE")
    if multi_instance is None:
        copy_or_move_to_cache(artifact, storage_path, cache_path)

    # after a successful upload the attribute is removed so that another call
    # to save won't upload again; the user should call replace() then
    del artifact._local_filepath
    return None
|
156
|
-
|
157
|
-
|
158
|
-
def copy_or_move_to_cache(
    artifact: Artifact, storage_path: UPath, cache_path: UPath | None
):
    """Place the artifact's local data into the cache after it was stored.

    Args:
        artifact: Artifact whose ``_local_filepath`` points at the local data.
        storage_path: Path the artifact was stored at.
        cache_path: Cache location for the artifact; ``None`` is treated as the
            "storage is local" case, where nothing is cached.

    Returns:
        Always ``None``.
    """
    local_path = artifact._local_filepath

    # in-memory cases
    if local_path is None or not local_path.exists():
        return None

    local_path = local_path.resolve()
    is_dir = local_path.is_dir()
    cache_dir = settings._storage_settings.cache_dir
    lives_in_cache = cache_dir in local_path.parents

    # just delete from the cache dir if storage_path is local
    if cache_path is None:
        differs_from_storage = local_path.as_posix() != storage_path.as_posix()
        if differs_from_storage and lives_in_cache:
            if is_dir:
                shutil.rmtree(local_path)
            else:
                local_path.unlink()
        return None

    # non-local storage_path further
    if local_path != cache_path:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        if lives_in_cache:
            # the data already sits in the cache dir: move it to its final place
            if cache_path.is_dir():
                shutil.rmtree(cache_path)
            local_path.replace(cache_path)
        elif is_dir:
            shutil.copytree(local_path, cache_path)
        else:
            shutil.copy(local_path, cache_path)

    # stamp the cached copy one second into the future (original note: "make
    # sure that the cached version is older than the cloud one")
    # NOTE(review): a future mtime makes the copy look newer - confirm intent
    mts = datetime.now().timestamp() + 1.0
    if is_dir:
        for entry in cache_path.rglob("*"):
            if entry.is_file():
                os.utime(entry, times=(mts, mts))
    else:
        os.utime(cache_path, times=(mts, mts))
|
202
|
-
|
203
|
-
|
204
|
-
# This is also used within Artifact.save()
def check_and_attempt_clearing(
    artifact: Artifact, using_key: str | None = None
) -> Exception | None:
    """Delete the stale storage object recorded in ``_clear_storagekey``, if any.

    This is a clean-up operation after replace() was called.

    Args:
        artifact: The artifact to clean up after.
        using_key: Forwarded to ``delete_storage_using_key``.

    Returns:
        The exception raised during the clean-up, or ``None`` to signal
        "proceed" (either success or no action needed).
    """
    # the attribute is only present if replace() was called
    if not hasattr(artifact, "_clear_storagekey"):
        return None

    try:
        stale_key = artifact._clear_storagekey
        if stale_key is not None:
            delete_storage_using_key(artifact, stale_key, using_key=using_key)
            logger.success(f"deleted stale object at storage key {stale_key}")
            # reset so that a later call does not try to delete again
            artifact._clear_storagekey = None
    except Exception as exception:
        return exception
    return None
|
224
|
-
|
225
|
-
|
226
|
-
def store_artifacts(
    artifacts: Iterable[Artifact], using_key: str | None = None
) -> None:
    """Upload artifacts in a list of database-committed artifacts to storage.

    Processing stops at the first failing upload or clean-up. Every artifact
    that was not uploaded successfully is then cleaned up from the DB.

    Args:
        artifacts: Artifacts whose metadata is already committed.
        using_key: Forwarded to the upload and clean-up helpers.

    Raises:
        RuntimeError: If an upload or a clean-up failed; the original exception
            is attached as ``__cause__``.
    """
    # materialize once: the artifacts are walked twice and their count is needed,
    # which a one-shot iterable could not provide
    artifacts = list(artifacts)
    exception: Exception | None = None
    # because uploads might fail, we need to maintain a new list
    # of the succeeded uploads
    stored_artifacts = []

    # upload new local artifacts
    for artifact in artifacts:
        exception = check_and_attempt_upload(artifact, using_key)
        if exception is not None:
            break
        stored_artifacts.append(artifact)
        exception = check_and_attempt_clearing(artifact, using_key)
        if exception is not None:
            logger.warning(f"clean up of {artifact._clear_storagekey} failed")
            break

    if exception is None:
        return None

    # clean up metadata for artifacts not uploaded to storage
    with transaction.atomic():
        for artifact in artifacts:
            if artifact not in stored_artifacts:
                artifact._delete_skip_storage()
    error_message = prepare_error_message(artifacts, stored_artifacts, exception)
    # chain the original exception so that its traceback is not lost
    raise RuntimeError(error_message) from exception
|
260
|
-
|
261
|
-
|
262
|
-
def prepare_error_message(records, stored_artifacts, exception) -> str:
    """Build the message reported when storing artifacts failed.

    Args:
        records: All records that were supposed to be stored (must support
            ``len``).
        stored_artifacts: The records that were stored successfully.
        exception: The exception that interrupted storing.

    Returns:
        A message listing the successfully stored records (if any), followed by
        the text and the formatted traceback of ``exception``.
    """
    if len(records) == 1 or not stored_artifacts:
        parts = [
            "No entries were uploaded or committed"
            " to the database. See error message:\n\n"
        ]
    else:
        parts = [
            "The following entries have been"
            " successfully uploaded and committed to the database:\n"
        ]
        for record in stored_artifacts:
            # show only the first three comma-separated parts of the repr
            summary = ", ".join(repr(record).split(", ")[:3])
            parts.append(f"- {summary}, ...)\n")
        parts.append("\nSee error message:\n\n")
    # format the traceback of the exception that was passed in;
    # traceback.format_exc() only works while an exception is being handled
    # and would yield "NoneType: None" here
    formatted_traceback = "".join(
        traceback.format_exception(type(exception), exception, exception.__traceback__)
    )
    parts.append(f"{exception}\n\n{formatted_traceback}")
    return "".join(parts)
|
280
|
-
|
281
|
-
|
282
|
-
def upload_artifact(
    artifact,
    using_key: str | None = None,
    access_token: str | None = None,
    print_progress: bool = True,
) -> tuple[UPath, UPath | None]:
    """Store the artifact's local data and work out where its cached copy goes.

    Args:
        artifact: The artifact to store.
        using_key: Forwarded to ``attempt_accessing_path``.
        access_token: Forwarded to ``attempt_accessing_path``.
        print_progress: Forwarded to ``store_file_or_folder``.

    Returns:
        ``(storage_path, cache_path)``; ``cache_path`` is ``None`` when the
        storage path is a local path.
    """
    # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
    storage_key = auto_storage_key_from_artifact(artifact)
    storage_path, storage_settings = attempt_accessing_path(
        artifact, storage_key, using_key=using_key, access_token=access_token
    )

    # data is only written if the artifact is flagged with a truthy _to_store
    if getattr(artifact, "_to_store", False):
        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
        store_file_or_folder(
            artifact._local_filepath, storage_path, print_progress=print_progress
        )

    # local storage has no separate cache location
    if isinstance(storage_path, LocalPathClasses):
        return storage_path, None

    cache_key = _cache_key_from_artifact_storage(artifact, storage_settings)
    cache_path = storage_settings.cloud_to_local_no_update(
        storage_path, cache_key=cache_key
    )
    return storage_path, cache_path
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import shutil
|
5
|
+
import traceback
|
6
|
+
from collections import defaultdict
|
7
|
+
from datetime import datetime
|
8
|
+
from functools import partial
|
9
|
+
from typing import TYPE_CHECKING, Iterable, overload
|
10
|
+
|
11
|
+
import lamindb_setup
|
12
|
+
from django.db import IntegrityError, transaction
|
13
|
+
from django.utils.functional import partition
|
14
|
+
from lamin_utils import logger
|
15
|
+
from lamindb_setup.core.upath import LocalPathClasses
|
16
|
+
from lnschema_core.models import Artifact, Record
|
17
|
+
|
18
|
+
from lamindb.core._settings import settings
|
19
|
+
from lamindb.core.storage.paths import (
|
20
|
+
_cache_key_from_artifact_storage,
|
21
|
+
attempt_accessing_path,
|
22
|
+
auto_storage_key_from_artifact,
|
23
|
+
delete_storage_using_key,
|
24
|
+
store_file_or_folder,
|
25
|
+
)
|
26
|
+
|
27
|
+
if TYPE_CHECKING:
|
28
|
+
from lamindb_setup.core.upath import UPath
|
29
|
+
|
30
|
+
|
31
|
+
def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
    """Bulk save to registries & storage.

    Note:

        This is much faster than saving records using ``record.save()``.

    Warning:

        Bulk saving neither automatically creates related records nor updates
        existing records! Use ``record.save()`` for these use cases.

    Args:
        records: Multiple :class:`~lamindb.core.Record` objects.
        ignore_conflicts: If ``True``, do not error if some records violate a
            unique or another constraint. However, it won't inplace update the id
            fields of records. If you need records with ids, you need to query
            them from the database.

    Raises:
        ValueError: If a single record is passed instead of an iterable of records.

    Examples:

        Save a list of records:

        >>> labels = [ln.ULabel(f"Label {i}") for i in range(10)]
        >>> ln.save(labels)

        For a single record, use ``record.save()``:

        >>> transform = ln.Transform(name="My pipeline")
        >>> transform.save()

        Update a single existing record:

        >>> transform = ln.Transform.get("0Cb86EZj")
        >>> transform.name = "New name"
        >>> transform.save()

    """
    if isinstance(records, Record):
        raise ValueError("Please use record.save() if saving a single record.")

    # previously, this was all set based,
    # but models without primary keys aren't hashable
    # we distinguish between artifacts and non-artifacts
    # for artifacts, we want to bulk-upload rather than upload one-by-one
    # partition() returns (items failing the predicate, items passing it)
    non_artifacts, artifacts = partition(lambda r: isinstance(r, Artifact), records)
    if non_artifacts:
        # "new" = still flagged as being added or without a primary key
        non_artifacts_old, non_artifacts_new = partition(
            lambda r: r._state.adding or r.pk is None, non_artifacts
        )
        bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
        if non_artifacts_old:
            bulk_update(non_artifacts_old)
        non_artifacts_with_parents = [
            r for r in non_artifacts_new if hasattr(r, "_parents")
        ]
        if non_artifacts_with_parents:
            # this can only happen within bionty right now!!
            # we might extend to core lamindb later
            from bionty.core import add_ontology

            add_ontology(non_artifacts_with_parents)

    if artifacts:
        # commit all artifact rows in one transaction, then upload their data
        with transaction.atomic():
            for record in artifacts:
                record._save_skip_storage()
        using_key = settings._using_key
        store_artifacts(artifacts, using_key=using_key)

    # this function returns None as potentially 10k records might be saved
    # refreshing all of them from the DB would mean a severe performance penalty
    # 2nd reason: consistency with Django Model.save(), which also returns None
    return None
|
105
|
+
|
106
|
+
|
107
|
+
def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False):
    """Create records in bulk, issuing one ``bulk_create`` call per registry class.

    Args:
        records: Records to create; they may belong to different registries.
        ignore_conflicts: Passed through unchanged to each registry's
            ``objects.bulk_create``.
    """
    # group by concrete class, keeping the input order within each group
    records_by_orm = defaultdict(list)
    for record in records:
        records_by_orm[record.__class__].append(record)
    # a distinct loop name avoids rebinding the ``records`` parameter
    for registry, registry_records in records_by_orm.items():
        registry.objects.bulk_create(
            registry_records, ignore_conflicts=ignore_conflicts
        )
|
113
|
+
|
114
|
+
|
115
|
+
def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
    """Update records in bulk, issuing one ``bulk_update`` call per registry class.

    Args:
        records: Already persisted records to update; they may belong to
            different registries.
        ignore_conflicts: Accepted for signature parity with :func:`bulk_create`;
            it is not used by this function.
    """
    # group by concrete class, keeping the input order within each group
    records_by_orm = defaultdict(list)
    for record in records:
        records_by_orm[record.__class__].append(record)
    excluded_names = {"created_at", "id"}
    # a distinct loop name avoids rebinding the ``records`` parameter
    for registry, registry_records in records_by_orm.items():
        # Django's bulk_update refuses primary-key fields, so skip the primary
        # key whatever its name, in addition to "id" and "created_at"
        field_names = [
            field.name
            for field in registry._meta.fields
            if field.name not in excluded_names and not field.primary_key
        ]
        registry.objects.bulk_update(registry_records, field_names)
|
126
|
+
|
127
|
+
|
128
|
+
# This is also used within Artifact.save()
def check_and_attempt_upload(
    artifact: Artifact,
    using_key: str | None = None,
    access_token: str | None = None,
    print_progress: bool = True,
) -> Exception | None:
    """Upload the artifact's local data if it carries a ``_local_filepath``.

    Args:
        artifact: The artifact to upload.
        using_key: Forwarded to :func:`upload_artifact`.
        access_token: Forwarded to :func:`upload_artifact`.
        print_progress: Forwarded to :func:`upload_artifact`.

    Returns:
        The exception raised during the upload, or ``None`` to signal "proceed"
        (either the upload succeeded or nothing had to be uploaded).
    """
    # only a newly instantiated artifact, or one on which replace() was called
    # in a local env, has a _local_filepath and therefore needs an upload
    if not hasattr(artifact, "_local_filepath"):
        return None

    try:
        storage_path, cache_path = upload_artifact(
            artifact,
            using_key,
            access_token=access_token,
            print_progress=print_progress,
        )
    except Exception as exception:
        # errors are handed back to the caller rather than raised
        logger.warning(f"could not upload artifact: {artifact}")
        return exception

    # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
    multi_instance = os.getenv("LAMINDB_MULTI_INSTANCE")
    if multi_instance is None:
        copy_or_move_to_cache(artifact, storage_path, cache_path)

    # after a successful upload the attribute is removed so that another call
    # to save won't upload again; the user should call replace() then
    del artifact._local_filepath
    return None
|
156
|
+
|
157
|
+
|
158
|
+
def copy_or_move_to_cache(
    artifact: Artifact, storage_path: UPath, cache_path: UPath | None
):
    """Place the artifact's local data into the cache after it was stored.

    Args:
        artifact: Artifact whose ``_local_filepath`` points at the local data.
        storage_path: Path the artifact was stored at.
        cache_path: Cache location for the artifact; ``None`` is treated as the
            "storage is local" case, where nothing is cached.

    Returns:
        Always ``None``.
    """
    local_path = artifact._local_filepath

    # in-memory cases
    if local_path is None or not local_path.exists():
        return None

    local_path = local_path.resolve()
    is_dir = local_path.is_dir()
    cache_dir = settings._storage_settings.cache_dir
    lives_in_cache = cache_dir in local_path.parents

    # just delete from the cache dir if storage_path is local
    if cache_path is None:
        differs_from_storage = local_path.as_posix() != storage_path.as_posix()
        if differs_from_storage and lives_in_cache:
            if is_dir:
                shutil.rmtree(local_path)
            else:
                local_path.unlink()
        return None

    # non-local storage_path further
    if local_path != cache_path:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        if lives_in_cache:
            # the data already sits in the cache dir: move it to its final place
            if cache_path.is_dir():
                shutil.rmtree(cache_path)
            local_path.replace(cache_path)
        elif is_dir:
            shutil.copytree(local_path, cache_path)
        else:
            shutil.copy(local_path, cache_path)

    # stamp the cached copy one second into the future (original note: "make
    # sure that the cached version is older than the cloud one")
    # NOTE(review): a future mtime makes the copy look newer - confirm intent
    mts = datetime.now().timestamp() + 1.0
    if is_dir:
        for entry in cache_path.rglob("*"):
            if entry.is_file():
                os.utime(entry, times=(mts, mts))
    else:
        os.utime(cache_path, times=(mts, mts))
|
202
|
+
|
203
|
+
|
204
|
+
# This is also used within Artifact.save()
def check_and_attempt_clearing(
    artifact: Artifact, using_key: str | None = None
) -> Exception | None:
    """Delete the stale storage object recorded in ``_clear_storagekey``, if any.

    This is a clean-up operation after replace() was called.

    Args:
        artifact: The artifact to clean up after.
        using_key: Forwarded to ``delete_storage_using_key``.

    Returns:
        The exception raised during the clean-up, or ``None`` to signal
        "proceed" (either success or no action needed).
    """
    # the attribute is only present if replace() was called
    if not hasattr(artifact, "_clear_storagekey"):
        return None

    try:
        stale_key = artifact._clear_storagekey
        if stale_key is not None:
            delete_storage_using_key(artifact, stale_key, using_key=using_key)
            logger.success(f"deleted stale object at storage key {stale_key}")
            # reset so that a later call does not try to delete again
            artifact._clear_storagekey = None
    except Exception as exception:
        return exception
    return None
|
224
|
+
|
225
|
+
|
226
|
+
def store_artifacts(
    artifacts: Iterable[Artifact], using_key: str | None = None
) -> None:
    """Upload artifacts in a list of database-committed artifacts to storage.

    Processing stops at the first failing upload or clean-up. Every artifact
    that was not uploaded successfully is then cleaned up from the DB.

    Args:
        artifacts: Artifacts whose metadata is already committed.
        using_key: Forwarded to the upload and clean-up helpers.

    Raises:
        RuntimeError: If an upload or a clean-up failed; the original exception
            is attached as ``__cause__``.
    """
    # materialize once: the artifacts are walked twice and their count is needed,
    # which a one-shot iterable could not provide
    artifacts = list(artifacts)
    exception: Exception | None = None
    # because uploads might fail, we need to maintain a new list
    # of the succeeded uploads
    stored_artifacts = []

    # upload new local artifacts
    for artifact in artifacts:
        exception = check_and_attempt_upload(artifact, using_key)
        if exception is not None:
            break
        stored_artifacts.append(artifact)
        exception = check_and_attempt_clearing(artifact, using_key)
        if exception is not None:
            logger.warning(f"clean up of {artifact._clear_storagekey} failed")
            break

    if exception is None:
        return None

    # clean up metadata for artifacts not uploaded to storage
    with transaction.atomic():
        for artifact in artifacts:
            if artifact not in stored_artifacts:
                artifact._delete_skip_storage()
    error_message = prepare_error_message(artifacts, stored_artifacts, exception)
    # chain the original exception so that its traceback is not lost
    raise RuntimeError(error_message) from exception
|
260
|
+
|
261
|
+
|
262
|
+
def prepare_error_message(records, stored_artifacts, exception) -> str:
    """Build the message reported when storing artifacts failed.

    Args:
        records: All records that were supposed to be stored (must support
            ``len``).
        stored_artifacts: The records that were stored successfully.
        exception: The exception that interrupted storing.

    Returns:
        A message listing the successfully stored records (if any), followed by
        the text and the formatted traceback of ``exception``.
    """
    if len(records) == 1 or not stored_artifacts:
        parts = [
            "No entries were uploaded or committed"
            " to the database. See error message:\n\n"
        ]
    else:
        parts = [
            "The following entries have been"
            " successfully uploaded and committed to the database:\n"
        ]
        for record in stored_artifacts:
            # show only the first three comma-separated parts of the repr
            summary = ", ".join(repr(record).split(", ")[:3])
            parts.append(f"- {summary}, ...)\n")
        parts.append("\nSee error message:\n\n")
    # format the traceback of the exception that was passed in;
    # traceback.format_exc() only works while an exception is being handled
    # and would yield "NoneType: None" here
    formatted_traceback = "".join(
        traceback.format_exception(type(exception), exception, exception.__traceback__)
    )
    parts.append(f"{exception}\n\n{formatted_traceback}")
    return "".join(parts)
|
280
|
+
|
281
|
+
|
282
|
+
def upload_artifact(
    artifact,
    using_key: str | None = None,
    access_token: str | None = None,
    print_progress: bool = True,
) -> tuple[UPath, UPath | None]:
    """Store the artifact's local data and work out where its cached copy goes.

    Args:
        artifact: The artifact to store.
        using_key: Forwarded to ``attempt_accessing_path``.
        access_token: Forwarded to ``attempt_accessing_path``.
        print_progress: Forwarded to ``store_file_or_folder``.

    Returns:
        ``(storage_path, cache_path)``; ``cache_path`` is ``None`` when the
        storage path is a local path.
    """
    # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
    storage_key = auto_storage_key_from_artifact(artifact)
    storage_path, storage_settings = attempt_accessing_path(
        artifact, storage_key, using_key=using_key, access_token=access_token
    )

    # data is only written if the artifact is flagged with a truthy _to_store
    if getattr(artifact, "_to_store", False):
        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
        store_file_or_folder(
            artifact._local_filepath, storage_path, print_progress=print_progress
        )

    # local storage has no separate cache location
    if isinstance(storage_path, LocalPathClasses):
        return storage_path, None

    cache_key = _cache_key_from_artifact_storage(artifact, storage_settings)
    cache_path = storage_settings.cloud_to_local_no_update(
        storage_path, cache_key=cache_key
    )
    return storage_path, cache_path
|
lamindb/_storage.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
from lamindb_setup.core._docs import doc_args
|
2
|
-
from lamindb_setup.core.upath import UPath, create_path
|
3
|
-
from lnschema_core import Storage
|
4
|
-
|
5
|
-
|
6
|
-
# Replacement for ``Storage.path``; the docstring is a template that ``doc_args``
# receives together with the original ``Storage.path.__doc__``.
@property  # type: ignore
@doc_args(Storage.path.__doc__)
def path(self) -> UPath:
    """{}"""  # noqa: D415
    # an access token is only passed on if one was attached to this instance
    token = getattr(self, "_access_token", None)
    return create_path(self.root, access_token=token)


# install the property on the registry class
Storage.path = path
|
1
|
+
from lamindb_setup.core._docs import doc_args
|
2
|
+
from lamindb_setup.core.upath import UPath, create_path
|
3
|
+
from lnschema_core import Storage
|
4
|
+
|
5
|
+
|
6
|
+
# Replacement for ``Storage.path``; the docstring is a template that ``doc_args``
# receives together with the original ``Storage.path.__doc__``.
@property  # type: ignore
@doc_args(Storage.path.__doc__)
def path(self) -> UPath:
    """{}"""  # noqa: D415
    # an access token is only passed on if one was attached to this instance
    token = getattr(self, "_access_token", None)
    return create_path(self.root, access_token=token)


# install the property on the registry class
Storage.path = path
|