lamindb 0.76.8__py3-none-any.whl → 0.76.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1205
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +389 -387
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +574 -574
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -867
  27. lamindb/core/_label_manager.py +253 -253
  28. lamindb/core/_mapped_collection.py +631 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +581 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -90
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -164
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -204
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -172
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/METADATA +4 -4
  59. lamindb-0.76.9.dist-info/RECORD +60 -0
  60. {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_save.py CHANGED
@@ -1,308 +1,308 @@
+from __future__ import annotations
+
+import os
+import shutil
+import traceback
+from collections import defaultdict
+from datetime import datetime
+from functools import partial
+from typing import TYPE_CHECKING, Iterable, overload
+
+import lamindb_setup
+from django.db import IntegrityError, transaction
+from django.utils.functional import partition
+from lamin_utils import logger
+from lamindb_setup.core.upath import LocalPathClasses
+from lnschema_core.models import Artifact, Record
+
+from lamindb.core._settings import settings
+from lamindb.core.storage.paths import (
+    _cache_key_from_artifact_storage,
+    attempt_accessing_path,
+    auto_storage_key_from_artifact,
+    delete_storage_using_key,
+    store_file_or_folder,
+)
+
+if TYPE_CHECKING:
+    from lamindb_setup.core.upath import UPath
+
+
+def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
+    """Bulk save to registries & storage.
+
+    Note:
+
+        This is much faster than saving records one by one using ``record.save()``.
+
+    Warning:
+
+        Bulk saving neither automatically creates related records nor updates
+        existing records! Use ``record.save()`` for these use cases.
+
+    Args:
+        records: Multiple :class:`~lamindb.core.Record` objects.
+        ignore_conflicts: If ``True``, do not error if some records violate a
+            unique or another constraint. However, the id fields of the records
+            will not be updated in place. If you need records with ids, query
+            them from the database.
+
+    Examples:
+
+        Save a list of records:
+
+        >>> labels = [ln.ULabel(name=f"Label {i}") for i in range(10)]
+        >>> ln.save(labels)
+
+        For a single record, use ``record.save()``:
+
+        >>> transform = ln.Transform(name="My pipeline")
+        >>> transform.save()
+
+        Update a single existing record:
+
+        >>> transform = ln.Transform.get("0Cb86EZj")
+        >>> transform.name = "New name"
+        >>> transform.save()
+    """
+    if isinstance(records, Record):
+        raise ValueError("Please use record.save() if saving a single record.")
+
+    # previously, this was all set-based,
+    # but models without primary keys aren't hashable
+    # we distinguish between artifacts and non-artifacts
+    # for artifacts, we want to bulk-upload rather than upload one-by-one
+    non_artifacts, artifacts = partition(lambda r: isinstance(r, Artifact), records)
+    if non_artifacts:
+        non_artifacts_old, non_artifacts_new = partition(
+            lambda r: r._state.adding or r.pk is None, non_artifacts
+        )
+        bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
+        if non_artifacts_old:
+            bulk_update(non_artifacts_old)
+        non_artifacts_with_parents = [
+            r for r in non_artifacts_new if hasattr(r, "_parents")
+        ]
+        if len(non_artifacts_with_parents) > 0:
+            # this can only happen within bionty right now!!
+            # we might extend to core lamindb later
+            from bionty.core import add_ontology
+
+            add_ontology(non_artifacts_with_parents)
+
+    if artifacts:
+        with transaction.atomic():
+            for record in artifacts:
+                record._save_skip_storage()
+        using_key = settings._using_key
+        store_artifacts(artifacts, using_key=using_key)
+
+    # this function returns None as potentially 10k records might be saved
+    # refreshing all of them from the DB would mean a severe performance penalty
+    # 2nd reason: consistency with Django Model.save(), which also returns None
+    return None
+
+
+def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False):
+    records_by_orm = defaultdict(list)
+    for record in records:
+        records_by_orm[record.__class__].append(record)
+    for registry, records in records_by_orm.items():
+        registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
+
+
+def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
+    records_by_orm = defaultdict(list)
+    for record in records:
+        records_by_orm[record.__class__].append(record)
+    for registry, records in records_by_orm.items():
+        field_names = [
+            field.name
+            for field in registry._meta.fields
+            if (field.name != "created_at" and field.name != "id")
+        ]
+        registry.objects.bulk_update(records, field_names)
+
+
+# This is also used within Artifact.save()
+def check_and_attempt_upload(
+    artifact: Artifact,
+    using_key: str | None = None,
+    access_token: str | None = None,
+    print_progress: bool = True,
+) -> Exception | None:
+    # if the Artifact object was newly instantiated or replace() was called in
+    # a local environment, it has a _local_filepath and needs to be uploaded
+    if hasattr(artifact, "_local_filepath"):
+        try:
+            storage_path, cache_path = upload_artifact(
+                artifact,
+                using_key,
+                access_token=access_token,
+                print_progress=print_progress,
+            )
+        except Exception as exception:
+            logger.warning(f"could not upload artifact: {artifact}")
+            return exception
+        # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
+        if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
+            copy_or_move_to_cache(artifact, storage_path, cache_path)
+        # after a successful upload, remove the attribute so that another call
+        # to save won't upload again; the user should call replace() instead
+        del artifact._local_filepath
+    # returning None means proceed (either success or no action needed)
+    return None
+
+
+def copy_or_move_to_cache(
+    artifact: Artifact, storage_path: UPath, cache_path: UPath | None
+):
+    local_path = artifact._local_filepath
+
+    # in-memory cases
+    if local_path is None or not local_path.exists():
+        return None
+
+    local_path = local_path.resolve()
+    is_dir = local_path.is_dir()
+    cache_dir = settings._storage_settings.cache_dir
+
+    # just delete from the cache dir if storage_path is local
+    if cache_path is None:
+        if (
+            local_path.as_posix() != storage_path.as_posix()
+            and cache_dir in local_path.parents
+        ):
+            if is_dir:
+                shutil.rmtree(local_path)
+            else:
+                local_path.unlink()
+        return None
+    # from here on, storage_path is non-local
+    if local_path != cache_path:
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        if cache_dir in local_path.parents:
+            if cache_path.is_dir():
+                shutil.rmtree(cache_path)
+            local_path.replace(cache_path)
+        else:
+            if is_dir:
+                shutil.copytree(local_path, cache_path)
+            else:
+                shutil.copy(local_path, cache_path)
+    # bump the mtime so the cached version is treated as newer than the cloud one
+    mts = datetime.now().timestamp() + 1.0
+    if is_dir:
+        files = (file for file in cache_path.rglob("*") if file.is_file())
+        for file in files:
+            os.utime(file, times=(mts, mts))
+    else:
+        os.utime(cache_path, times=(mts, mts))
+
+
+# This is also used within Artifact.save()
+def check_and_attempt_clearing(
+    artifact: Artifact, using_key: str | None = None
+) -> Exception | None:
+    # this is a clean-up operation after replace() was called
+    # this will only evaluate to True if replace() was called
+    if hasattr(artifact, "_clear_storagekey"):
+        try:
+            if artifact._clear_storagekey is not None:
+                delete_storage_using_key(
+                    artifact, artifact._clear_storagekey, using_key=using_key
+                )
+                logger.success(
+                    f"deleted stale object at storage key {artifact._clear_storagekey}"
+                )
+                artifact._clear_storagekey = None
+        except Exception as exception:
+            return exception
+    # returning None means proceed (either success or no action needed)
+    return None
+
+
+def store_artifacts(
+    artifacts: Iterable[Artifact], using_key: str | None = None
+) -> None:
+    """Upload a list of database-committed artifacts to storage.
+
+    If any upload fails, the artifacts that were not stored are deleted from the DB.
+    """
+    exception: Exception | None = None
+    # because uploads might fail, we need to maintain a new list
+    # of the succeeded uploads
+    stored_artifacts = []
+
+    # upload new local artifacts
+    for artifact in artifacts:
+        exception = check_and_attempt_upload(artifact, using_key)
+        if exception is not None:
+            break
+        stored_artifacts += [artifact]
+        exception = check_and_attempt_clearing(artifact, using_key)
+        if exception is not None:
+            logger.warning(f"clean up of {artifact._clear_storagekey} failed")
+            break
+
+    if exception is not None:
+        # clean up metadata for artifacts not uploaded to storage
+        with transaction.atomic():
+            for artifact in artifacts:
+                if artifact not in stored_artifacts:
+                    artifact._delete_skip_storage()
+        error_message = prepare_error_message(artifacts, stored_artifacts, exception)
+        # this is bad because we're losing the original traceback
+        # needs to be refactored - also, the original error should be raised
+        raise RuntimeError(error_message)
+    return None
+
+
+def prepare_error_message(records, stored_artifacts, exception) -> str:
+    if len(records) == 1 or len(stored_artifacts) == 0:
+        error_message = (
+            "No entries were uploaded or committed"
+            " to the database. See error message:\n\n"
+        )
+    else:
+        error_message = (
+            "The following entries have been"
+            " successfully uploaded and committed to the database:\n"
+        )
+        for record in stored_artifacts:
+            error_message += (
+                f"- {', '.join(record.__repr__().split(', ')[:3]) + ', ...)'}\n"
+            )
+        error_message += "\nSee error message:\n\n"
+    error_message += f"{str(exception)}\n\n{traceback.format_exc()}"
+    return error_message
+
+
+def upload_artifact(
+    artifact,
+    using_key: str | None = None,
+    access_token: str | None = None,
+    print_progress: bool = True,
+) -> tuple[UPath, UPath | None]:
+    """Store and add file and its linked entries."""
+    # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
+    storage_key = auto_storage_key_from_artifact(artifact)
+    storage_path, storage_settings = attempt_accessing_path(
+        artifact, storage_key, using_key=using_key, access_token=access_token
+    )
+    if hasattr(artifact, "_to_store") and artifact._to_store:
+        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
+        store_file_or_folder(
+            artifact._local_filepath, storage_path, print_progress=print_progress
+        )
+
+    if isinstance(storage_path, LocalPathClasses):
+        cache_path = None
+    else:
+        cache_key = _cache_key_from_artifact_storage(artifact, storage_settings)
+        cache_path = storage_settings.cloud_to_local_no_update(
+            storage_path, cache_key=cache_key
+        )
+
+    return storage_path, cache_path
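
The save() path above splits records into non-artifacts (bulk-created or bulk-updated through the Django ORM) and artifacts (committed in one transaction, then uploaded to storage). A minimal usage sketch of the documented behavior, assuming a configured lamindb instance; registry names are taken from the docstring above:

import lamindb as ln

# bulk-create new records in one call; much faster than per-record save()
labels = [ln.ULabel(name=f"Label {i}") for i in range(10)]
ln.save(labels)

# re-saving with ignore_conflicts=True suppresses unique-constraint errors;
# note that conflicting records do not get their id fields updated in place,
# so query them back from the database if ids are needed
ln.save(labels, ignore_conflicts=True)

# a single record goes through record.save() instead
transform = ln.Transform(name="My pipeline")
transform.save()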
lamindb/_storage.py CHANGED
@@ -1,14 +1,14 @@
+from lamindb_setup.core._docs import doc_args
+from lamindb_setup.core.upath import UPath, create_path
+from lnschema_core import Storage
+
+
+@property  # type: ignore
+@doc_args(Storage.path.__doc__)
+def path(self) -> UPath:
+    """{}"""  # noqa: D415
+    access_token = self._access_token if hasattr(self, "_access_token") else None
+    return create_path(self.root, access_token=access_token)
+
+
+Storage.path = path
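
Since Storage.path is attached as a property at import time, any Storage record resolves its root to a UPath on access. A short sketch of typical consumption, under the assumption that a matching record exists; the bucket name is hypothetical:

import lamindb as ln

# look up a storage record by its root (hypothetical bucket name)
storage = ln.Storage.filter(root="s3://my-bucket").one()

# the property calls create_path(storage.root) under the hood,
# forwarding _access_token if one was set on the record
path = storage.path
print(path.exists())  # UPath supports pathlib-style operations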