lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff reflects the changes between two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only.
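
For readers who want to reproduce a diff like the one below locally, here is a minimal sketch. It assumes `pip` is available on the PATH; the download directory layout is illustrative. It fetches both wheels and diffs a single member file with the standard library:

    import difflib
    import glob
    import subprocess
    import zipfile

    # download both wheels (without dependencies) into version-specific folders
    for version in ("0.76.8", "0.76.10"):
        subprocess.run(
            ["pip", "download", f"lamindb=={version}", "--no-deps", "-d", f"wheels/{version}"],
            check=True,
        )

    def read_member(version: str, member: str) -> list[str]:
        # each download folder contains exactly one wheel
        wheel = glob.glob(f"wheels/{version}/*.whl")[0]
        with zipfile.ZipFile(wheel) as zf:
            return zf.read(member).decode().splitlines(keepends=True)

    old = read_member("0.76.8", "lamindb/_artifact.py")
    new = read_member("0.76.10", "lamindb/_artifact.py")
    print("".join(difflib.unified_diff(old, new, "0.76.8", "0.76.10")))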
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
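
The RECORD entries at the end of the list above are the wheels' file manifests (one "path,hash,size" line per member), so comparing them is a quick way to see which files were added or removed between the two versions. A minimal sketch, assuming both wheels are present locally (filenames are illustrative):

    import zipfile

    def record_paths(wheel: str, dist_info: str) -> set[str]:
        # RECORD lines have the form "path,hash,size"; keep only the path column
        with zipfile.ZipFile(wheel) as zf:
            record = zf.read(f"{dist_info}/RECORD").decode()
        return {line.split(",", 1)[0] for line in record.splitlines() if line}

    old = record_paths("lamindb-0.76.8-py3-none-any.whl", "lamindb-0.76.8.dist-info")
    new = record_paths("lamindb-0.76.10-py3-none-any.whl", "lamindb-0.76.10.dist-info")
    print("added:", sorted(new - old))    # e.g. lamindb/core/_django.py
    print("removed:", sorted(old - new))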
lamindb/_artifact.py CHANGED
@@ -1,1205 +1,1206 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import shutil
5
- from pathlib import Path, PurePath, PurePosixPath
6
- from typing import TYPE_CHECKING, Any, Mapping
7
-
8
- import fsspec
9
- import lamindb_setup as ln_setup
10
- import pandas as pd
11
- from anndata import AnnData
12
- from django.db.models import Q, QuerySet
13
- from lamin_utils import colors, logger
14
- from lamindb_setup import settings as setup_settings
15
- from lamindb_setup._init_instance import register_storage_in_instance
16
- from lamindb_setup.core._docs import doc_args
17
- from lamindb_setup.core._settings_storage import init_storage
18
- from lamindb_setup.core.hashing import hash_dir, hash_file
19
- from lamindb_setup.core.upath import (
20
- create_path,
21
- extract_suffix_from_path,
22
- get_stat_dir_cloud,
23
- get_stat_file_cloud,
24
- )
25
- from lnschema_core.models import Artifact, FeatureManager, ParamManager, Run, Storage
26
- from lnschema_core.types import (
27
- VisibilityChoice,
28
- )
29
-
30
- from lamindb._utils import attach_func_to_class_method
31
- from lamindb.core._data import _track_run_input, describe, view_lineage
32
- from lamindb.core._settings import settings
33
- from lamindb.core.exceptions import IntegrityError, InvalidArgument
34
- from lamindb.core.loaders import load_to_memory
35
- from lamindb.core.storage import (
36
- LocalPathClasses,
37
- UPath,
38
- delete_storage,
39
- infer_suffix,
40
- write_to_disk,
41
- )
42
- from lamindb.core.storage.paths import (
43
- auto_storage_key_from_artifact,
44
- auto_storage_key_from_artifact_uid,
45
- check_path_is_child_of_root,
46
- filepath_cache_key_from_artifact,
47
- filepath_from_artifact,
48
- )
49
- from lamindb.core.versioning import (
50
- create_uid,
51
- message_update_key_in_version_family,
52
- )
53
-
54
- from .core._data import (
55
- add_transform_to_kwargs,
56
- get_run,
57
- save_feature_set_links,
58
- save_feature_sets,
59
- )
60
- from .core.storage.objects import _mudata_is_installed
61
- from .core.storage.paths import AUTO_KEY_PREFIX
62
-
63
- try:
64
- from .core.storage._zarr import zarr_is_adata
65
- except ImportError:
66
-
67
- def zarr_is_adata(storepath): # type: ignore
68
- raise ImportError("Please install zarr: pip install zarr")
69
-
70
-
71
- if TYPE_CHECKING:
72
- from lamindb_setup.core.types import UPathStr
73
- from mudata import MuData
74
- from tiledbsoma import Collection as SOMACollection
75
- from tiledbsoma import Experiment as SOMAExperiment
76
-
77
- from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
78
-
79
-
80
- def process_pathlike(
81
- filepath: UPath,
82
- default_storage: Storage,
83
- using_key: str | None,
84
- skip_existence_check: bool = False,
85
- ) -> tuple[Storage, bool]:
86
- if not skip_existence_check:
87
- try: # check if file exists
88
- if not filepath.exists():
89
- raise FileNotFoundError(filepath)
90
- except PermissionError:
91
- pass
92
- if check_path_is_child_of_root(filepath, default_storage.root):
93
- use_existing_storage_key = True
94
- return default_storage, use_existing_storage_key
95
- else:
96
- # check whether the path is part of one of the existing
97
- # already-registered storage locations
98
- result = False
99
- # within the hub, we don't want to perform check_path_in_existing_storage
100
- if using_key is None:
101
- result = check_path_in_existing_storage(filepath, using_key)
102
- if isinstance(result, Storage):
103
- use_existing_storage_key = True
104
- return result, use_existing_storage_key
105
- else:
106
- # if the path is in the cloud, we have a good candidate
107
- # for the storage root: the bucket
108
- if not isinstance(filepath, LocalPathClasses):
109
- # for a cloud path, new_root is always the bucket name
110
- new_root = list(filepath.parents)[-1]
111
- # do not register remote storage locations on hub if the current instance
112
- # is not managed on the hub
113
- storage_settings, _ = init_storage(
114
- new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
115
- )
116
- storage_record = register_storage_in_instance(storage_settings)
117
- use_existing_storage_key = True
118
- return storage_record, use_existing_storage_key
119
- # if the filepath is local
120
- else:
121
- use_existing_storage_key = False
122
- # if the default storage is local we'll throw an error if the user
123
- # doesn't provide a key
124
- if default_storage.type == "local":
125
- return default_storage, use_existing_storage_key
126
- # if the default storage is in the cloud (the file is going to
127
- # be uploaded upon saving it), we treat the filepath as a cache
128
- else:
129
- return default_storage, use_existing_storage_key
130
-
131
-
132
- def process_data(
133
- provisional_uid: str,
134
- data: UPathStr | pd.DataFrame | AnnData,
135
- format: str | None,
136
- key: str | None,
137
- default_storage: Storage,
138
- using_key: str | None,
139
- skip_existence_check: bool = False,
140
- ) -> tuple[Any, Path | UPath, str, Storage, bool]:
141
- """Serialize a data object that's provided as file or in memory."""
142
- # if not overwritten, data gets stored in default storage
143
- if _mudata_is_installed():
144
- from mudata import MuData
145
-
146
- data_types = (pd.DataFrame, AnnData, MuData)
147
- else:
148
- data_types = (pd.DataFrame, AnnData) # type:ignore
149
-
150
- if isinstance(data, (str, Path, UPath)): # UPathStr, spelled out
151
- access_token = (
152
- default_storage._access_token
153
- if hasattr(default_storage, "_access_token")
154
- else None
155
- )
156
- path = create_path(data, access_token=access_token).resolve()
157
- storage, use_existing_storage_key = process_pathlike(
158
- path,
159
- default_storage=default_storage,
160
- using_key=using_key,
161
- skip_existence_check=skip_existence_check,
162
- )
163
- suffix = extract_suffix_from_path(path)
164
- memory_rep = None
165
- elif isinstance(data, data_types):
166
- storage = default_storage
167
- memory_rep = data
168
- if key is not None:
169
- key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
170
- # use suffix as the (adata) format if the format is not provided
171
- if isinstance(data, AnnData) and format is None and len(key_suffix) > 0:
172
- format = key_suffix[1:]
173
- else:
174
- key_suffix = None
175
- suffix = infer_suffix(data, format)
176
- if key_suffix is not None and key_suffix != suffix:
177
- raise InvalidArgument(
178
- f"The suffix '{key_suffix}' of the provided key is incorrect, it should"
179
- f" be '{suffix}'."
180
- )
181
- cache_name = f"{provisional_uid}{suffix}"
182
- path = settings.storage.cache_dir / cache_name
183
- # Alex: I don't understand the line below
184
- if path.suffixes == []:
185
- path = path.with_suffix(suffix)
186
- write_to_disk(data, path)
187
- use_existing_storage_key = False
188
- else:
189
- raise NotImplementedError(
190
- f"Do not know how to create a artifact object from {data}, pass a path"
191
- " instead!"
192
- )
193
- return memory_rep, path, suffix, storage, use_existing_storage_key
194
-
195
-
196
- def get_stat_or_artifact(
197
- path: UPath,
198
- key: str | None = None,
199
- check_hash: bool = True,
200
- is_replace: bool = False,
201
- instance: str | None = None,
202
- ) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
203
- n_objects = None
204
- if settings.creation.artifact_skip_size_hash:
205
- return None, None, None, n_objects, None
206
- stat = path.stat() # one network request
207
- if not isinstance(path, LocalPathClasses):
208
- size, hash, hash_type = None, None, None
209
- if stat is not None:
210
- # convert UPathStatResult to fsspec info dict
211
- stat = stat.as_info()
212
- if "ETag" in stat: # is file
213
- size, hash, hash_type = get_stat_file_cloud(stat)
214
- elif stat["type"] == "directory":
215
- size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
216
- if hash is None:
217
- logger.warning(f"did not add hash for {path}")
218
- return size, hash, hash_type, n_objects, None
219
- else:
220
- if path.is_dir():
221
- size, hash, hash_type, n_objects = hash_dir(path)
222
- else:
223
- hash, hash_type = hash_file(path)
224
- size = stat.st_size
225
- if not check_hash:
226
- return size, hash, hash_type, n_objects, None
227
- previous_artifact_version = None
228
- if key is None or is_replace:
229
- result = Artifact.objects.using(instance).filter(hash=hash).all()
230
- artifact_with_same_hash_exists = len(result) > 0
231
- else:
232
- storage_id = settings.storage.id
233
- result = (
234
- Artifact.objects.using(instance)
235
- .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
236
- .order_by("-created_at")
237
- .all()
238
- )
239
- artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
240
- if not artifact_with_same_hash_exists and len(result) > 0:
241
- logger.important(
242
- f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
243
- )
244
- previous_artifact_version = result[0]
245
- if artifact_with_same_hash_exists:
246
- if settings.creation.artifact_if_hash_exists == "error":
247
- msg = f"artifact with same hash exists: {result[0]}"
248
- hint = (
249
- "💡 you can make this error a warning:\n"
250
- " ln.settings.creation.artifact_if_hash_exists"
251
- )
252
- raise FileExistsError(f"{msg}\n{hint}")
253
- elif settings.creation.artifact_if_hash_exists == "warn_create_new":
254
- logger.warning(
255
- "creating new Artifact object despite existing artifact with same hash:"
256
- f" {result[0]}"
257
- )
258
- return size, hash, hash_type, n_objects, None
259
- else:
260
- if result[0].visibility == -1:
261
- raise FileExistsError(
262
- f"You're trying to re-create this artifact in trash: {result[0]}"
263
- "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
264
- )
265
- logger.important(f"returning existing artifact with same hash: {result[0]}")
266
- return result[0]
267
- else:
268
- return size, hash, hash_type, n_objects, previous_artifact_version
269
-
270
-
271
- def check_path_in_existing_storage(
272
- path: Path | UPath, using_key: str | None = None
273
- ) -> Storage | bool:
274
- for storage in Storage.objects.using(using_key).filter().all():
275
- # if path is part of storage, return it
276
- if check_path_is_child_of_root(path, root=storage.root):
277
- return storage
278
- return False
279
-
280
-
281
- def get_relative_path_to_directory(
282
- path: PurePath | Path | UPath, directory: PurePath | Path | UPath
283
- ) -> PurePath | Path:
284
- if isinstance(directory, UPath) and not isinstance(directory, LocalPathClasses):
285
- # UPath.relative_to() is not behaving as it should (2023-04-07)
286
- # need to lstrip otherwise inconsistent behavior across trailing slashes
287
- # see test_artifact.py: test_get_relative_path_to_directory
288
- relpath = PurePath(
289
- path.as_posix().replace(directory.as_posix(), "").lstrip("/")
290
- )
291
- elif isinstance(directory, Path):
292
- relpath = path.resolve().relative_to(directory.resolve()) # type: ignore
293
- elif isinstance(directory, PurePath):
294
- relpath = path.relative_to(directory)
295
- else:
296
- raise TypeError("Directory not of type Path or UPath")
297
- return relpath
298
-
299
-
300
- def get_artifact_kwargs_from_data(
301
- *,
302
- data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
303
- key: str | None,
304
- run: Run | None,
305
- format: str | None,
306
- provisional_uid: str,
307
- version: str | None,
308
- default_storage: Storage,
309
- using_key: str | None = None,
310
- is_replace: bool = False,
311
- skip_check_exists: bool = False,
312
- ):
313
- run = get_run(run)
314
- memory_rep, path, suffix, storage, use_existing_storage_key = process_data(
315
- provisional_uid,
316
- data,
317
- format,
318
- key,
319
- default_storage,
320
- using_key,
321
- skip_check_exists,
322
- )
323
- stat_or_artifact = get_stat_or_artifact(
324
- path=path,
325
- key=key,
326
- instance=using_key,
327
- is_replace=is_replace,
328
- )
329
- if isinstance(stat_or_artifact, Artifact):
330
- artifact = stat_or_artifact
331
- # update the run of the existing artifact
332
- if run is not None:
333
- # save the information that this artifact was previously
334
- # produced by another run
335
- if artifact.run is not None:
336
- artifact.run._output_artifacts_with_later_updates.add(artifact)
337
- # update the run of the artifact with the latest run
338
- stat_or_artifact.run = run
339
- stat_or_artifact.transform = run.transform
340
- return artifact, None
341
- else:
342
- size, hash, hash_type, n_objects, revises = stat_or_artifact
343
-
344
- if revises is not None: # update provisional_uid
345
- provisional_uid, revises = create_uid(revises=revises, version=version)
346
- if settings.storage.cache_dir in path.parents:
347
- path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
348
-
349
- check_path_in_storage = False
350
- if use_existing_storage_key:
351
- inferred_key = get_relative_path_to_directory(
352
- path=path, directory=UPath(storage.root)
353
- ).as_posix()
354
- if key is None:
355
- key = inferred_key
356
- else:
357
- if not key == inferred_key:
358
- raise InvalidArgument(
359
- f"The path '{data}' is already in registered storage"
360
- f" '{storage.root}' with key '{inferred_key}'\nYou passed"
361
- f" conflicting key '{key}': please move the file before"
362
- " registering it."
363
- )
364
- check_path_in_storage = True
365
- else:
366
- storage = default_storage
367
-
368
- log_storage_hint(
369
- check_path_in_storage=check_path_in_storage,
370
- storage=storage,
371
- key=key,
372
- uid=provisional_uid,
373
- suffix=suffix,
374
- is_dir=n_objects is not None,
375
- )
376
-
377
- # do we use a virtual or an actual storage key?
378
- key_is_virtual = settings.creation._artifact_use_virtual_keys
379
-
380
- # if the file is already in storage, independent of the default
381
- # we use an actual storage key
382
- if check_path_in_storage:
383
- key_is_virtual = False
384
-
385
- kwargs = {
386
- "uid": provisional_uid,
387
- "suffix": suffix,
388
- "hash": hash,
389
- "_hash_type": hash_type,
390
- "key": key,
391
- "size": size,
392
- "storage_id": storage.id,
393
- # passing both the id and the object
394
- # to make them both available immediately
395
- # after object creation
396
- "n_objects": n_objects,
397
- "n_observations": None, # to implement
398
- "run_id": run.id if run is not None else None,
399
- "run": run,
400
- "_key_is_virtual": key_is_virtual,
401
- "revises": revises,
402
- }
403
- if not isinstance(path, LocalPathClasses):
404
- local_filepath = None
405
- cloud_filepath = path
406
- else:
407
- local_filepath = path
408
- cloud_filepath = None
409
- privates = {
410
- "local_filepath": local_filepath,
411
- "cloud_filepath": cloud_filepath,
412
- "memory_rep": memory_rep,
413
- "check_path_in_storage": check_path_in_storage,
414
- }
415
- return kwargs, privates
416
-
417
-
418
- def log_storage_hint(
419
- *,
420
- check_path_in_storage: bool,
421
- storage: Storage | None,
422
- key: str | None,
423
- uid: str,
424
- suffix: str,
425
- is_dir: bool,
426
- ) -> None:
427
- hint = ""
428
- if check_path_in_storage:
429
- display_root = storage.root # type: ignore
430
- # check whether path is local
431
- if fsspec.utils.get_protocol(storage.root) == "file": # type: ignore
432
- # if it's a local path, check whether it's in the current working directory
433
- root_path = Path(storage.root) # type: ignore
434
- if check_path_is_child_of_root(root_path, Path.cwd()):
435
- # only display the relative path, not the fully resolved path
436
- display_root = root_path.relative_to(Path.cwd())
437
- hint += f"path in storage '{display_root}'" # type: ignore
438
- else:
439
- hint += "path content will be copied to default storage upon `save()`"
440
- if key is None:
441
- storage_key = auto_storage_key_from_artifact_uid(uid, suffix, is_dir)
442
- hint += f" with key `None` ('{storage_key}')"
443
- else:
444
- hint += f" with key '{key}'"
445
- logger.hint(hint)
446
-
447
-
448
- def data_is_anndata(data: AnnData | UPathStr) -> bool:
449
- if isinstance(data, AnnData):
450
- return True
451
- if isinstance(data, (str, Path, UPath)):
452
- data_path = UPath(data)
453
- if data_path.suffix == ".h5ad":
454
- return True
455
- elif data_path.suffix == ".zarr":
456
- # ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
457
- if ".anndata" in data_path.suffixes:
458
- return True
459
- # check only for local, expensive for cloud
460
- if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
461
- return zarr_is_adata(data_path)
462
- else:
463
- logger.warning("We do not check if cloud zarr is AnnData or not.")
464
- return False
465
- return False
466
-
467
-
468
- def data_is_mudata(data: MuData | UPathStr) -> bool:
469
- if _mudata_is_installed():
470
- from mudata import MuData
471
-
472
- if isinstance(data, MuData):
473
- return True
474
- if isinstance(data, (str, Path)):
475
- return UPath(data).suffix in {".h5mu"}
476
- return False
477
-
478
-
479
- def _check_accessor_artifact(data: Any, accessor: str | None = None):
480
- if accessor is None:
481
- if isinstance(data, pd.DataFrame):
482
- logger.warning("data is a DataFrame, please use .from_df()")
483
- accessor = "DataFrame"
484
- return accessor
485
-
486
- data_is_path = isinstance(data, (str, Path))
487
- if data_is_anndata(data):
488
- if not data_is_path:
489
- logger.warning("data is an AnnData, please use .from_anndata()")
490
- accessor = "AnnData"
491
- elif data_is_mudata(data):
492
- if not data_is_path:
493
- logger.warning("data is a MuData, please use .from_mudata()")
494
- accessor = "MuData"
495
- elif not data_is_path: # UPath is a subclass of Path
496
- raise TypeError("data has to be a string, Path, UPath")
497
- return accessor
498
-
499
-
500
- def __init__(artifact: Artifact, *args, **kwargs):
501
- artifact.features = FeatureManager(artifact)
502
- artifact.params = ParamManager(artifact)
503
- # Below checks for the Django-internal call in from_db()
504
- # it'd be better if we could avoid this, but not being able to create an Artifact
505
- # from data with the default constructor renders the central class of the API
506
- # essentially useless
507
- # The danger below is not that a user might pass as many args (12 of them), but rather
508
- # that at some point the Django API might change; on the other hand, this
509
- # condition for calling the constructor based on kwargs should always
510
- # stay robust
511
- if len(args) == len(artifact._meta.concrete_fields):
512
- super(Artifact, artifact).__init__(*args, **kwargs)
513
- return None
514
- # now we proceed with the user-facing constructor
515
- if len(args) > 1:
516
- raise ValueError("Only one non-keyword arg allowed: data")
517
-
518
- data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
519
- type: str = kwargs.pop("type") if "type" in kwargs else None
520
- key: str | None = kwargs.pop("key") if "key" in kwargs else None
521
- run: Run | None = kwargs.pop("run") if "run" in kwargs else None
522
- description: str | None = (
523
- kwargs.pop("description") if "description" in kwargs else None
524
- )
525
- revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
526
- version: str | None = kwargs.pop("version") if "version" in kwargs else None
527
- visibility: int | None = (
528
- kwargs.pop("visibility")
529
- if "visibility" in kwargs
530
- else VisibilityChoice.default.value
531
- )
532
- format = kwargs.pop("format") if "format" in kwargs else None
533
- _is_internal_call = kwargs.pop("_is_internal_call", False)
534
- skip_check_exists = (
535
- kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
536
- )
537
- if "default_storage" in kwargs:
538
- default_storage = kwargs.pop("default_storage")
539
- else:
540
- if setup_settings.instance.keep_artifacts_local:
541
- default_storage = setup_settings.instance.storage_local.record
542
- else:
543
- default_storage = setup_settings.instance.storage.record
544
- using_key = (
545
- kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
546
- )
547
- accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
548
- accessor = _check_accessor_artifact(data=data, accessor=accessor)
549
- if "is_new_version_of" in kwargs:
550
- logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
551
- revises = kwargs.pop("is_new_version_of")
552
- if not len(kwargs) == 0:
553
- raise ValueError(
554
- "Only data, key, run, description, version, revises, visibility"
555
- f" can be passed, you passed: {kwargs}"
556
- )
557
- if revises is not None and key is not None and revises.key != key:
558
- note = message_update_key_in_version_family(
559
- suid=revises.stem_uid,
560
- existing_key=revises.key,
561
- new_key=key,
562
- registry="Artifact",
563
- )
564
- raise ValueError(
565
- f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
566
- )
567
- if revises is not None:
568
- if not isinstance(revises, Artifact):
569
- raise TypeError("`revises` has to be of type `Artifact`")
570
- if description is None:
571
- description = revises.description
572
- if key is not None and AUTO_KEY_PREFIX in key:
573
- raise ValueError(
574
- f"Do not pass key that contains a managed storage path in `{AUTO_KEY_PREFIX}`"
575
- )
576
- # below is for internal calls that require defining the storage location
577
- # ahead of constructing the Artifact
578
- if isinstance(data, (str, Path)) and AUTO_KEY_PREFIX in str(data):
579
- if _is_internal_call:
580
- is_automanaged_path = True
581
- user_provided_key = key
582
- key = None
583
- else:
584
- raise ValueError(
585
- f"Do not pass path inside the `{AUTO_KEY_PREFIX}` directory."
586
- )
587
- else:
588
- is_automanaged_path = False
589
- provisional_uid, revises = create_uid(revises=revises, version=version)
590
- kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
591
- data=data,
592
- key=key,
593
- run=run,
594
- format=format,
595
- provisional_uid=provisional_uid,
596
- version=version,
597
- default_storage=default_storage,
598
- using_key=using_key,
599
- skip_check_exists=skip_check_exists,
600
- )
601
-
602
- # an object with the same hash already exists
603
- if isinstance(kwargs_or_artifact, Artifact):
604
- from ._record import init_self_from_db, update_attributes
605
-
606
- init_self_from_db(artifact, kwargs_or_artifact)
607
- # adding "key" here is dangerous because key might be auto-populated
608
- update_attributes(artifact, {"description": description})
609
- if artifact.key != key and key is not None:
610
- logger.warning(
611
- f"key {artifact.key} on existing artifact differs from passed key {key}"
612
- )
613
- return None
614
- else:
615
- kwargs = kwargs_or_artifact
616
-
617
- if revises is None:
618
- revises = kwargs_or_artifact.pop("revises")
619
-
620
- if data is not None:
621
- artifact._local_filepath = privates["local_filepath"]
622
- artifact._cloud_filepath = privates["cloud_filepath"]
623
- artifact._memory_rep = privates["memory_rep"]
624
- artifact._to_store = not privates["check_path_in_storage"]
625
-
626
- if is_automanaged_path and _is_internal_call:
627
- kwargs["_key_is_virtual"] = True
628
- assert AUTO_KEY_PREFIX in kwargs["key"] # noqa: S101
629
- uid = kwargs["key"].replace(AUTO_KEY_PREFIX, "").replace(kwargs["suffix"], "")
630
- kwargs["key"] = user_provided_key
631
- if revises is not None:
632
- assert uid.startswith(revises.stem_uid) # noqa: S101
633
- if len(uid) == 16:
634
- if revises is None:
635
- uid += "0000"
636
- else:
637
- uid, revises = create_uid(revises=revises, version=version)
638
- kwargs["uid"] = uid
639
-
640
- # only set key now so that we don't do a look-up on it in case revises is passed
641
- if revises is not None:
642
- kwargs["key"] = revises.key
643
-
644
- kwargs["type"] = type
645
- kwargs["version"] = version
646
- kwargs["description"] = description
647
- kwargs["visibility"] = visibility
648
- kwargs["_accessor"] = accessor
649
- kwargs["revises"] = revises
650
- # this check needs to come down here because key might be populated from an
651
- # existing file path during get_artifact_kwargs_from_data()
652
- if (
653
- kwargs["key"] is None
654
- and kwargs["description"] is None
655
- and kwargs["run"] is None
656
- ):
657
- raise ValueError("Pass one of key, run or description as a parameter")
658
-
659
- add_transform_to_kwargs(kwargs, kwargs["run"])
660
-
661
- super(Artifact, artifact).__init__(**kwargs)
662
-
663
-
664
- @classmethod # type: ignore
665
- @doc_args(Artifact.from_df.__doc__)
666
- def from_df(
667
- cls,
668
- df: pd.DataFrame,
669
- key: str | None = None,
670
- description: str | None = None,
671
- run: Run | None = None,
672
- revises: Artifact | None = None,
673
- **kwargs,
674
- ) -> Artifact:
675
- """{}""" # noqa: D415
676
- artifact = Artifact(
677
- data=df,
678
- key=key,
679
- run=run,
680
- description=description,
681
- revises=revises,
682
- _accessor="DataFrame",
683
- type="dataset",
684
- **kwargs,
685
- )
686
- return artifact
687
-
688
-
689
- @classmethod # type: ignore
690
- @doc_args(Artifact.from_anndata.__doc__)
691
- def from_anndata(
692
- cls,
693
- adata: AnnData | UPathStr,
694
- key: str | None = None,
695
- description: str | None = None,
696
- run: Run | None = None,
697
- revises: Artifact | None = None,
698
- **kwargs,
699
- ) -> Artifact:
700
- """{}""" # noqa: D415
701
- if not data_is_anndata(adata):
702
- raise ValueError("data has to be an AnnData object or a path to AnnData-like")
703
- artifact = Artifact(
704
- data=adata,
705
- key=key,
706
- run=run,
707
- description=description,
708
- revises=revises,
709
- _accessor="AnnData",
710
- type="dataset",
711
- **kwargs,
712
- )
713
- return artifact
714
-
715
-
716
- @classmethod # type: ignore
717
- @doc_args(Artifact.from_mudata.__doc__)
718
- def from_mudata(
719
- cls,
720
- mdata: MuData,
721
- key: str | None = None,
722
- description: str | None = None,
723
- run: Run | None = None,
724
- revises: Artifact | None = None,
725
- **kwargs,
726
- ) -> Artifact:
727
- """{}""" # noqa: D415
728
- artifact = Artifact(
729
- data=mdata,
730
- key=key,
731
- run=run,
732
- description=description,
733
- revises=revises,
734
- _accessor="MuData",
735
- type="dataset",
736
- **kwargs,
737
- )
738
- return artifact
739
-
740
-
741
- @classmethod # type: ignore
742
- @doc_args(Artifact.from_dir.__doc__)
743
- def from_dir(
744
- cls,
745
- path: UPathStr,
746
- key: str | None = None,
747
- *,
748
- run: Run | None = None,
749
- ) -> list[Artifact]:
750
- """{}""" # noqa: D415
751
- logger.warning(
752
- "this creates one artifact per file in the directory - consider"
753
- " ln.Artifact(dir_path) to get one artifact for the entire directory"
754
- )
755
- folderpath: UPath = create_path(path) # returns Path for local
756
- default_storage = settings.storage.record
757
- using_key = settings._using_key
758
- storage, use_existing_storage = process_pathlike(
759
- folderpath, default_storage, using_key
760
- )
761
- folder_key_path: PurePath | Path
762
- if key is None:
763
- if not use_existing_storage:
764
- logger.warning(
765
- "folder is outside existing storage location, will copy files from"
766
- f" {path} to {storage.root}/{folderpath.name}"
767
- )
768
- folder_key_path = Path(folderpath.name)
769
- else:
770
- # maintain the hierarchy within an existing storage location
771
- folder_key_path = get_relative_path_to_directory(
772
- folderpath, UPath(storage.root)
773
- )
774
- else:
775
- folder_key_path = Path(key)
776
-
777
- # always sanitize by stripping a trailing slash
778
- folder_key = folder_key_path.as_posix().rstrip("/")
779
-
780
- # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
781
- pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
782
-
783
- # silence fine-grained logging
784
- verbosity = settings.verbosity
785
- verbosity_int = settings._verbosity_int
786
- if verbosity_int >= 1:
787
- settings.verbosity = "warning"
788
- artifacts_dict = {}
789
- for filepath in folderpath.rglob(pattern):
790
- if filepath.is_file():
791
- relative_path = get_relative_path_to_directory(filepath, folderpath)
792
- artifact_key = folder_key + "/" + relative_path.as_posix()
793
- # if creating from rglob, we don't need to check for existence
794
- artifact = Artifact(
795
- filepath, run=run, key=artifact_key, skip_check_exists=True
796
- )
797
- artifacts_dict[artifact.uid] = artifact
798
- settings.verbosity = verbosity
799
-
800
- # run sanity check on hashes
801
- hashes = [
802
- artifact.hash
803
- for artifact in artifacts_dict.values()
804
- if artifact.hash is not None
805
- ]
806
- uids = artifacts_dict.keys()
807
- if len(set(hashes)) == len(hashes):
808
- artifacts = list(artifacts_dict.values())
809
- else:
810
- # consider exact duplicates (same id, same hash)
811
- # below can't happen anymore because artifacts is a dict now
812
- # if len(set(uids)) == len(set(hashes)):
813
- # logger.warning("dropping duplicate records in list of artifact records")
814
- # artifacts = list(set(uids))
815
- # consider false duplicates (different id, same hash)
816
- if not len(set(uids)) == len(set(hashes)):
817
- seen_hashes = set()
818
- non_unique_artifacts = {
819
- hash: artifact
820
- for hash, artifact in artifacts_dict.items()
821
- if artifact.hash in seen_hashes or seen_hashes.add(artifact.hash) # type: ignore
822
- }
823
- display_non_unique = "\n ".join(
824
- f"{artifact}" for artifact in non_unique_artifacts
825
- )
826
- logger.warning(
827
- "there are multiple artifact uids with the same hashes, dropping"
828
- f" {len(non_unique_artifacts)} duplicates out of"
829
- f" {len(artifacts_dict)} artifacts:\n {display_non_unique}"
830
- )
831
- artifacts = [
832
- artifact
833
- for artifact in artifacts_dict.values()
834
- if artifact not in non_unique_artifacts.values()
835
- ]
836
- logger.success(
837
- f"created {len(artifacts)} artifacts from directory using storage"
838
- f" {storage.root} and key = {folder_key}/"
839
- )
840
- return artifacts
841
-
842
-
843
- # docstring handled through attach_func_to_class_method
844
- def replace(
845
- self,
846
- data: UPathStr,
847
- run: Run | None = None,
848
- format: str | None = None,
849
- ) -> None:
850
- default_storage = settings.storage.record
851
- kwargs, privates = get_artifact_kwargs_from_data(
852
- provisional_uid=self.uid,
853
- data=data,
854
- key=self.key,
855
- run=run,
856
- format=format,
857
- default_storage=default_storage,
858
- version=None,
859
- is_replace=True,
860
- )
861
-
862
- # this artifact already exists
863
- if privates is None:
864
- return kwargs
865
-
866
- check_path_in_storage = privates["check_path_in_storage"]
867
- if check_path_in_storage:
868
- raise ValueError("Can only replace with a local file not in any Storage.")
869
-
870
- if self.key is not None and not self._key_is_virtual:
871
- key_path = PurePosixPath(self.key)
872
- new_filename = f"{key_path.stem}{kwargs['suffix']}"
873
- # the following will only be true if the suffix changes!
874
- if key_path.name != new_filename:
875
- self._clear_storagekey = self.key
876
- self.key = str(key_path.with_name(new_filename))
877
- logger.warning(
878
- f"replacing the file will replace key '{key_path}' with '{self.key}'"
879
- f" and delete '{key_path}' upon `save()`"
880
- )
881
- else:
882
- old_storage = auto_storage_key_from_artifact(self)
883
- is_dir = self.n_objects is not None
884
- new_storage = auto_storage_key_from_artifact_uid(
885
- self.uid, kwargs["suffix"], is_dir
886
- )
887
- if old_storage != new_storage:
888
- self._clear_storagekey = old_storage
889
- if self.key is not None:
890
- new_key_path = PurePosixPath(self.key).with_suffix(kwargs["suffix"])
891
- self.key = str(new_key_path)
892
-
893
- self.suffix = kwargs["suffix"]
894
- self.size = kwargs["size"]
895
- self.hash = kwargs["hash"]
896
- self._hash_type = kwargs["_hash_type"]
897
- self.run_id = kwargs["run_id"]
898
- self.run = kwargs["run"]
899
-
900
- self._local_filepath = privates["local_filepath"]
901
- self._cloud_filepath = privates["cloud_filepath"]
902
- self._memory_rep = privates["memory_rep"]
903
- # no need to upload if new file is already in storage
904
- self._to_store = not check_path_in_storage
905
-
906
-
907
- # docstring handled through attach_func_to_class_method
908
- def open(
909
- self, mode: str = "r", is_run_input: bool | None = None
910
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
911
- # ignore empty suffix for now
912
- suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
913
- if self.suffix not in suffixes:
914
- raise ValueError(
915
- "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
916
- " use one of the following suffixes for the object name:"
917
- f" {', '.join(suffixes[:-1])}."
918
- )
919
- if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
920
- raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
921
-
922
- from lamindb.core.storage._backed_access import _track_writes_factory, backed_access
923
-
924
- using_key = settings._using_key
925
- filepath, cache_key = filepath_cache_key_from_artifact(self, using_key=using_key)
926
- is_tiledbsoma_w = (
927
- filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
928
- ) and mode == "w"
929
- # consider the case where an object is already locally cached
930
- localpath = setup_settings.instance.storage.cloud_to_local_no_update(
931
- filepath, cache_key=cache_key
932
- )
933
- if not is_tiledbsoma_w and localpath.exists():
934
- access = backed_access(localpath, mode, using_key)
935
- else:
936
- access = backed_access(filepath, mode, using_key)
937
- if is_tiledbsoma_w:
938
-
939
- def finalize():
940
- nonlocal self, filepath, localpath
941
- if not isinstance(filepath, LocalPathClasses):
942
- _, hash, _, _ = get_stat_dir_cloud(filepath)
943
- else:
944
- # this can be very slow
945
- _, hash, _, _ = hash_dir(filepath)
946
- if self.hash != hash:
947
- from ._record import init_self_from_db
948
-
949
- new_version = Artifact(
950
- filepath, revises=self, _is_internal_call=True
951
- ).save()
952
- init_self_from_db(self, new_version)
953
-
954
- if localpath != filepath and localpath.exists():
955
- shutil.rmtree(localpath)
956
-
957
- access = _track_writes_factory(access, finalize)
958
- # only call if open is successful
959
- _track_run_input(self, is_run_input)
960
- return access
961
-
962
-
963
- # can't really just call .cache in .load because of double tracking
964
- def _synchronize_cleanup_on_error(
965
- filepath: UPath, cache_key: str | None = None
966
- ) -> UPath:
967
- try:
968
- cache_path = setup_settings.instance.storage.cloud_to_local(
969
- filepath, cache_key=cache_key, print_progress=True
970
- )
971
- except Exception as e:
972
- if not isinstance(filepath, LocalPathClasses):
973
- cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
974
- filepath, cache_key=cache_key
975
- )
976
- if cache_path.is_file():
977
- cache_path.unlink(missing_ok=True)
978
- elif cache_path.is_dir():
979
- shutil.rmtree(cache_path)
980
- raise e
981
- return cache_path
982
-
983
-
984
- # docstring handled through attach_func_to_class_method
985
- def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
986
- if hasattr(self, "_memory_rep") and self._memory_rep is not None:
987
- access_memory = self._memory_rep
988
- else:
989
- filepath, cache_key = filepath_cache_key_from_artifact(
990
- self, using_key=settings._using_key
991
- )
992
- cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
993
- # cache_path is local so doesn't trigger any sync in load_to_memory
994
- access_memory = load_to_memory(cache_path, **kwargs)
995
- # only call if load is successful
996
- _track_run_input(self, is_run_input)
997
- return access_memory
998
-
999
-
1000
- # docstring handled through attach_func_to_class_method
1001
- def cache(self, is_run_input: bool | None = None) -> Path:
1002
- filepath, cache_key = filepath_cache_key_from_artifact(
1003
- self, using_key=settings._using_key
1004
- )
1005
- cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
1006
- # only call if sync is successful
1007
- _track_run_input(self, is_run_input)
1008
- return cache_path
1009
-
1010
-
1011
- # docstring handled through attach_func_to_class_method
1012
- def delete(
1013
- self,
1014
- permanent: bool | None = None,
1015
- storage: bool | None = None,
1016
- using_key: str | None = None,
1017
- ) -> None:
1018
- # this first check means an invalid delete fails fast rather than cascading through
1019
- # database and storage permission errors
1020
- if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
1021
- isettings = setup_settings.instance
1022
- if self.storage.instance_uid != isettings.uid and (storage or storage is None):
1023
- raise IntegrityError(
1024
- "Cannot simply delete artifacts outside of this instance's managed storage locations."
1025
- "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
1026
- f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
1027
- f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
1028
- )
1029
- # by default, we only move artifacts into the trash (visibility = -1)
1030
- trash_visibility = VisibilityChoice.trash.value
1031
- if self.visibility > trash_visibility and not permanent:
1032
- if storage is not None:
1033
- logger.warning("moving artifact to trash, storage arg is ignored")
1034
- # move to trash
1035
- self.visibility = trash_visibility
1036
- self.save()
1037
- logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
1038
- return
1039
-
1040
- # if the artifact is already in the trash
1041
- # permanent delete skips the trash
1042
- if permanent is None:
1043
- # ask for confirmation of permanent delete
1044
- response = input(
1045
- "Artifact record is already in trash! Are you sure you want to permanently"
1046
- " delete it? (y/n) You can't undo this action."
1047
- )
1048
- delete_record = response == "y"
1049
- else:
1050
- assert permanent # noqa: S101
1051
- delete_record = True
1052
-
1053
- if delete_record:
1054
- # need to grab file path before deletion
1055
- try:
1056
- path, _ = filepath_from_artifact(self, using_key)
1057
- except OSError:
1058
- # we can still delete the record
1059
- logger.warning("Could not get path")
1060
- storage = False
1061
- # only delete in storage if DB delete is successful
1062
- # DB delete might error because of a foreign key constraint violated etc.
1063
- self._delete_skip_storage()
1064
- if self.key is None or self._key_is_virtual:
1065
- # do not ask for confirmation also if storage is None
1066
- delete_in_storage = storage is None or storage
1067
- else:
1068
- # for artifacts with non-virtual semantic storage keys (key is not None)
1069
- # ask for extra-confirmation
1070
- if storage is None:
1071
- response = input(
1072
- f"Are you sure to want to delete {path}? (y/n) You can't undo"
1073
- " this action."
1074
- )
1075
- delete_in_storage = response == "y"
1076
- else:
1077
- delete_in_storage = storage
1078
- if not delete_in_storage:
1079
- logger.important(f"a file/folder remains here: {path}")
1080
- # we don't yet have logic to bring back the deleted metadata record
1081
- # in case storage deletion fails - this is important for ACID down the road
1082
- if delete_in_storage:
1083
- delete_msg = delete_storage(path, raise_file_not_found_error=False)
1084
- if delete_msg != "did-not-delete":
1085
- logger.success(f"deleted {colors.yellow(f'{path}')}")
1086
-
1087
-
1088
- def _delete_skip_storage(artifact, *args, **kwargs) -> None:
1089
- super(Artifact, artifact).delete(*args, **kwargs)
1090
-
1091
-
1092
- # docstring handled through attach_func_to_class_method
1093
- def save(self, upload: bool | None = None, **kwargs) -> Artifact:
1094
- state_was_adding = self._state.adding
1095
- print_progress = kwargs.pop("print_progress", True)
1096
- access_token = kwargs.pop("access_token", None)
1097
- local_path = None
1098
- if upload and setup_settings.instance.keep_artifacts_local:
1099
- # switch local storage location to cloud
1100
- local_path = self.path
1101
- self.storage_id = setup_settings.instance.storage.id
1102
- self._local_filepath = local_path
1103
- # switch to virtual storage key upon upload
1104
- # the local filepath is already cached at that point
1105
- self._key_is_virtual = True
1106
- # ensure that the artifact is uploaded
1107
- self._to_store = True
1108
-
1109
- self._save_skip_storage(**kwargs)
1110
-
1111
- from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload
1112
-
1113
- using_key = None
1114
- if "using" in kwargs:
1115
- using_key = kwargs["using"]
1116
- exception = check_and_attempt_upload(
1117
- self, using_key, access_token=access_token, print_progress=print_progress
1118
- )
1119
- if exception is not None:
1120
- self._delete_skip_storage()
1121
- raise RuntimeError(exception)
1122
- exception = check_and_attempt_clearing(self, using_key)
1123
- if exception is not None:
1124
- raise RuntimeError(exception)
1125
- if local_path is not None and not state_was_adding:
1126
- # only move the local artifact to cache if it was not newly created
1127
- local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
1128
- # don't use Path.rename here because of cross-device link error
1129
- # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
1130
- shutil.move(
1131
- local_path, # type: ignore
1132
- local_path_cache,
1133
- )
1134
- logger.important(f"moved local artifact to cache: {local_path_cache}")
1135
- return self
1136
-
1137
-
1138
- def _save_skip_storage(file, **kwargs) -> None:
1139
- save_feature_sets(file)
1140
- super(Artifact, file).save(**kwargs)
1141
- save_feature_set_links(file)
1142
-
1143
-
1144
- @property # type: ignore
1145
- @doc_args(Artifact.path.__doc__)
1146
- def path(self) -> Path | UPath:
1147
- """{}""" # noqa: D415
1148
- # return only the path, without StorageSettings
1149
- filepath, _ = filepath_from_artifact(self, using_key=settings._using_key)
1150
- return filepath
1151
-
1152
-
1153
- # get cache path without triggering sync
1154
- @property # type: ignore
1155
- def _cache_path(self) -> UPath:
1156
- filepath, cache_key = filepath_cache_key_from_artifact(
1157
- self, using_key=settings._using_key
1158
- )
1159
- if isinstance(filepath, LocalPathClasses):
1160
- return filepath
1161
- return setup_settings.instance.storage.cloud_to_local_no_update(
1162
- filepath, cache_key=cache_key
1163
- )
1164
-
1165
-
1166
- # docstring handled through attach_func_to_class_method
1167
- def restore(self) -> None:
1168
- self.visibility = VisibilityChoice.default.value
1169
- self.save()
1170
-
1171
-
1172
- METHOD_NAMES = [
1173
- "__init__",
1174
- "from_anndata",
1175
- "from_df",
1176
- "from_mudata",
1177
- "open",
1178
- "cache",
1179
- "load",
1180
- "delete",
1181
- "save",
1182
- "replace",
1183
- "from_dir",
1184
- "restore",
1185
- ]
1186
-
1187
- if ln_setup._TESTING:
1188
- from inspect import signature
1189
-
1190
- SIGS = {
1191
- name: signature(getattr(Artifact, name))
1192
- for name in METHOD_NAMES
1193
- if name != "__init__"
1194
- }
1195
-
1196
- for name in METHOD_NAMES:
1197
- attach_func_to_class_method(name, Artifact, globals())
1198
-
1199
- # privates currently dealt with separately
1200
- Artifact._delete_skip_storage = _delete_skip_storage
1201
- Artifact._save_skip_storage = _save_skip_storage
1202
- Artifact._cache_path = _cache_path
1203
- Artifact.path = path
1204
- Artifact.describe = describe
1205
- Artifact.view_lineage = view_lineage
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from collections.abc import Mapping
6
+ from pathlib import Path, PurePath, PurePosixPath
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import fsspec
10
+ import lamindb_setup as ln_setup
11
+ import pandas as pd
12
+ from anndata import AnnData
13
+ from django.db.models import Q, QuerySet
14
+ from lamin_utils import colors, logger
15
+ from lamindb_setup import settings as setup_settings
16
+ from lamindb_setup._init_instance import register_storage_in_instance
17
+ from lamindb_setup.core._docs import doc_args
18
+ from lamindb_setup.core._settings_storage import init_storage
19
+ from lamindb_setup.core.hashing import hash_dir, hash_file
20
+ from lamindb_setup.core.upath import (
21
+ create_path,
22
+ extract_suffix_from_path,
23
+ get_stat_dir_cloud,
24
+ get_stat_file_cloud,
25
+ )
26
+ from lnschema_core.models import Artifact, FeatureManager, ParamManager, Run, Storage
27
+ from lnschema_core.types import (
28
+ VisibilityChoice,
29
+ )
30
+
31
+ from lamindb._utils import attach_func_to_class_method
32
+ from lamindb.core._data import _track_run_input, describe, view_lineage
33
+ from lamindb.core._settings import settings
34
+ from lamindb.core.exceptions import IntegrityError, InvalidArgument
35
+ from lamindb.core.loaders import load_to_memory
36
+ from lamindb.core.storage import (
37
+ LocalPathClasses,
38
+ UPath,
39
+ delete_storage,
40
+ infer_suffix,
41
+ write_to_disk,
42
+ )
43
+ from lamindb.core.storage.paths import (
44
+ auto_storage_key_from_artifact,
45
+ auto_storage_key_from_artifact_uid,
46
+ check_path_is_child_of_root,
47
+ filepath_cache_key_from_artifact,
48
+ filepath_from_artifact,
49
+ )
50
+ from lamindb.core.versioning import (
51
+ create_uid,
52
+ message_update_key_in_version_family,
53
+ )
54
+
55
+ from .core._data import (
56
+ add_transform_to_kwargs,
57
+ get_run,
58
+ save_feature_set_links,
59
+ save_feature_sets,
60
+ )
61
+ from .core.storage.objects import _mudata_is_installed
62
+ from .core.storage.paths import AUTO_KEY_PREFIX
63
+
64
+ try:
65
+ from .core.storage._zarr import zarr_is_adata
66
+ except ImportError:
67
+
68
+ def zarr_is_adata(storepath): # type: ignore
69
+ raise ImportError("Please install zarr: pip install zarr")
70
+
71
+
72
+ if TYPE_CHECKING:
73
+ from lamindb_setup.core.types import UPathStr
74
+ from mudata import MuData
75
+ from tiledbsoma import Collection as SOMACollection
76
+ from tiledbsoma import Experiment as SOMAExperiment
77
+
78
+ from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
79
+
80
+
81
+ def process_pathlike(
82
+ filepath: UPath,
83
+ default_storage: Storage,
84
+ using_key: str | None,
85
+ skip_existence_check: bool = False,
86
+ ) -> tuple[Storage, bool]:
87
+ if not skip_existence_check:
88
+ try: # check if file exists
89
+ if not filepath.exists():
90
+ raise FileNotFoundError(filepath)
91
+ except PermissionError:
92
+ pass
93
+ if check_path_is_child_of_root(filepath, default_storage.root):
94
+ use_existing_storage_key = True
95
+ return default_storage, use_existing_storage_key
96
+ else:
97
+ # check whether the path is part of one of the existing
98
+ # already-registered storage locations
99
+ result = False
100
+ # within the hub, we don't want to perform check_path_in_existing_storage
101
+ if using_key is None:
102
+ result = check_path_in_existing_storage(filepath, using_key)
103
+ if isinstance(result, Storage):
104
+ use_existing_storage_key = True
105
+ return result, use_existing_storage_key
106
+ else:
107
+ # if the path is in the cloud, we have a good candidate
108
+ # for the storage root: the bucket
109
+ if not isinstance(filepath, LocalPathClasses):
110
+ # for a cloud path, new_root is always the bucket name
111
+ new_root = list(filepath.parents)[-1]
112
+ # do not register remote storage locations on hub if the current instance
113
+ # is not managed on the hub
114
+ storage_settings, _ = init_storage(
115
+ new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
116
+ )
117
+ storage_record = register_storage_in_instance(storage_settings)
118
+ use_existing_storage_key = True
119
+ return storage_record, use_existing_storage_key
120
+ # if the filepath is local
121
+ else:
122
+ use_existing_storage_key = False
123
+ # if the default storage is local we'll throw an error if the user
124
+ # doesn't provide a key
125
+ if default_storage.type == "local":
126
+ return default_storage, use_existing_storage_key
127
+ # if the default storage is in the cloud (the file is going to
128
+ # be uploaded upon saving it), we treat the filepath as a cache
129
+ else:
130
+ return default_storage, use_existing_storage_key
131
+
132
+
133
+ def process_data(
134
+ provisional_uid: str,
135
+ data: UPathStr | pd.DataFrame | AnnData,
136
+ format: str | None,
137
+ key: str | None,
138
+ default_storage: Storage,
139
+ using_key: str | None,
140
+ skip_existence_check: bool = False,
141
+ ) -> tuple[Any, Path | UPath, str, Storage, bool]:
142
+ """Serialize a data object that's provided as file or in memory."""
143
+ # if not overwritten, data gets stored in default storage
144
+ if _mudata_is_installed():
145
+ from mudata import MuData
146
+
147
+ data_types = (pd.DataFrame, AnnData, MuData)
148
+ else:
149
+ data_types = (pd.DataFrame, AnnData) # type:ignore
150
+
151
+ if isinstance(data, (str, Path, UPath)): # UPathStr, spelled out
152
+ access_token = (
153
+ default_storage._access_token
154
+ if hasattr(default_storage, "_access_token")
155
+ else None
156
+ )
157
+ path = create_path(data, access_token=access_token).resolve()
158
+ storage, use_existing_storage_key = process_pathlike(
159
+ path,
160
+ default_storage=default_storage,
161
+ using_key=using_key,
162
+ skip_existence_check=skip_existence_check,
163
+ )
164
+ suffix = extract_suffix_from_path(path)
165
+ memory_rep = None
166
+ elif isinstance(data, data_types):
167
+ storage = default_storage
168
+ memory_rep = data
169
+ if key is not None:
170
+ key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
171
+ # use suffix as the (adata) format if the format is not provided
172
+ if isinstance(data, AnnData) and format is None and len(key_suffix) > 0:
173
+ format = key_suffix[1:]
174
+ else:
175
+ key_suffix = None
176
+ suffix = infer_suffix(data, format)
177
+ if key_suffix is not None and key_suffix != suffix:
178
+ raise InvalidArgument(
179
+ f"The suffix '{key_suffix}' of the provided key is incorrect, it should"
180
+ f" be '{suffix}'."
181
+ )
182
+ cache_name = f"{provisional_uid}{suffix}"
183
+ path = settings.storage.cache_dir / cache_name
184
+ # Alex: I don't understand the line below
185
+ if path.suffixes == []:
186
+ path = path.with_suffix(suffix)
187
+ write_to_disk(data, path)
188
+ use_existing_storage_key = False
189
+ else:
190
+ raise NotImplementedError(
191
+ f"Do not know how to create a artifact object from {data}, pass a path"
192
+ " instead!"
193
+ )
194
+ return memory_rep, path, suffix, storage, use_existing_storage_key
195
+
196
+
197
+ def get_stat_or_artifact(
+     path: UPath,
+     key: str | None = None,
+     check_hash: bool = True,
+     is_replace: bool = False,
+     instance: str | None = None,
+ ) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
+     n_objects = None
+     if settings.creation.artifact_skip_size_hash:
+         return None, None, None, n_objects, None
+     stat = path.stat()  # one network request
+     if not isinstance(path, LocalPathClasses):
+         size, hash, hash_type = None, None, None
+         if stat is not None:
+             # convert UPathStatResult to fsspec info dict
+             stat = stat.as_info()
+             if "ETag" in stat:  # is file
+                 size, hash, hash_type = get_stat_file_cloud(stat)
+             elif stat["type"] == "directory":
+                 size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
+         if hash is None:
+             logger.warning(f"did not add hash for {path}")
+             return size, hash, hash_type, n_objects, None
+     else:
+         if path.is_dir():
+             size, hash, hash_type, n_objects = hash_dir(path)
+         else:
+             hash, hash_type = hash_file(path)
+             size = stat.st_size
+     if not check_hash:
+         return size, hash, hash_type, n_objects, None
+     previous_artifact_version = None
+     if key is None or is_replace:
+         result = Artifact.objects.using(instance).filter(hash=hash).all()
+         artifact_with_same_hash_exists = len(result) > 0
+     else:
+         storage_id = settings.storage.id
+         result = (
+             Artifact.objects.using(instance)
+             .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
+             .order_by("-created_at")
+             .all()
+         )
+         artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
+         if not artifact_with_same_hash_exists and len(result) > 0:
+             logger.important(
+                 f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
+             )
+             previous_artifact_version = result[0]
+     if artifact_with_same_hash_exists:
+         if settings.creation.artifact_if_hash_exists == "error":
+             msg = f"artifact with same hash exists: {result[0]}"
+             hint = (
+                 "💡 you can make this error a warning:\n"
+                 "   ln.settings.creation.artifact_if_hash_exists"
+             )
+             raise FileExistsError(f"{msg}\n{hint}")
+         elif settings.creation.artifact_if_hash_exists == "warn_create_new":
+             logger.warning(
+                 "creating new Artifact object despite existing artifact with same hash:"
+                 f" {result[0]}"
+             )
+             return size, hash, hash_type, n_objects, None
+         else:
+             if result[0].visibility == -1:
+                 raise FileExistsError(
+                     f"You're trying to re-create this artifact in trash: {result[0]}\n"
+                     "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
+                 )
+             logger.important(f"returning existing artifact with same hash: {result[0]}")
+             return result[0]
+     else:
+         return size, hash, hash_type, n_objects, previous_artifact_version
+
+
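The hash check above is what drives deduplication: re-registering identical content returns the existing record rather than creating a new row. A minimal sketch of the observable behavior (hypothetical file names; assumes a loaded instance):

```python
import lamindb as ln

# "warn_return_existing" is the default for artifact_if_hash_exists
a1 = ln.Artifact("data/pbmc.h5ad", description="pbmc").save()
a2 = ln.Artifact("data/pbmc_copy.h5ad", description="pbmc again")
assert a2.uid == a1.uid  # same bytes, same hash -> existing record returned
```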
+ def check_path_in_existing_storage(
+     path: Path | UPath, using_key: str | None = None
+ ) -> Storage | bool:
+     for storage in Storage.objects.using(using_key).filter().all():
+         # if path is part of storage, return it
+         if check_path_is_child_of_root(path, root=storage.root):
+             return storage
+     return False
+
+
+ def get_relative_path_to_directory(
+     path: PurePath | Path | UPath, directory: PurePath | Path | UPath
+ ) -> PurePath | Path:
+     if isinstance(directory, UPath) and not isinstance(directory, LocalPathClasses):
+         # UPath.relative_to() is not behaving as it should (2023-04-07)
+         # need to lstrip otherwise inconsistent behavior across trailing slashes
+         # see test_artifact.py: test_get_relative_path_to_directory
+         relpath = PurePath(
+             path.as_posix().replace(directory.as_posix(), "").lstrip("/")
+         )
+     elif isinstance(directory, Path):
+         relpath = path.resolve().relative_to(directory.resolve())  # type: ignore
+     elif isinstance(directory, PurePath):
+         relpath = path.relative_to(directory)
+     else:
+         raise TypeError("Directory not of type Path or UPath")
+     return relpath
+
+
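A quick illustration of the string-based cloud branch above (hypothetical bucket; assumes `universal_pathlib` is installed, as it is for lamindb):

```python
from pathlib import PurePath

from upath import UPath

directory = UPath("s3://my-bucket/datasets")
path = UPath("s3://my-bucket/datasets/batch1/cells.parquet")
# strip the root prefix, then the leading slash, to get a clean relative key
relpath = PurePath(path.as_posix().replace(directory.as_posix(), "").lstrip("/"))
assert relpath.as_posix() == "batch1/cells.parquet"
```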
+ def get_artifact_kwargs_from_data(
+     *,
+     data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
+     key: str | None,
+     run: Run | None,
+     format: str | None,
+     provisional_uid: str,
+     version: str | None,
+     default_storage: Storage,
+     using_key: str | None = None,
+     is_replace: bool = False,
+     skip_check_exists: bool = False,
+ ):
+     run = get_run(run)
+     memory_rep, path, suffix, storage, use_existing_storage_key = process_data(
+         provisional_uid,
+         data,
+         format,
+         key,
+         default_storage,
+         using_key,
+         skip_check_exists,
+     )
+     stat_or_artifact = get_stat_or_artifact(
+         path=path,
+         key=key,
+         instance=using_key,
+         is_replace=is_replace,
+     )
+     if isinstance(stat_or_artifact, Artifact):
+         artifact = stat_or_artifact
+         # update the run of the existing artifact
+         if run is not None:
+             # save the information that this artifact was previously
+             # produced by another run
+             if artifact.run is not None:
+                 artifact.run._output_artifacts_with_later_updates.add(artifact)
+             # update the run of the artifact with the latest run
+             stat_or_artifact.run = run
+             stat_or_artifact.transform = run.transform
+         return artifact, None
+     else:
+         size, hash, hash_type, n_objects, revises = stat_or_artifact
+
+     if revises is not None:  # update provisional_uid
+         provisional_uid, revises = create_uid(revises=revises, version=version)
+         if settings.storage.cache_dir in path.parents:
+             path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
+
+     check_path_in_storage = False
+     if use_existing_storage_key:
+         inferred_key = get_relative_path_to_directory(
+             path=path, directory=UPath(storage.root)
+         ).as_posix()
+         if key is None:
+             key = inferred_key
+         else:
+             if key != inferred_key:
+                 raise InvalidArgument(
+                     f"The path '{data}' is already in registered storage"
+                     f" '{storage.root}' with key '{inferred_key}'\nYou passed"
+                     f" conflicting key '{key}': please move the file before"
+                     " registering it."
+                 )
+         check_path_in_storage = True
+     else:
+         storage = default_storage
+
+     log_storage_hint(
+         check_path_in_storage=check_path_in_storage,
+         storage=storage,
+         key=key,
+         uid=provisional_uid,
+         suffix=suffix,
+         is_dir=n_objects is not None,
+     )
+
+     # do we use a virtual or an actual storage key?
+     key_is_virtual = settings.creation._artifact_use_virtual_keys
+
+     # if the file is already in storage, independent of the default
+     # we use an actual storage key
+     if check_path_in_storage:
+         key_is_virtual = False
+
+     kwargs = {
+         "uid": provisional_uid,
+         "suffix": suffix,
+         "hash": hash,
+         "_hash_type": hash_type,
+         "key": key,
+         "size": size,
+         "storage_id": storage.id,
+         # passing both the id and the object
+         # to make them both available immediately
+         # after object creation
+         "n_objects": n_objects,
+         "n_observations": None,  # to implement
+         "run_id": run.id if run is not None else None,
+         "run": run,
+         "_key_is_virtual": key_is_virtual,
+         "revises": revises,
+     }
+     if not isinstance(path, LocalPathClasses):
+         local_filepath = None
+         cloud_filepath = path
+     else:
+         local_filepath = path
+         cloud_filepath = None
+     privates = {
+         "local_filepath": local_filepath,
+         "cloud_filepath": cloud_filepath,
+         "memory_rep": memory_rep,
+         "check_path_in_storage": check_path_in_storage,
+     }
+     return kwargs, privates
+
+
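For orientation: the helper above returns either `(existing_artifact, None)` or a `(kwargs, privates)` pair that the constructor unpacks; `kwargs` feeds the Django model, `privates` carries upload state. The field values below are illustrative, not real output:

```python
kwargs, privates = get_artifact_kwargs_from_data(
    data="data/matrix.parquet",  # hypothetical local file
    key="datasets/matrix.parquet",
    run=None,
    format=None,
    provisional_uid="a1b2c3d4e5f6g7h8",  # illustrative 16-char uid
    version=None,
    default_storage=settings.storage.record,
)
assert set(privates) == {
    "local_filepath",
    "cloud_filepath",
    "memory_rep",
    "check_path_in_storage",
}
```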
+ def log_storage_hint(
+     *,
+     check_path_in_storage: bool,
+     storage: Storage | None,
+     key: str | None,
+     uid: str,
+     suffix: str,
+     is_dir: bool,
+ ) -> None:
+     hint = ""
+     if check_path_in_storage:
+         display_root = storage.root  # type: ignore
+         # check whether path is local
+         if fsspec.utils.get_protocol(storage.root) == "file":  # type: ignore
+             # if it's a local path, check whether it's in the current working directory
+             root_path = Path(storage.root)  # type: ignore
+             if check_path_is_child_of_root(root_path, Path.cwd()):
+                 # only display the relative path, not the fully resolved path
+                 display_root = root_path.relative_to(Path.cwd())
+         hint += f"path in storage '{display_root}'"  # type: ignore
+     else:
+         hint += "path content will be copied to default storage upon `save()`"
+     if key is None:
+         storage_key = auto_storage_key_from_artifact_uid(uid, suffix, is_dir)
+         hint += f" with key `None` ('{storage_key}')"
+     else:
+         hint += f" with key '{key}'"
+     logger.hint(hint)
+
+
+ def data_is_anndata(data: AnnData | UPathStr) -> bool:
+     if isinstance(data, AnnData):
+         return True
+     if isinstance(data, (str, Path, UPath)):
+         data_path = UPath(data)
+         if data_path.suffix == ".h5ad":
+             return True
+         elif data_path.suffix == ".zarr":
+             # ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
+             if ".anndata" in data_path.suffixes:
+                 return True
+             # check only for local, expensive for cloud
+             if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
+                 return zarr_is_adata(data_path)
+             else:
+                 logger.warning("We do not check whether a cloud zarr store is AnnData.")
+                 return False
+     return False
+
+
+ def data_is_mudata(data: MuData | UPathStr) -> bool:
+     if _mudata_is_installed():
+         from mudata import MuData
+
+         if isinstance(data, MuData):
+             return True
+     if isinstance(data, (str, Path)):
+         return UPath(data).suffix in {".h5mu"}
+     return False
+
+
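The two detectors above boil down to suffix checks for paths; sketched with hypothetical file names:

```python
assert data_is_anndata("pbmc.h5ad")
assert data_is_anndata("pbmc.anndata.zarr")  # ".anndata" appears in the suffixes
assert not data_is_anndata("table.parquet")
assert data_is_mudata("multimodal.h5mu")
```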
+ def _check_accessor_artifact(data: Any, accessor: str | None = None):
+     if accessor is None:
+         if isinstance(data, pd.DataFrame):
+             logger.warning("data is a DataFrame, please use .from_df()")
+             accessor = "DataFrame"
+             return accessor
+
+         data_is_path = isinstance(data, (str, Path))
+         if data_is_anndata(data):
+             if not data_is_path:
+                 logger.warning("data is an AnnData, please use .from_anndata()")
+             accessor = "AnnData"
+         elif data_is_mudata(data):
+             if not data_is_path:
+                 logger.warning("data is a MuData, please use .from_mudata()")
+             accessor = "MuData"
+         elif not data_is_path:  # UPath is a subclass of Path
+             raise TypeError("data has to be a string, Path, or UPath")
+     return accessor
+
+
+ def __init__(artifact: Artifact, *args, **kwargs):
+     artifact.features = FeatureManager(artifact)
+     artifact.params = ParamManager(artifact)
+     # Below checks for the Django-internal call in from_db()
+     # it'd be better if we could avoid this, but not being able to create an Artifact
+     # from data with the default constructor renders the central class of the API
+     # essentially useless
+     # The danger below is not that a user might pass as many args (12 of them), but rather
+     # that at some point the Django API might change; on the other hand, this
+     # condition for calling the constructor based on kwargs should always
+     # stay robust
+     if len(args) == len(artifact._meta.concrete_fields):
+         super(Artifact, artifact).__init__(*args, **kwargs)
+         return None
+     # now we proceed with the user-facing constructor
+     if len(args) > 1:
+         raise ValueError("Only one non-keyword arg allowed: data")
+
+     data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
+     type: str = kwargs.pop("type") if "type" in kwargs else None
+     key: str | None = kwargs.pop("key") if "key" in kwargs else None
+     run: Run | None = kwargs.pop("run") if "run" in kwargs else None
+     description: str | None = (
+         kwargs.pop("description") if "description" in kwargs else None
+     )
+     revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
+     version: str | None = kwargs.pop("version") if "version" in kwargs else None
+     visibility: int | None = (
+         kwargs.pop("visibility")
+         if "visibility" in kwargs
+         else VisibilityChoice.default.value
+     )
+     format = kwargs.pop("format") if "format" in kwargs else None
+     _is_internal_call = kwargs.pop("_is_internal_call", False)
+     skip_check_exists = (
+         kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
+     )
+     if "default_storage" in kwargs:
+         default_storage = kwargs.pop("default_storage")
+     else:
+         if setup_settings.instance.keep_artifacts_local:
+             default_storage = setup_settings.instance.storage_local.record
+         else:
+             default_storage = setup_settings.instance.storage.record
+     using_key = (
+         kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
+     )
+     accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
+     accessor = _check_accessor_artifact(data=data, accessor=accessor)
+     if "is_new_version_of" in kwargs:
+         logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
+         revises = kwargs.pop("is_new_version_of")
+     if len(kwargs) != 0:
+         raise ValueError(
+             "Only data, key, run, description, version, revises, visibility"
+             f" can be passed, you passed: {kwargs}"
+         )
+     if revises is not None and key is not None and revises.key != key:
+         note = message_update_key_in_version_family(
+             suid=revises.stem_uid,
+             existing_key=revises.key,
+             new_key=key,
+             registry="Artifact",
+         )
+         raise ValueError(
+             f"`key` is {key}, but `revises.key` is '{revises.key}'\n\nEither do *not* pass `key` or update it as follows:\n\n{note}"
+         )
+     if revises is not None:
+         if not isinstance(revises, Artifact):
+             raise TypeError("`revises` has to be of type `Artifact`")
+         if description is None:
+             description = revises.description
+     if key is not None and AUTO_KEY_PREFIX in key:
+         raise ValueError(
+             f"Do not pass a key that contains the managed storage path prefix `{AUTO_KEY_PREFIX}`"
+         )
+     # below is for internal calls that require defining the storage location
+     # ahead of constructing the Artifact
+     if isinstance(data, (str, Path)) and AUTO_KEY_PREFIX in str(data):
+         if _is_internal_call:
+             is_automanaged_path = True
+             user_provided_key = key
+             key = None
+         else:
+             raise ValueError(
+                 f"Do not pass a path inside the `{AUTO_KEY_PREFIX}` directory."
+             )
+     else:
+         is_automanaged_path = False
+     provisional_uid, revises = create_uid(revises=revises, version=version)
+     kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
+         data=data,
+         key=key,
+         run=run,
+         format=format,
+         provisional_uid=provisional_uid,
+         version=version,
+         default_storage=default_storage,
+         using_key=using_key,
+         skip_check_exists=skip_check_exists,
+     )
+
+     # an object with the same hash already exists
+     if isinstance(kwargs_or_artifact, Artifact):
+         from ._record import init_self_from_db, update_attributes
+
+         init_self_from_db(artifact, kwargs_or_artifact)
+         # adding "key" here is dangerous because key might be auto-populated
+         update_attributes(artifact, {"description": description})
+         if artifact.key != key and key is not None:
+             logger.warning(
+                 f"key {artifact.key} on existing artifact differs from passed key {key}"
+             )
+         return None
+     else:
+         kwargs = kwargs_or_artifact
+
+     if revises is None:
+         revises = kwargs_or_artifact.pop("revises")
+
+     if data is not None:
+         artifact._local_filepath = privates["local_filepath"]
+         artifact._cloud_filepath = privates["cloud_filepath"]
+         artifact._memory_rep = privates["memory_rep"]
+         artifact._to_store = not privates["check_path_in_storage"]
+
+     if is_automanaged_path and _is_internal_call:
+         kwargs["_key_is_virtual"] = True
+         assert AUTO_KEY_PREFIX in kwargs["key"]  # noqa: S101
+         uid = kwargs["key"].replace(AUTO_KEY_PREFIX, "").replace(kwargs["suffix"], "")
+         kwargs["key"] = user_provided_key
+         if revises is not None:
+             assert uid.startswith(revises.stem_uid)  # noqa: S101
+         if len(uid) == 16:
+             if revises is None:
+                 uid += "0000"
+             else:
+                 uid, revises = create_uid(revises=revises, version=version)
+         kwargs["uid"] = uid
+
+     # only set key now so that we don't do a look-up on it in case revises is passed
+     if revises is not None:
+         kwargs["key"] = revises.key
+
+     kwargs["type"] = type
+     kwargs["version"] = version
+     kwargs["description"] = description
+     kwargs["visibility"] = visibility
+     kwargs["_accessor"] = accessor
+     kwargs["revises"] = revises
+     # this check needs to come down here because key might be populated from an
+     # existing file path during get_artifact_kwargs_from_data()
+     if (
+         kwargs["key"] is None
+         and kwargs["description"] is None
+         and kwargs["run"] is None
+     ):
+         raise ValueError("Pass one of key, run or description as a parameter")
+
+     add_transform_to_kwargs(kwargs, kwargs["run"])
+
+     super(Artifact, artifact).__init__(**kwargs)
+
+
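End to end, the constructor resolves to one of three outcomes: a fresh record, a new version in an existing family, or the already-registered artifact. A usage sketch with hypothetical paths:

```python
import lamindb as ln

# fresh record; the key is a virtual semantic path by default
artifact = ln.Artifact("data/counts.parquet", key="datasets/counts.parquet").save()

# registering changed content under the same key creates a new version
artifact_v2 = ln.Artifact("data/counts_v2.parquet", key="datasets/counts.parquet").save()
assert artifact_v2.stem_uid == artifact.stem_uid  # same version family
```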
+ @classmethod  # type: ignore
+ @doc_args(Artifact.from_df.__doc__)
+ def from_df(
+     cls,
+     df: pd.DataFrame,
+     key: str | None = None,
+     description: str | None = None,
+     run: Run | None = None,
+     revises: Artifact | None = None,
+     **kwargs,
+ ) -> Artifact:
+     """{}"""  # noqa: D415
+     artifact = Artifact(
+         data=df,
+         key=key,
+         run=run,
+         description=description,
+         revises=revises,
+         _accessor="DataFrame",
+         type="dataset",
+         **kwargs,
+     )
+     return artifact
+
+
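A usage sketch for the classmethod above (illustrative data):

```python
import lamindb as ln
import pandas as pd

df = pd.DataFrame({"cell_id": ["c1", "c2"], "n_genes": [512, 834]})
artifact = ln.Artifact.from_df(df, description="toy QC table").save()
assert artifact.suffix == ".parquet"  # DataFrames serialize to parquet
```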
+ @classmethod  # type: ignore
+ @doc_args(Artifact.from_anndata.__doc__)
+ def from_anndata(
+     cls,
+     adata: AnnData | UPathStr,
+     key: str | None = None,
+     description: str | None = None,
+     run: Run | None = None,
+     revises: Artifact | None = None,
+     **kwargs,
+ ) -> Artifact:
+     """{}"""  # noqa: D415
+     if not data_is_anndata(adata):
+         raise ValueError("data has to be an AnnData object or a path to AnnData-like")
+     artifact = Artifact(
+         data=adata,
+         key=key,
+         run=run,
+         description=description,
+         revises=revises,
+         _accessor="AnnData",
+         type="dataset",
+         **kwargs,
+     )
+     return artifact
+
+
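The AnnData analogue; unlike `from_df`, it also accepts a path, provided it passes the `data_is_anndata` check above (file names hypothetical):

```python
import anndata as ad
import lamindb as ln
import numpy as np

adata = ad.AnnData(np.ones((3, 2), dtype=np.float32))
artifact = ln.Artifact.from_anndata(adata, description="toy anndata").save()

# a path works too if its suffix marks it as AnnData-like
artifact2 = ln.Artifact.from_anndata("data/pbmc.h5ad", key="scrna/pbmc.h5ad")
```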
+ @classmethod  # type: ignore
+ @doc_args(Artifact.from_mudata.__doc__)
+ def from_mudata(
+     cls,
+     mdata: MuData,
+     key: str | None = None,
+     description: str | None = None,
+     run: Run | None = None,
+     revises: Artifact | None = None,
+     **kwargs,
+ ) -> Artifact:
+     """{}"""  # noqa: D415
+     artifact = Artifact(
+         data=mdata,
+         key=key,
+         run=run,
+         description=description,
+         revises=revises,
+         _accessor="MuData",
+         type="dataset",
+         **kwargs,
+     )
+     return artifact
+
+
+ @classmethod  # type: ignore
+ @doc_args(Artifact.from_dir.__doc__)
+ def from_dir(
+     cls,
+     path: UPathStr,
+     key: str | None = None,
+     *,
+     run: Run | None = None,
+ ) -> list[Artifact]:
+     """{}"""  # noqa: D415
+     logger.warning(
+         "this creates one artifact per file in the directory - consider"
+         " ln.Artifact(dir_path) to get one artifact for the entire directory"
+     )
+     folderpath: UPath = create_path(path)  # returns Path for local
+     default_storage = settings.storage.record
+     using_key = settings._using_key
+     storage, use_existing_storage = process_pathlike(
+         folderpath, default_storage, using_key
+     )
+     folder_key_path: PurePath | Path
+     if key is None:
+         if not use_existing_storage:
+             logger.warning(
+                 "folder is outside existing storage location, will copy files from"
+                 f" {path} to {storage.root}/{folderpath.name}"
+             )
+             folder_key_path = Path(folderpath.name)
+         else:
+             # maintain the hierarchy within an existing storage location
+             folder_key_path = get_relative_path_to_directory(
+                 folderpath, UPath(storage.root)
+             )
+     else:
+         folder_key_path = Path(key)
+
+     # always sanitize by stripping a trailing slash
+     folder_key = folder_key_path.as_posix().rstrip("/")
+
+     # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
+     pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
+
+     # silence fine-grained logging
+     verbosity = settings.verbosity
+     verbosity_int = settings._verbosity_int
+     if verbosity_int >= 1:
+         settings.verbosity = "warning"
+     artifacts_dict = {}
+     for filepath in folderpath.rglob(pattern):
+         if filepath.is_file():
+             relative_path = get_relative_path_to_directory(filepath, folderpath)
+             artifact_key = folder_key + "/" + relative_path.as_posix()
+             # if creating from rglob, we don't need to check for existence
+             artifact = Artifact(
+                 filepath, run=run, key=artifact_key, skip_check_exists=True
+             )
+             artifacts_dict[artifact.uid] = artifact
+     settings.verbosity = verbosity
+
+     # run sanity check on hashes
+     hashes = [
+         artifact.hash
+         for artifact in artifacts_dict.values()
+         if artifact.hash is not None
+     ]
+     uids = artifacts_dict.keys()
+     if len(set(hashes)) == len(hashes):
+         artifacts = list(artifacts_dict.values())
+     else:
+         # consider exact duplicates (same id, same hash)
+         # below can't happen anymore because artifacts is a dict now
+         # if len(set(uids)) == len(set(hashes)):
+         #     logger.warning("dropping duplicate records in list of artifact records")
+         #     artifacts = list(set(uids))
+         # consider false duplicates (different id, same hash)
+         if len(set(uids)) != len(set(hashes)):
+             seen_hashes = set()
+             non_unique_artifacts = {
+                 hash: artifact
+                 for hash, artifact in artifacts_dict.items()
+                 if artifact.hash in seen_hashes or seen_hashes.add(artifact.hash)  # type: ignore
+             }
+             display_non_unique = "\n    ".join(
+                 f"{artifact}" for artifact in non_unique_artifacts
+             )
+             logger.warning(
+                 "there are multiple artifact uids with the same hashes, dropping"
+                 f" {len(non_unique_artifacts)} duplicates out of"
+                 f" {len(artifacts_dict)} artifacts:\n    {display_non_unique}"
+             )
+             artifacts = [
+                 artifact
+                 for artifact in artifacts_dict.values()
+                 if artifact not in non_unique_artifacts.values()
+             ]
+     logger.success(
+         f"created {len(artifacts)} artifacts from directory using storage"
+         f" {storage.root} and key = {folder_key}/"
+     )
+     return artifacts
+
+
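A sketch of directory registration (hypothetical folder): each file becomes its own artifact, keyed under the given prefix, and the returned list can be bulk-saved:

```python
import lamindb as ln

artifacts = ln.Artifact.from_dir("data/fastq/", key="fastq")
ln.save(artifacts)  # bulk-save the list returned above
```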
+ # docstring handled through attach_func_to_class_method
+ def replace(
+     self,
+     data: UPathStr,
+     run: Run | None = None,
+     format: str | None = None,
+ ) -> None:
+     default_storage = settings.storage.record
+     kwargs, privates = get_artifact_kwargs_from_data(
+         provisional_uid=self.uid,
+         data=data,
+         key=self.key,
+         run=run,
+         format=format,
+         default_storage=default_storage,
+         version=None,
+         is_replace=True,
+     )
+
+     # this artifact already exists
+     if privates is None:
+         return kwargs
+
+     check_path_in_storage = privates["check_path_in_storage"]
+     if check_path_in_storage:
+         raise ValueError("Can only replace with a local file not in any Storage.")
+
+     if self.key is not None and not self._key_is_virtual:
+         key_path = PurePosixPath(self.key)
+         new_filename = f"{key_path.stem}{kwargs['suffix']}"
+         # the following will only be true if the suffix changes!
+         if key_path.name != new_filename:
+             self._clear_storagekey = self.key
+             self.key = str(key_path.with_name(new_filename))
+             logger.warning(
+                 f"replacing the file will replace key '{key_path}' with '{self.key}'"
+                 f" and delete '{key_path}' upon `save()`"
+             )
+     else:
+         old_storage = auto_storage_key_from_artifact(self)
+         is_dir = self.n_objects is not None
+         new_storage = auto_storage_key_from_artifact_uid(
+             self.uid, kwargs["suffix"], is_dir
+         )
+         if old_storage != new_storage:
+             self._clear_storagekey = old_storage
+             if self.key is not None:
+                 new_key_path = PurePosixPath(self.key).with_suffix(kwargs["suffix"])
+                 self.key = str(new_key_path)
+
+     self.suffix = kwargs["suffix"]
+     self.size = kwargs["size"]
+     self.hash = kwargs["hash"]
+     self._hash_type = kwargs["_hash_type"]
+     self.run_id = kwargs["run_id"]
+     self.run = kwargs["run"]
+
+     self._local_filepath = privates["local_filepath"]
+     self._cloud_filepath = privates["cloud_filepath"]
+     self._memory_rep = privates["memory_rep"]
+     # no need to upload if new file is already in storage
+     self._to_store = not check_path_in_storage
+
+
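Usage sketch for in-place replacement (hypothetical key and file): the record keeps its uid while content, hash, and size are swapped on `save()`:

```python
import lamindb as ln

artifact = ln.Artifact.filter(key="datasets/counts.parquet").one()
artifact.replace("data/counts_fixed.parquet")
artifact.save()  # the new content is uploaded here; the uid is preserved
```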
+ # docstring handled through attach_func_to_class_method
+ def open(
+     self, mode: str = "r", is_run_input: bool | None = None
+ ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
+     # ignore empty suffix for now
+     suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
+     if self.suffix not in suffixes:
+         raise ValueError(
+             "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
+             " use one of the following suffixes for the object name:"
+             f" {', '.join(suffixes[:-1])}."
+         )
+     if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
+         raise ValueError("Only a tiledbsoma store can be opened with `mode!='r'`.")
+
+     from lamindb.core.storage._backed_access import _track_writes_factory, backed_access
+
+     using_key = settings._using_key
+     filepath, cache_key = filepath_cache_key_from_artifact(self, using_key=using_key)
+     is_tiledbsoma_w = (
+         filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
+     ) and mode == "w"
+     # consider the case where an object is already locally cached
+     localpath = setup_settings.instance.storage.cloud_to_local_no_update(
+         filepath, cache_key=cache_key
+     )
+     if not is_tiledbsoma_w and localpath.exists():
+         access = backed_access(localpath, mode, using_key)
+     else:
+         access = backed_access(filepath, mode, using_key)
+         if is_tiledbsoma_w:
+
+             def finalize():
+                 nonlocal self, filepath, localpath
+                 if not isinstance(filepath, LocalPathClasses):
+                     _, hash, _, _ = get_stat_dir_cloud(filepath)
+                 else:
+                     # this can be very slow
+                     _, hash, _, _ = hash_dir(filepath)
+                 if self.hash != hash:
+                     from ._record import init_self_from_db
+
+                     new_version = Artifact(
+                         filepath, revises=self, _is_internal_call=True
+                     ).save()
+                     init_self_from_db(self, new_version)
+
+                     if localpath != filepath and localpath.exists():
+                         shutil.rmtree(localpath)
+
+             access = _track_writes_factory(access, finalize)
+     # only call if open is successful
+     _track_run_input(self, is_run_input)
+     return access
+
+
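Streaming access in practice, sketched for an AnnData-backed artifact (hypothetical key and obs column):

```python
import lamindb as ln

artifact = ln.Artifact.filter(key="scrna/pbmc.h5ad").one()
backed = artifact.open()  # AnnDataAccessor; no full download
subset = backed[backed.obs["cell_type"] == "B cell"].to_memory()
```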
+ # can't really just call .cache in .load because of double tracking
+ def _synchronize_cleanup_on_error(
+     filepath: UPath, cache_key: str | None = None
+ ) -> UPath:
+     try:
+         cache_path = setup_settings.instance.storage.cloud_to_local(
+             filepath, cache_key=cache_key, print_progress=True
+         )
+     except Exception as e:
+         if not isinstance(filepath, LocalPathClasses):
+             cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                 filepath, cache_key=cache_key
+             )
+             if cache_path.is_file():
+                 cache_path.unlink(missing_ok=True)
+             elif cache_path.is_dir():
+                 shutil.rmtree(cache_path)
+         raise e
+     return cache_path
+
+
+ # docstring handled through attach_func_to_class_method
+ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
+     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
+         access_memory = self._memory_rep
+     else:
+         filepath, cache_key = filepath_cache_key_from_artifact(
+             self, using_key=settings._using_key
+         )
+         cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
+         # cache_path is local so doesn't trigger any sync in load_to_memory
+         access_memory = load_to_memory(cache_path, **kwargs)
+     # only call if load is successful
+     _track_run_input(self, is_run_input)
+     return access_memory
+
+
+ # docstring handled through attach_func_to_class_method
+ def cache(self, is_run_input: bool | None = None) -> Path:
+     filepath, cache_key = filepath_cache_key_from_artifact(
+         self, using_key=settings._using_key
+     )
+     cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
+     # only call if sync is successful
+     _track_run_input(self, is_run_input)
+     return cache_path
+
+
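The difference between the two access methods above, sketched (hypothetical key):

```python
import lamindb as ln

artifact = ln.Artifact.filter(key="datasets/counts.parquet").one()
df = artifact.load()           # in-memory object, a DataFrame for .parquet
local_path = artifact.cache()  # local filesystem path, synced on demand
```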
+ # docstring handled through attach_func_to_class_method
+ def delete(
+     self,
+     permanent: bool | None = None,
+     storage: bool | None = None,
+     using_key: str | None = None,
+ ) -> None:
+     # this first check means an invalid delete fails fast rather than cascading through
+     # database and storage permission errors
+     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
+         isettings = setup_settings.instance
+         if self.storage.instance_uid != isettings.uid and (storage or storage is None):
+             raise IntegrityError(
+                 "Cannot simply delete artifacts outside of this instance's managed storage locations."
+                 "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
+                 f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
+                 f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
+             )
+     # by default, we only move artifacts into the trash (visibility = -1)
+     trash_visibility = VisibilityChoice.trash.value
+     if self.visibility > trash_visibility and not permanent:
+         if storage is not None:
+             logger.warning("moving artifact to trash, storage arg is ignored")
+         # move to trash
+         self.visibility = trash_visibility
+         self.save()
+         logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
+         return
+
+     # if the artifact is already in the trash
+     # permanent delete skips the trash
+     if permanent is None:
+         # ask for confirmation of permanent delete
+         response = input(
+             "Artifact record is already in trash! Are you sure you want to permanently"
+             " delete it? (y/n) You can't undo this action."
+         )
+         delete_record = response == "y"
+     else:
+         assert permanent  # noqa: S101
+         delete_record = True
+
+     if delete_record:
+         # need to grab file path before deletion
+         try:
+             path, _ = filepath_from_artifact(self, using_key)
+         except OSError:
+             # we can still delete the record
+             logger.warning("Could not get path")
+             storage = False
+         # only delete in storage if DB delete is successful
+         # DB delete might error because of a foreign key constraint violated etc.
+         self._delete_skip_storage()
+         if self.key is None or self._key_is_virtual:
+             # also do not ask for confirmation if storage is None
+             delete_in_storage = storage is None or storage
+         else:
+             # for artifacts with non-virtual semantic storage keys (key is not None)
+             # ask for extra confirmation
+             if storage is None:
+                 response = input(
+                     f"Are you sure you want to delete {path}? (y/n) You can't undo"
+                     " this action."
+                 )
+                 delete_in_storage = response == "y"
+             else:
+                 delete_in_storage = storage
+             if not delete_in_storage:
+                 logger.important(f"a file/folder remains here: {path}")
+         # we don't yet have logic to bring back the deleted metadata record
+         # in case storage deletion fails - this is important for ACID down the road
+         if delete_in_storage:
+             delete_msg = delete_storage(path, raise_file_not_found_error=False)
+             if delete_msg != "did-not-delete":
+                 logger.success(f"deleted {colors.yellow(f'{path}')}")
+
+
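The two-stage deletion above in practice (hypothetical key):

```python
import lamindb as ln

artifact = ln.Artifact.filter(key="datasets/counts.parquet").one()
artifact.delete()                # first call: move to trash (visibility = -1)
artifact.delete(permanent=True)  # second call: remove record and storage object
```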
+ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
+     super(Artifact, artifact).delete(*args, **kwargs)
+
+
+ # docstring handled through attach_func_to_class_method
+ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
+     state_was_adding = self._state.adding
+     print_progress = kwargs.pop("print_progress", True)
+     access_token = kwargs.pop("access_token", None)
+     local_path = None
+     if upload and setup_settings.instance.keep_artifacts_local:
+         # switch local storage location to cloud
+         local_path = self.path
+         self.storage_id = setup_settings.instance.storage.id
+         self._local_filepath = local_path
+         # switch to virtual storage key upon upload
+         # the local filepath is already cached at that point
+         self._key_is_virtual = True
+         # ensure that the artifact is uploaded
+         self._to_store = True
+
+     self._save_skip_storage(**kwargs)
+
+     from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload
+
+     using_key = None
+     if "using" in kwargs:
+         using_key = kwargs["using"]
+     exception = check_and_attempt_upload(
+         self, using_key, access_token=access_token, print_progress=print_progress
+     )
+     if exception is not None:
+         self._delete_skip_storage()
+         raise RuntimeError(exception)
+     exception = check_and_attempt_clearing(self, using_key)
+     if exception is not None:
+         raise RuntimeError(exception)
+     if local_path is not None and not state_was_adding:
+         # only move the local artifact to cache if it was not newly created
+         local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
+         # don't use Path.rename here because of cross-device link error
+         # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
+         shutil.move(
+             local_path,  # type: ignore
+             local_path_cache,
+         )
+         logger.important(f"moved local artifact to cache: {local_path_cache}")
+     return self
+
+
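With `keep_artifacts_local` enabled on the instance, the `upload` flag above promotes a locally kept artifact to the cloud default. Sketch (hypothetical file):

```python
import lamindb as ln

artifact = ln.Artifact("data/model.pt", key="models/model.pt").save()  # stays local
artifact.save(upload=True)  # switches the storage location to the cloud default
```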
+ def _save_skip_storage(file, **kwargs) -> None:
+     save_feature_sets(file)
+     super(Artifact, file).save(**kwargs)
+     save_feature_set_links(file)
+
+
+ @property  # type: ignore
+ @doc_args(Artifact.path.__doc__)
+ def path(self) -> Path | UPath:
+     """{}"""  # noqa: D415
+     # return only the path, without StorageSettings
+     filepath, _ = filepath_from_artifact(self, using_key=settings._using_key)
+     return filepath
+
+
+ # get cache path without triggering sync
+ @property  # type: ignore
+ def _cache_path(self) -> UPath:
+     filepath, cache_key = filepath_cache_key_from_artifact(
+         self, using_key=settings._using_key
+     )
+     if isinstance(filepath, LocalPathClasses):
+         return filepath
+     return setup_settings.instance.storage.cloud_to_local_no_update(
+         filepath, cache_key=cache_key
+     )
+
+
+ # docstring handled through attach_func_to_class_method
+ def restore(self) -> None:
+     self.visibility = VisibilityChoice.default.value
+     self.save()
+
+
+ METHOD_NAMES = [
+     "__init__",
+     "from_anndata",
+     "from_df",
+     "from_mudata",
+     "open",
+     "cache",
+     "load",
+     "delete",
+     "save",
+     "replace",
+     "from_dir",
+     "restore",
+ ]
+
+ if ln_setup._TESTING:
+     from inspect import signature
+
+     SIGS = {
+         name: signature(getattr(Artifact, name))
+         for name in METHOD_NAMES
+         if name != "__init__"
+     }
+
+ for name in METHOD_NAMES:
+     attach_func_to_class_method(name, Artifact, globals())
+
+ # privates currently dealt with separately
+ Artifact._delete_skip_storage = _delete_skip_storage
+ Artifact._save_skip_storage = _save_skip_storage
+ Artifact._cache_path = _cache_path
+ Artifact.path = path
+ Artifact.describe = describe
+ Artifact.view_lineage = view_lineage
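For context, `attach_func_to_class_method` (imported from `lamindb._utils`) binds these module-level functions onto the Django model class at import time; a rough sketch of the mechanism, not the actual helper:

```python
def attach_func_to_class_method(func_name, cls, globals_dict):
    implementation = globals_dict[func_name]
    stub = getattr(cls, func_name, None)
    # keep the docstring of the stub defined on the class if the
    # module-level implementation doesn't carry one
    if stub is not None and implementation.__doc__ is None:
        implementation.__doc__ = stub.__doc__
    setattr(cls, func_name, implementation)
```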