lamindb 0.45.0__py3-none-any.whl → 0.46a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_file_methods.py DELETED
@@ -1,319 +0,0 @@
1
- from itertools import islice
2
- from pathlib import Path
3
- from typing import Optional, Union, overload # noqa
4
-
5
- from lamin_logger import colors, logger
6
- from lamindb_setup import settings as setup_settings
7
- from lnschema_core.models import File, Run
8
- from lnschema_core.types import DataLike, PathLike
9
- from upath import UPath
10
-
11
- from lamindb._context import context
12
- from lamindb._file import from_dir, init_file, replace_file
13
- from lamindb._file_access import filepath_from_file
14
- from lamindb.dev._settings import settings
15
- from lamindb.dev.storage import delete_storage, load_to_memory
16
-
17
- try:
18
- from lamindb.dev.storage._backed_access import AnnDataAccessor, BackedAccessor
19
- except ImportError:
20
-
21
- class AnnDataAccessor: # type: ignore
22
- pass
23
-
24
- class BackedAccessor: # type: ignore
25
- pass
26
-
27
-
28
- File.__doc__ = """Files: data artifacts.
29
-
30
- Args:
31
- data: `Union[PathLike, DataLike]` A file path or an in-memory data
32
- object (`DataFrame`, `AnnData`) to serialize. Can be a cloud path, e.g.,
33
- `"s3://my-bucket/my_samples/my_file.fcs"`.
34
- key: `Optional[str] = None` A storage key: a relative filepath within the
35
- current default storage, e.g., `"my_samples/my_file.fcs"`.
36
- name: `Optional[str] = None` A name or title. Useful if key is auto-generated.
37
- run: `Optional[Run] = None` The run that created the file, gets auto-linked
38
- if `ln.track()` was called.
39
-
40
- Track where files come from by passing the generating :class:`~lamindb.Run`.
41
-
42
- Often, files store jointly measured observations of features: track them
43
- with :class:`~lamindb.FeatureSet`.
44
-
45
- If files have corresponding representations in storage and memory, LaminDB
46
- makes some configurable default choices (e.g., serialize a `DataFrame` as a
47
- `.parquet` file).
48
-
49
- .. admonition:: Examples for storage-memory correspondence
50
-
51
- Listed are typical `suffix` values & in memory data objects.
52
-
53
- - Table: `.csv`, `.tsv`, `.parquet`, `.ipc`
54
- ⟷ `pd.DataFrame`, `polars.DataFrame`
55
- - Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
56
- - Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
57
- - Array: zarr directory, TileDB store ⟷ zarr loader, TileDB loader
58
- - Fastq: `.fastq` ⟷ /
59
- - VCF: `.vcf` ⟷ /
60
- - QC: `.html` ⟷ /
61
-
62
- .. note::
63
-
64
- In some cases (`.zarr`), a `File` is present as many small objects in what
65
- appears to be a "folder" in storage. Hence, we often refer to files as data
66
- artifacts.
67
-
68
- """
69
-
70
-
71
- def backed(
72
- file: File, is_run_input: Optional[bool] = None
73
- ) -> Union[AnnDataAccessor, BackedAccessor]:
74
- """Return a cloud-backed data object to stream."""
75
- suffixes = (".h5", ".hdf5", ".h5ad", ".zrad", ".zarr")
76
- if file.suffix not in suffixes:
77
- raise ValueError(
78
- "File should have a zarr or h5 object as the underlying data, please use"
79
- " one of the following suffixes for the object name:"
80
- f" {', '.join(suffixes)}."
81
- )
82
- _track_run_input(file, is_run_input)
83
- from lamindb.dev.storage._backed_access import backed_access
84
-
85
- return backed_access(file)
86
-
87
-
88
- def _track_run_input(file: File, is_run_input: Optional[bool] = None):
89
- if is_run_input is None:
90
- if context.run is not None and not settings.track_run_inputs:
91
- logger.hint("Track this file as a run input by passing `is_run_input=True`")
92
- track_run_input = settings.track_run_inputs
93
- else:
94
- track_run_input = is_run_input
95
- if track_run_input:
96
- if context.run is None:
97
- raise ValueError(
98
- "No global run context set. Call ln.context.track() or link input to a"
99
- " run object via `run.inputs.append(file)`"
100
- )
101
- if not file.input_of.contains(context.run):
102
- context.run.save()
103
- file.input_of.add(context.run)
104
-
105
-
106
- def load(
107
- file: File, is_run_input: Optional[bool] = None, stream: bool = False
108
- ) -> DataLike:
109
- """Stage and load to memory.
110
-
111
- Returns in-memory representation if possible, e.g., an `AnnData` object
112
- for an `h5ad` file.
113
- """
114
- _track_run_input(file, is_run_input)
115
- return load_to_memory(filepath_from_file(file), stream=stream)
116
-
117
-
118
- def stage(file: File, is_run_input: Optional[bool] = None) -> Path:
119
- """Update cache from cloud storage if outdated.
120
-
121
- Returns a path to a locally cached on-disk object (say, a
122
- `.jpg` file).
123
- """
124
- if file.suffix in (".zrad", ".zarr"):
125
- raise RuntimeError("zarr object can't be staged, please use load() or stream()")
126
- _track_run_input(file, is_run_input)
127
- return setup_settings.instance.storage.cloud_to_local(filepath_from_file(file))
128
-
129
-
130
- def delete(file, storage: Optional[bool] = None) -> None:
131
- """Delete file, optionall from storage.
132
-
133
- Args:
134
- storage: `Optional[bool] = None` Indicate whether you want to delete the
135
- file in storage.
136
-
137
- Example:
138
-
139
- For any `File` object `file`, call:
140
-
141
- >>> file.delete(storage=True) # storage=True auto-confirms deletion in storage
142
- """
143
- if storage is None:
144
- response = input(f"Are you sure you want to delete {file} from storage? (y/n)")
145
- if response == "y":
146
- delete_in_storage = True
147
- else:
148
- delete_in_storage = storage
149
- if delete_in_storage:
150
- filepath = file.path()
151
- delete_storage(filepath)
152
- logger.success(f"Deleted stored object {colors.yellow(f'{filepath}')}")
153
- file._delete_skip_storage()
154
-
155
-
156
- def _delete_skip_storage(file, *args, **kwargs) -> None:
157
- super(File, file).delete(*args, **kwargs)
158
-
159
-
160
- def save(file, *args, **kwargs) -> None:
161
- """Save the file to database & storage."""
162
- file._save_skip_storage(*args, **kwargs)
163
- from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload
164
-
165
- exception = check_and_attempt_upload(file)
166
- if exception is not None:
167
- file._delete_skip_storage()
168
- raise RuntimeError(exception)
169
- exception = check_and_attempt_clearing(file)
170
- if exception is not None:
171
- raise RuntimeError(exception)
172
-
173
-
174
- def _save_skip_storage(file, *args, **kwargs) -> None:
175
- if file.transform is not None:
176
- file.transform.save()
177
- if file.run is not None:
178
- file.run.save()
179
- super(File, file).save(*args, **kwargs)
180
-
181
-
182
- def path(self) -> Union[Path, UPath]:
183
- """Path on storage."""
184
- from lamindb._file_access import filepath_from_file
185
-
186
- return filepath_from_file(self)
187
-
188
-
189
- # adapted from: https://stackoverflow.com/questions/9727673/list-directory-tree-structure-in-python # noqa
190
- @classmethod # type: ignore
191
- def tree(
192
- cls: File,
193
- prefix: Optional[str] = None,
194
- *,
195
- level: int = -1,
196
- limit_to_directories: bool = False,
197
- length_limit: int = 1000,
198
- ):
199
- """Given a prefix, print a visual tree structure of files."""
200
- space = " "
201
- branch = "│ "
202
- tee = "├── "
203
- last = "└── "
204
-
205
- if prefix is None:
206
- dir_path = settings.storage
207
- else:
208
- dir_path = settings.storage / prefix
209
- files = 0
210
- directories = 0
211
-
212
- def inner(dir_path: Union[Path, UPath], prefix: str = "", level=-1):
213
- nonlocal files, directories
214
- if not level:
215
- return # 0, stop iterating
216
- stripped_dir_path = dir_path.as_posix().rstrip("/")
217
- # do not iterate through zarr directories
218
- if stripped_dir_path.endswith((".zarr", ".zrad")):
219
- return
220
- # this is needed so that the passed folder is not listed
221
- contents = [
222
- i
223
- for i in dir_path.iterdir()
224
- if i.as_posix().rstrip("/") != stripped_dir_path
225
- ]
226
- if limit_to_directories:
227
- contents = [d for d in contents if d.is_dir()]
228
- pointers = [tee] * (len(contents) - 1) + [last]
229
- for pointer, path in zip(pointers, contents):
230
- if path.is_dir():
231
- yield prefix + pointer + path.name
232
- directories += 1
233
- extension = branch if pointer == tee else space
234
- yield from inner(path, prefix=prefix + extension, level=level - 1)
235
- elif not limit_to_directories:
236
- yield prefix + pointer + path.name
237
- files += 1
238
-
239
- folder_tree = f"{dir_path.name}"
240
- iterator = inner(dir_path, level=level)
241
- for line in islice(iterator, length_limit):
242
- folder_tree += f"\n{line}"
243
- if next(iterator, None):
244
- folder_tree += f"... length_limit, {length_limit}, reached, counted:"
245
- print(folder_tree)
246
- print(f"\n{directories} directories" + (f", {files} files" if files else ""))
247
-
248
-
249
- # likely needs an arg `key`
250
- def replace(
251
- file,
252
- data: Union[PathLike, DataLike],
253
- run: Optional[Run] = None,
254
- format: Optional[str] = None,
255
- ) -> None:
256
- """Replace file content.
257
-
258
- Args:
259
- data: `Union[PathLike, DataLike]` A file path or an in-memory data
260
- object (`DataFrame`, `AnnData`).
261
- run: `Optional[Run] = None` The run that created the file, gets
262
- auto-linked if `ln.track()` was called.
263
-
264
- Examples:
265
-
266
- Say we made a change to the content of a file (e.g., edited the image
267
- `paradisi05_laminopathic_nuclei.jpg`).
268
-
269
- This is how we replace the old file in storage with the new file:
270
-
271
- >>> file.replace("paradisi05_laminopathic_nuclei.jpg")
272
- >>> file.save()
273
-
274
- Note that this neither changes the storage key nor the filename.
275
-
276
- However, it will update the suffix if the file type changes.
277
- """
278
- replace_file(file, data, run, format)
279
-
280
-
281
- @overload
282
- def __init__(
283
- file,
284
- data: Union[PathLike, DataLike],
285
- key: Optional[str] = None,
286
- name: Optional[str] = None,
287
- run: Optional[Run] = None,
288
- ):
289
- ...
290
-
291
-
292
- @overload
293
- def __init__(
294
- file,
295
- **kwargs,
296
- ):
297
- ...
298
-
299
-
300
- def __init__( # type: ignore
301
- file,
302
- *args,
303
- **kwargs,
304
- ):
305
- init_file(file, *args, **kwargs)
306
-
307
-
308
- File.backed = backed
309
- File.stage = stage
310
- File.load = load
311
- File.delete = delete
312
- File._delete_skip_storage = _delete_skip_storage
313
- File.save = save
314
- File._save_skip_storage = _save_skip_storage
315
- File.replace = replace
316
- File.__init__ = __init__
317
- File.path = path
318
- File.from_dir = from_dir
319
- File.tree = tree
@@ -1,36 +0,0 @@
1
- lamindb/__init__.py,sha256=c-k0F_bebM6z2gszM6RDL6Q34hIENEu70GMs4C2-6ME,2344
2
- lamindb/_baseorm_methods.py,sha256=AY2t_pXRq_LRgiQwjlW98Q0U5WhY-VD8QArGpK8eOlU,17875
3
- lamindb/_context.py,sha256=RToF0A85C86FDXP2X2q6nYHCYN1BuVzZ_DNWuAar4KU,14331
4
- lamindb/_delete.py,sha256=V6zDd-9NibHddJMkiiXmHR_Z-ojyfiUlPeN5N4V5H4Q,1277
5
- lamindb/_featureset_methods.py,sha256=9UfN3Sm6z2D1niHuZiXrnPZ1aeZdq_6WaOGCKby2Kd8,2151
6
- lamindb/_file.py,sha256=8DS7O-7xi9P6jo4pgWheEl5EbMkfAZhe_7WUgxIoY6c,16268
7
- lamindb/_file_access.py,sha256=9JE2Z-3rNFWuacBwH1efwEtrBWoK0AzjqifJFCqDcf8,1858
8
- lamindb/_file_methods.py,sha256=QNKbPIlp37TW-rtwe28af__DdvtZkp6MyJPIKCSL8cI,10058
9
- lamindb/_from_values.py,sha256=SZ4xdCEWn2N9sNBBBeygqL04AnTk9_bI8etBdwYH3D4,12421
10
- lamindb/_logger.py,sha256=d1jQdFL4gMKbzcc1dMOgKoEaUlWhyK-VsnxYLlG9caY,48
11
- lamindb/_save.py,sha256=mf1sPsLYRiGzZPLebWW_THLqO62DRcFnwuUSLEy11tU,6965
12
- lamindb/_select.py,sha256=AVlei_ije5tjioHQ1mbDmMc4t4hQQl8mIhxuED6VZII,768
13
- lamindb/_transform_methods.py,sha256=EG87wlUffGI_-N92Qu-qklNRs4hTEeHpEtspYIVGKtc,911
14
- lamindb/_view.py,sha256=U_6fx4nHwrnJqnHOiwh0k4hZ4SOq7BMb6HWBAm28Ifw,1467
15
- lamindb/types.py,sha256=cKenRTHukqxNb_7dmgYnB-ek8qkkUH0LKWkEVuP82PM,163
16
- lamindb/dev/__init__.py,sha256=fIbiAuzGgwumZomc1UsiG3aks9issMDbVQB5abGaJcI,276
17
- lamindb/dev/_settings.py,sha256=052gxuMIWmdgob5N0ptmF-u255EteIkZaMIpYFaeh8M,2480
18
- lamindb/dev/hashing.py,sha256=J13tGFYDrkSXOQLU1zlswwl8G5MsOcDGc5HeVqSsKa0,848
19
- lamindb/dev/datasets/__init__.py,sha256=zVWsoZj313Ho1MIw-d7dzLOllQu2e7oIicwx5MZa_qo,751
20
- lamindb/dev/datasets/_core.py,sha256=P-0X79Z645bZ7K6SyjJXpNJlwAlkKE4l4jxdayTB2kw,8907
21
- lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
22
- lamindb/dev/storage/__init__.py,sha256=vaoHMbkQ6RSDlOKgtOrN9QC3dSKwmIqvIcxu3JRv008,431
23
- lamindb/dev/storage/_anndata_sizes.py,sha256=OOM9mJmhvho5JacsuMsHTXoWfvF0vjxRvg_Pi9VkAo4,730
24
- lamindb/dev/storage/_backed_access.py,sha256=3F1-SgdNUwLUfpEtM68B1VvwMhwXhk5bkTTfZq-UB6M,15503
25
- lamindb/dev/storage/_file.py,sha256=so8p4CXRDgFMcapi8pF3cuGExtcz6_-NVMO5m_idKhI,4182
26
- lamindb/dev/storage/_object.py,sha256=z42LBQb_zICJrY7vXI2GrBzpnDN3WhFmW32STfAgzis,900
27
- lamindb/dev/storage/_zarr.py,sha256=7W1Jos1QOOF3f41uML_arQoDTNPZVpRyP2m3SLWaCAo,2766
28
- lamindb/schema/__init__.py,sha256=PznznlFvbeNSZKpn1RS6Gv0JMXFkLmU2_ej_1hVLSTs,796
29
- lamindb/schema/_core.py,sha256=nWR3X_rNd1AbWw3naMiBi8ppAEpqIDyEYqM54feRB_s,766
30
- lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
31
- lamindb/setup/dev/__init__.py,sha256=iD0f2lx_Hgp-udkiPGal7si5waJSOgvnG6Id-g1mMOY,213
32
- lamindb-0.45.0.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
33
- lamindb-0.45.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- lamindb-0.45.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
35
- lamindb-0.45.0.dist-info/METADATA,sha256=LrSn5Gc6mSiWUB2VYWO3yPIN8FJ2J70oNWe8U7mSX1Y,10835
36
- lamindb-0.45.0.dist-info/RECORD,,
File without changes