lamindb 0.45.0__py3-none-any.whl → 0.46a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -9
- lamindb/_context.py +11 -12
- lamindb/_dataset.py +142 -0
- lamindb/_delete.py +6 -6
- lamindb/_feature_set.py +138 -0
- lamindb/_file.py +322 -81
- lamindb/_from_values.py +57 -160
- lamindb/_orm.py +398 -0
- lamindb/_save.py +26 -10
- lamindb/_select.py +3 -3
- lamindb/_view.py +2 -2
- lamindb/dev/__init__.py +2 -2
- lamindb/dev/_settings.py +2 -1
- lamindb/dev/datasets/__init__.py +6 -0
- lamindb/dev/datasets/_core.py +30 -0
- lamindb/dev/hashing.py +4 -0
- lamindb/dev/storage/__init__.py +4 -3
- lamindb/dev/storage/_backed_access.py +3 -3
- lamindb/dev/storage/{_file.py → file.py} +48 -3
- lamindb/dev/storage/{_object.py → object.py} +1 -0
- lamindb/dev/utils.py +9 -0
- lamindb/types.py +9 -1
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/METADATA +20 -17
- lamindb-0.46a1.dist-info/RECORD +36 -0
- lamindb/_baseorm_methods.py +0 -535
- lamindb/_featureset_methods.py +0 -73
- lamindb/_file_access.py +0 -48
- lamindb/_file_methods.py +0 -319
- lamindb-0.45.0.dist-info/RECORD +0 -36
- /lamindb/{_transform_methods.py → _transform.py} +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/LICENSE +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/WHEEL +0 -0
- {lamindb-0.45.0.dist-info → lamindb-0.46a1.dist-info}/entry_points.txt +0 -0
lamindb/_file_methods.py
DELETED
@@ -1,319 +0,0 @@
|
|
1
|
-
from itertools import islice
|
2
|
-
from pathlib import Path
|
3
|
-
from typing import Optional, Union, overload # noqa
|
4
|
-
|
5
|
-
from lamin_logger import colors, logger
|
6
|
-
from lamindb_setup import settings as setup_settings
|
7
|
-
from lnschema_core.models import File, Run
|
8
|
-
from lnschema_core.types import DataLike, PathLike
|
9
|
-
from upath import UPath
|
10
|
-
|
11
|
-
from lamindb._context import context
|
12
|
-
from lamindb._file import from_dir, init_file, replace_file
|
13
|
-
from lamindb._file_access import filepath_from_file
|
14
|
-
from lamindb.dev._settings import settings
|
15
|
-
from lamindb.dev.storage import delete_storage, load_to_memory
|
16
|
-
|
17
|
-
# The accessor classes are only referenced in the return annotation of
# `backed`, so an ImportError here must not prevent this module from loading.
# NOTE(review): presumably the import fails when optional storage
# dependencies of `_backed_access` are missing -- confirm.
try:
    from lamindb.dev.storage._backed_access import AnnDataAccessor, BackedAccessor
except ImportError:

    # Empty stand-ins so that the names still resolve in type annotations.
    class AnnDataAccessor:  # type: ignore
        pass

    class BackedAccessor:  # type: ignore
        pass
|
26
|
-
|
27
|
-
|
28
|
-
File.__doc__ = """Files: data artifacts.
|
29
|
-
|
30
|
-
Args:
|
31
|
-
data: `Union[PathLike, DataLike]` A file path or an in-memory data
|
32
|
-
object (`DataFrame`, `AnnData`) to serialize. Can be a cloud path, e.g.,
|
33
|
-
`"s3://my-bucket/my_samples/my_file.fcs"`.
|
34
|
-
key: `Optional[str] = None` A storage key: a relative filepath within the
|
35
|
-
current default storage, e.g., `"my_samples/my_file.fcs"`.
|
36
|
-
name: `Optional[str] = None` A name or title. Useful if key is auto-generated.
|
37
|
-
run: `Optional[Run] = None` The run that created the file, gets auto-linked
|
38
|
-
if `ln.track()` was called.
|
39
|
-
|
40
|
-
Track where files come from by passing the generating :class:`~lamindb.Run`.
|
41
|
-
|
42
|
-
Often, files store jointly measured observations of features: track them
|
43
|
-
with :class:`~lamindb.FeatureSet`.
|
44
|
-
|
45
|
-
If files have corresponding representations in storage and memory, LaminDB
|
46
|
-
makes some configurable default choices (e.g., serialize a `DataFrame` as a
|
47
|
-
`.parquet` file).
|
48
|
-
|
49
|
-
.. admonition:: Examples for storage-memory correspondence
|
50
|
-
|
51
|
-
Listed are typical `suffix` values & in memory data objects.
|
52
|
-
|
53
|
-
- Table: `.csv`, `.tsv`, `.parquet`, `.ipc`
|
54
|
-
⟷ `pd.DataFrame`, `polars.DataFrame`
|
55
|
-
- Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
|
56
|
-
- Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
|
57
|
-
- Array: zarr directory, TileDB store ⟷ zarr loader, TileDB loader
|
58
|
-
- Fastq: `.fastq` ⟷ /
|
59
|
-
- VCF: `.vcf` ⟷ /
|
60
|
-
- QC: `.html` ⟷ /
|
61
|
-
|
62
|
-
.. note::
|
63
|
-
|
64
|
-
In some cases (`.zarr`), a `File` is present as many small objects in what
|
65
|
-
appears to be a "folder" in storage. Hence, we often refer to files as data
|
66
|
-
artifacts.
|
67
|
-
|
68
|
-
"""
|
69
|
-
|
70
|
-
|
71
|
-
def backed(
    file: File, is_run_input: Optional[bool] = None
) -> Union[AnnDataAccessor, BackedAccessor]:
    """Open the stored object behind `file` for streaming access.

    Args:
        file: The `File` record whose underlying object is opened.
        is_run_input: Forwarded to `_track_run_input`; `None` defers to
            `settings.track_run_inputs`.

    Returns:
        The object returned by `backed_access(file)`.

    Raises:
        ValueError: If `file.suffix` is not one of the supported h5/zarr
            suffixes.
    """
    supported = (".h5", ".hdf5", ".h5ad", ".zrad", ".zarr")
    if file.suffix in supported:
        _track_run_input(file, is_run_input)
        # Imported only once the suffix check has passed, so that calling this
        # function is the only place the accessor machinery must be importable.
        from lamindb.dev.storage._backed_access import backed_access

        return backed_access(file)
    raise ValueError(
        "File should have a zarr or h5 object as the underlying data, please use"
        " one of the following suffixes for the object name:"
        f" {', '.join(supported)}."
    )
|
86
|
-
|
87
|
-
|
88
|
-
def _track_run_input(file: File, is_run_input: Optional[bool] = None):
    """Link `file` as an input of the current global run, if requested.

    Args:
        file: The `File` record that is being read.
        is_run_input: `True`/`False` decides explicitly; `None` falls back to
            `settings.track_run_inputs`.

    Raises:
        ValueError: If tracking is requested but `context.run` is `None`.
    """
    if is_run_input is not None:
        should_track = is_run_input
    else:
        should_track = settings.track_run_inputs
        # A run exists but tracking is switched off: tell the user how to opt in.
        if context.run is not None and not should_track:
            logger.hint("Track this file as a run input by passing `is_run_input=True`")
    if not should_track:
        return
    current_run = context.run
    if current_run is None:
        raise ValueError(
            "No global run context set. Call ln.context.track() or link input to a"
            " run object via `run.inputs.append(file)`"
        )
    if file.input_of.contains(current_run):
        return  # already linked
    # The run is saved before the link to it is added.
    current_run.save()
    file.input_of.add(current_run)
|
104
|
-
|
105
|
-
|
106
|
-
def load(
    file: File, is_run_input: Optional[bool] = None, stream: bool = False
) -> DataLike:
    """Stage and load to memory.

    Gives back the in-memory representation where one exists, e.g., an
    `AnnData` object for an `h5ad` file.

    Args:
        file: The `File` record to load.
        is_run_input: Forwarded to `_track_run_input`.
        stream: Forwarded to `load_to_memory`.
    """
    _track_run_input(file, is_run_input)
    storage_path = filepath_from_file(file)
    return load_to_memory(storage_path, stream=stream)
|
116
|
-
|
117
|
-
|
118
|
-
def stage(file: File, is_run_input: Optional[bool] = None) -> Path:
    """Update cache from cloud storage if outdated.

    Args:
        file: The `File` record to stage.
        is_run_input: Forwarded to `_track_run_input`.

    Returns:
        A path to a locally cached on-disk object (say, a `.jpg` file).

    Raises:
        RuntimeError: If the file has a `.zrad` or `.zarr` suffix.
    """
    is_zarr_like = file.suffix in (".zrad", ".zarr")
    if is_zarr_like:
        raise RuntimeError("zarr object can't be staged, please use load() or stream()")
    _track_run_input(file, is_run_input)
    instance_storage = setup_settings.instance.storage
    return instance_storage.cloud_to_local(filepath_from_file(file))
|
128
|
-
|
129
|
-
|
130
|
-
def delete(file, storage: Optional[bool] = None) -> None:
    """Delete file, optionally from storage.

    The record itself is always deleted; `storage` only controls whether the
    stored object is removed as well.

    Args:
        storage: `Optional[bool] = None` Indicate whether you want to delete the
            file in storage. If `None`, you are asked interactively and only
            the answer `"y"` deletes the stored object.

    Example:

        For any `File` object `file`, call:

        >>> file.delete(storage=True)  # storage=True auto-confirms deletion in storage
    """
    if storage is None:
        response = input(f"Are you sure you want to delete {file} from storage? (y/n)")
        # Any answer other than "y" keeps the stored object. Binding the flag in
        # both cases avoids an UnboundLocalError on a negative answer.
        delete_in_storage = response == "y"
    else:
        delete_in_storage = storage
    if delete_in_storage:
        filepath = file.path()
        delete_storage(filepath)
        logger.success(f"Deleted stored object {colors.yellow(f'{filepath}')}")
    file._delete_skip_storage()
|
154
|
-
|
155
|
-
|
156
|
-
def _delete_skip_storage(file, *args, **kwargs) -> None:
    """Delete the record via the parent class of `File`, bypassing `File.delete`.

    All positional and keyword arguments are passed through unchanged.
    """
    parent_delete = super(File, file).delete
    parent_delete(*args, **kwargs)
|
158
|
-
|
159
|
-
|
160
|
-
def save(file, *args, **kwargs) -> None:
    """Save the file to database & storage.

    The record is saved first; an upload is attempted afterwards. If the
    upload reports a problem, the freshly saved record is deleted again.

    Raises:
        RuntimeError: If `check_and_attempt_upload` or
            `check_and_attempt_clearing` returns a non-`None` value.
    """
    file._save_skip_storage(*args, **kwargs)
    from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload

    upload_error = check_and_attempt_upload(file)
    if upload_error is not None:
        # Roll back the record so it does not point at a missing object.
        file._delete_skip_storage()
        raise RuntimeError(upload_error)
    clearing_error = check_and_attempt_clearing(file)
    if clearing_error is None:
        return
    raise RuntimeError(clearing_error)
|
172
|
-
|
173
|
-
|
174
|
-
def _save_skip_storage(file, *args, **kwargs) -> None:
    """Save the record via the parent class of `File`, bypassing `File.save`.

    A linked transform and a linked run are saved first (in that order) when
    they are set.
    """
    for attribute in ("transform", "run"):
        related = getattr(file, attribute)
        if related is not None:
            related.save()
    parent_save = super(File, file).save
    parent_save(*args, **kwargs)
|
180
|
-
|
181
|
-
|
182
|
-
def path(self) -> Union[Path, UPath]:
    """Path on storage.

    Returns:
        The result of `filepath_from_file` for this file.
    """
    from lamindb._file_access import filepath_from_file as resolve_path

    return resolve_path(self)
|
187
|
-
|
188
|
-
|
189
|
-
# adapted from: https://stackoverflow.com/questions/9727673/list-directory-tree-structure-in-python # noqa
@classmethod  # type: ignore
def tree(
    cls: File,
    prefix: Optional[str] = None,
    *,
    level: int = -1,
    limit_to_directories: bool = False,
    length_limit: int = 1000,
):
    """Given a prefix, print a visual tree structure of files.

    Args:
        prefix: Path relative to `settings.storage` at which the listing
            starts; `None` starts at `settings.storage` itself.
        level: Maximum depth to descend. The default `-1` never counts down
            to 0 and therefore does not limit the depth.
        limit_to_directories: If `True`, only directories are listed.
        length_limit: Maximum number of entries to print. If more entries
            exist, a notice is printed on its own line after the last entry.
    """
    # All four connectors are 4 columns wide so that nested levels line up.
    space = "    "
    branch = "│   "
    tee = "├── "
    last = "└── "

    if prefix is None:
        dir_path = settings.storage
    else:
        dir_path = settings.storage / prefix
    files = 0
    directories = 0

    def inner(dir_path: Union[Path, UPath], prefix: str = "", level=-1):
        """Lazily yield one formatted line per entry below `dir_path`."""
        nonlocal files, directories
        if not level:
            return  # 0, stop iterating
        stripped_dir_path = dir_path.as_posix().rstrip("/")
        # do not iterate through zarr directories
        if stripped_dir_path.endswith((".zarr", ".zrad")):
            return
        # this is needed so that the passed folder is not listed
        contents = [
            i
            for i in dir_path.iterdir()
            if i.as_posix().rstrip("/") != stripped_dir_path
        ]
        if limit_to_directories:
            contents = [d for d in contents if d.is_dir()]
        # every entry gets a tee, except the final one which closes the branch
        pointers = [tee] * (len(contents) - 1) + [last]
        for pointer, path in zip(pointers, contents):
            if path.is_dir():
                yield prefix + pointer + path.name
                directories += 1
                # keep the vertical bar only while siblings still follow
                extension = branch if pointer == tee else space
                yield from inner(path, prefix=prefix + extension, level=level - 1)
            elif not limit_to_directories:
                yield prefix + pointer + path.name
                files += 1

    lines = [f"{dir_path.name}"]
    iterator = inner(dir_path, level=level)
    lines.extend(islice(iterator, length_limit))
    # Pulling one more entry tells us whether the listing was truncated.
    if next(iterator, None) is not None:
        # The notice goes on its own line instead of being glued to the last
        # entry.
        lines.append(f"... length_limit, {length_limit}, reached, counted:")
    print("\n".join(lines))
    print(f"\n{directories} directories" + (f", {files} files" if files else ""))
|
247
|
-
|
248
|
-
|
249
|
-
# likely needs an arg `key`
|
250
|
-
def replace(
    file,
    data: Union[PathLike, DataLike],
    run: Optional[Run] = None,
    format: Optional[str] = None,
) -> None:
    """Replace file content.

    Thin wrapper that hands all arguments to `replace_file`.

    Args:
        data: `Union[PathLike, DataLike]` A file path or an in-memory data
            object (`DataFrame`, `AnnData`).
        run: `Optional[Run] = None` The run that created the file, gets
            auto-linked if `ln.track()` was called.
        format: `Optional[str] = None` Passed through to `replace_file`.

    Examples:

        Say we made a change to the content of a file (e.g., edited the image
        `paradisi05_laminopathic_nuclei.jpg`).

        This is how we replace the old file in storage with the new file:

        >>> file.replace("paradisi05_laminopathic_nuclei.jpg")
        >>> file.save()

        Note that this neither changes the storage key nor the filename.

        However, it will update the suffix if the file type changes.
    """
    replace_file(file, data, run, format)
|
279
|
-
|
280
|
-
|
281
|
-
@overload
def __init__(
    file,
    data: Union[PathLike, DataLike],
    key: Optional[str] = None,
    name: Optional[str] = None,
    run: Optional[Run] = None,
):
    """Signature for creating a file from a path or an in-memory object."""


@overload
def __init__(
    file,
    **kwargs,
):
    """Signature for creating a file from keyword arguments only."""


def __init__(  # type: ignore
    file,
    *args,
    **kwargs,
):
    # The two overloads above only describe the accepted call shapes; all
    # actual initialization is delegated to `init_file`.
    init_file(file, *args, **kwargs)
|
306
|
-
|
307
|
-
|
308
|
-
# Attach the module-level functions to `File` so they are available as
# methods on the class.

# construction
File.__init__ = __init__
File.from_dir = from_dir

# locating and reading the stored object
File.path = path
File.backed = backed
File.stage = stage
File.load = load

# writing
File.save = save
File._save_skip_storage = _save_skip_storage
File.replace = replace

# deleting
File.delete = delete
File._delete_skip_storage = _delete_skip_storage

# browsing storage
File.tree = tree
|
lamindb-0.45.0.dist-info/RECORD
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
lamindb/__init__.py,sha256=c-k0F_bebM6z2gszM6RDL6Q34hIENEu70GMs4C2-6ME,2344
|
2
|
-
lamindb/_baseorm_methods.py,sha256=AY2t_pXRq_LRgiQwjlW98Q0U5WhY-VD8QArGpK8eOlU,17875
|
3
|
-
lamindb/_context.py,sha256=RToF0A85C86FDXP2X2q6nYHCYN1BuVzZ_DNWuAar4KU,14331
|
4
|
-
lamindb/_delete.py,sha256=V6zDd-9NibHddJMkiiXmHR_Z-ojyfiUlPeN5N4V5H4Q,1277
|
5
|
-
lamindb/_featureset_methods.py,sha256=9UfN3Sm6z2D1niHuZiXrnPZ1aeZdq_6WaOGCKby2Kd8,2151
|
6
|
-
lamindb/_file.py,sha256=8DS7O-7xi9P6jo4pgWheEl5EbMkfAZhe_7WUgxIoY6c,16268
|
7
|
-
lamindb/_file_access.py,sha256=9JE2Z-3rNFWuacBwH1efwEtrBWoK0AzjqifJFCqDcf8,1858
|
8
|
-
lamindb/_file_methods.py,sha256=QNKbPIlp37TW-rtwe28af__DdvtZkp6MyJPIKCSL8cI,10058
|
9
|
-
lamindb/_from_values.py,sha256=SZ4xdCEWn2N9sNBBBeygqL04AnTk9_bI8etBdwYH3D4,12421
|
10
|
-
lamindb/_logger.py,sha256=d1jQdFL4gMKbzcc1dMOgKoEaUlWhyK-VsnxYLlG9caY,48
|
11
|
-
lamindb/_save.py,sha256=mf1sPsLYRiGzZPLebWW_THLqO62DRcFnwuUSLEy11tU,6965
|
12
|
-
lamindb/_select.py,sha256=AVlei_ije5tjioHQ1mbDmMc4t4hQQl8mIhxuED6VZII,768
|
13
|
-
lamindb/_transform_methods.py,sha256=EG87wlUffGI_-N92Qu-qklNRs4hTEeHpEtspYIVGKtc,911
|
14
|
-
lamindb/_view.py,sha256=U_6fx4nHwrnJqnHOiwh0k4hZ4SOq7BMb6HWBAm28Ifw,1467
|
15
|
-
lamindb/types.py,sha256=cKenRTHukqxNb_7dmgYnB-ek8qkkUH0LKWkEVuP82PM,163
|
16
|
-
lamindb/dev/__init__.py,sha256=fIbiAuzGgwumZomc1UsiG3aks9issMDbVQB5abGaJcI,276
|
17
|
-
lamindb/dev/_settings.py,sha256=052gxuMIWmdgob5N0ptmF-u255EteIkZaMIpYFaeh8M,2480
|
18
|
-
lamindb/dev/hashing.py,sha256=J13tGFYDrkSXOQLU1zlswwl8G5MsOcDGc5HeVqSsKa0,848
|
19
|
-
lamindb/dev/datasets/__init__.py,sha256=zVWsoZj313Ho1MIw-d7dzLOllQu2e7oIicwx5MZa_qo,751
|
20
|
-
lamindb/dev/datasets/_core.py,sha256=P-0X79Z645bZ7K6SyjJXpNJlwAlkKE4l4jxdayTB2kw,8907
|
21
|
-
lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
|
22
|
-
lamindb/dev/storage/__init__.py,sha256=vaoHMbkQ6RSDlOKgtOrN9QC3dSKwmIqvIcxu3JRv008,431
|
23
|
-
lamindb/dev/storage/_anndata_sizes.py,sha256=OOM9mJmhvho5JacsuMsHTXoWfvF0vjxRvg_Pi9VkAo4,730
|
24
|
-
lamindb/dev/storage/_backed_access.py,sha256=3F1-SgdNUwLUfpEtM68B1VvwMhwXhk5bkTTfZq-UB6M,15503
|
25
|
-
lamindb/dev/storage/_file.py,sha256=so8p4CXRDgFMcapi8pF3cuGExtcz6_-NVMO5m_idKhI,4182
|
26
|
-
lamindb/dev/storage/_object.py,sha256=z42LBQb_zICJrY7vXI2GrBzpnDN3WhFmW32STfAgzis,900
|
27
|
-
lamindb/dev/storage/_zarr.py,sha256=7W1Jos1QOOF3f41uML_arQoDTNPZVpRyP2m3SLWaCAo,2766
|
28
|
-
lamindb/schema/__init__.py,sha256=PznznlFvbeNSZKpn1RS6Gv0JMXFkLmU2_ej_1hVLSTs,796
|
29
|
-
lamindb/schema/_core.py,sha256=nWR3X_rNd1AbWw3naMiBi8ppAEpqIDyEYqM54feRB_s,766
|
30
|
-
lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
|
31
|
-
lamindb/setup/dev/__init__.py,sha256=iD0f2lx_Hgp-udkiPGal7si5waJSOgvnG6Id-g1mMOY,213
|
32
|
-
lamindb-0.45.0.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
|
33
|
-
lamindb-0.45.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
-
lamindb-0.45.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
35
|
-
lamindb-0.45.0.dist-info/METADATA,sha256=LrSn5Gc6mSiWUB2VYWO3yPIN8FJ2J70oNWe8U7mSX1Y,10835
|
36
|
-
lamindb-0.45.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|