lamindb 0.69.9__py3-none-any.whl → 0.70.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +461 -126
- lamindb/_artifact.py +69 -20
- lamindb/_can_validate.py +13 -18
- lamindb/_collection.py +48 -44
- lamindb/_feature_set.py +20 -8
- lamindb/_finish.py +28 -42
- lamindb/_from_values.py +23 -17
- lamindb/_registry.py +7 -2
- lamindb/core/__init__.py +16 -4
- lamindb/core/_data.py +22 -16
- lamindb/core/_feature_manager.py +80 -25
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +106 -52
- lamindb/core/_run_context.py +0 -1
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/_core.py +42 -2
- lamindb/core/storage/_backed_access.py +8 -4
- lamindb/core/storage/file.py +9 -0
- lamindb/core/storage/object.py +19 -0
- lamindb/integrations/_vitessce.py +18 -9
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/METADATA +7 -8
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/RECORD +25 -25
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/LICENSE +0 -0
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/WHEEL +0 -0
lamindb/core/_mapped_collection.py
CHANGED
@@ -11,7 +11,9 @@ from lamin_utils import logger
 from lamindb_setup.core.upath import UPath

 from .storage._backed_access import (
+    ArrayType,
     ArrayTypes,
+    GroupType,
     GroupTypes,
     StorageType,
     _safer_read_index,
@@ -55,6 +57,12 @@ class MappedCollection:
     If your `AnnData` collection is in the cloud, move them into a local cache
     first for faster access.

+    `__getitem__` of the `MappedCollection` object takes a single integer index
+    and returns a dictionary with the observation data sample for this index from
+    the `AnnData` objects in `path_list`. The dictionary has keys for `layers_keys`
+    (`.X` is in `"X"`), `obs_keys`, `obsm_keys` (under `f"obsm_{key}"`) and also `"_store_idx"`
+    for the index of the `AnnData` object containing this observation sample.
+
     .. note::

         For a guide, see :doc:`docs:scrna5`.
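A minimal sketch of the new item access, assuming two hypothetical local `.h5ad` files that carry a `counts` layer, a `cell_type` column and an `X_pca` embedding, with `MappedCollection` as exposed in `lamindb.core`:

```python
from lamindb.core import MappedCollection

mc = MappedCollection(
    path_list=["part1.h5ad", "part2.h5ad"],  # hypothetical files
    layers_keys=["X", "counts"],
    obs_keys="cell_type",
    obsm_keys="X_pca",
)
sample = mc[0]
# keys: "X", "counts", "obsm_X_pca", "cell_type", "_store_idx"
print(sorted(sample.keys()))
```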
@@ -70,23 +78,28 @@ class MappedCollection:

     Args:
         path_list: A list of paths to `AnnData` objects stored in `.h5ad` or `.zarr` formats.
-        label_keys: Keys from the ``.obs`` slots.
+        layers_keys: Keys from the ``.layers`` slot. ``layers_keys=None`` or ``"X"`` in the list
+            retrieves ``.X``.
+        obsm_keys: Keys from the ``.obsm`` slots.
+        obs_keys: Keys from the ``.obs`` slots.
         join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
             does not join.
         encode_labels: Encode labels into integers.
-            Can be a list with elements from ``label_keys``.
+            Can be a list with elements from ``obs_keys``.
         unknown_label: Encode this label to -1.
-            Can be a dictionary with keys from ``label_keys`` if ``encode_labels=True``
+            Can be a dictionary with keys from ``obs_keys`` if ``encode_labels=True``
             or from ``encode_labels`` if it is a list.
-        cache_categories: Enable caching categories of ``label_keys`` for faster access.
+        cache_categories: Enable caching categories of ``obs_keys`` for faster access.
         parallel: Enable sampling with multiple processes.
-        dtype: Convert numpy arrays from ``.X``
+        dtype: Convert numpy arrays from ``.X``, ``.layers`` and ``.obsm``
     """

     def __init__(
         self,
         path_list: list[UPathStr],
-        label_keys: str | list[str] | None = None,
+        layers_keys: str | list[str] | None = None,
+        obs_keys: str | list[str] | None = None,
+        obsm_keys: str | list[str] | None = None,
         join: Literal["inner", "outer"] | None = "inner",
         encode_labels: bool | list[str] = True,
         unknown_label: str | dict[str, str] | None = None,
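Because `MappedCollection` implements `__len__` and `__getitem__`, it plugs into a PyTorch `DataLoader` as a map-style dataset; a sketch assuming `torch` is installed and `mc` from the snippet above:

```python
from torch.utils.data import DataLoader

loader = DataLoader(mc, batch_size=128, shuffle=True, num_workers=2)
for batch in loader:
    x = batch["X"]          # stacked rows of .X
    y = batch["cell_type"]  # integer-encoded labels (encode_labels=True)
    break
```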
@@ -96,27 +109,37 @@ class MappedCollection:
     ):
         assert join in {None, "inner", "outer"}

-        label_keys = [label_keys] if isinstance(label_keys, str) else label_keys
-        self.label_keys = label_keys
+        if layers_keys is None:
+            self.layers_keys = ["X"]
+        else:
+            self.layers_keys = (
+                [layers_keys] if isinstance(layers_keys, str) else layers_keys
+            )
+
+        obsm_keys = [obsm_keys] if isinstance(obsm_keys, str) else obsm_keys
+        self.obsm_keys = obsm_keys
+
+        obs_keys = [obs_keys] if isinstance(obs_keys, str) else obs_keys
+        self.obs_keys = obs_keys

         if isinstance(encode_labels, list):
             if len(encode_labels) == 0:
                 encode_labels = False
-            elif label_keys is None or not all(
-                enc_label in label_keys for enc_label in encode_labels
+            elif obs_keys is None or not all(
+                enc_label in obs_keys for enc_label in encode_labels
             ):
                 raise ValueError(
-                    "All elements of `encode_labels` should be in `label_keys`."
+                    "All elements of `encode_labels` should be in `obs_keys`."
                 )
         else:
             if encode_labels:
-                encode_labels = label_keys if label_keys is not None else False
+                encode_labels = obs_keys if obs_keys is not None else False
         self.encode_labels = encode_labels

         if encode_labels and isinstance(unknown_label, dict):
             if not all(unkey in encode_labels for unkey in unknown_label):  # type: ignore
                 raise ValueError(
-                    "All keys of `unknown_label` should be in `encode_labels` and `label_keys`."
+                    "All keys of `unknown_label` should be in `encode_labels` and `obs_keys`."
                 )
         self.unknown_label = unknown_label

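How the validation reads in practice; a sketch with the same hypothetical files:

```python
from lamindb.core import MappedCollection

mc = MappedCollection(
    path_list=["part1.h5ad", "part2.h5ad"],   # hypothetical files
    obs_keys=["cell_type", "donor"],
    encode_labels=["cell_type"],              # must be a subset of obs_keys
    unknown_label={"cell_type": "unknown"},   # keys must be in encode_labels
)
# encode_labels=["assay"] instead would raise:
# ValueError: All elements of `encode_labels` should be in `obs_keys`.
```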
@@ -141,12 +164,16 @@ class MappedCollection:

         self.join_vars = join
         self.var_indices = None
+        self.var_joint = None
+        self.n_vars_list = None
+        self.n_vars = None
         if self.join_vars is not None:
             self._make_join_vars()
+            self.n_vars = len(self.var_joint)

-        if self.label_keys is not None:
+        if self.obs_keys is not None:
             if cache_categories:
-                self._cache_categories(self.label_keys)
+                self._cache_categories(self.obs_keys)
             else:
                 self._cache_cats: dict = {}
         self.encoders: dict = {}
@@ -169,10 +196,10 @@ class MappedCollection:
             self.conns.append(conn)
             self.storages.append(storage)

-    def _cache_categories(self, label_keys: list):
+    def _cache_categories(self, obs_keys: list):
         self._cache_cats = {}
         decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
-        for label in label_keys:
+        for label in obs_keys:
             self._cache_cats[label] = []
             for storage in self.storages:
                 with _Connect(storage) as store:
@@ -197,11 +224,13 @@ class MappedCollection:

     def _make_join_vars(self):
         var_list = []
+        self.n_vars_list = []
         for storage in self.storages:
             with _Connect(storage) as store:
-                var_list.append(_safer_read_index(store["var"]))
+                vars = _safer_read_index(store["var"])
+                var_list.append(vars)
+                self.n_vars_list.append(len(vars))

-        self.var_joint = None
         vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
         if vars_eq:
             self.join_vars = None
@@ -223,6 +252,20 @@ class MappedCollection:
     def __len__(self):
         return self.n_obs

+    @property
+    def shape(self):
+        """Shape of the (virtually aligned) dataset."""
+        return (self.n_obs, self.n_vars)
+
+    @property
+    def original_shapes(self):
+        """Shapes of the underlying AnnData objects."""
+        if self.n_vars_list is None:
+            n_vars_list = [None] * len(self.n_obs_list)
+        else:
+            n_vars_list = self.n_vars_list
+        return list(zip(self.n_obs_list, n_vars_list))
+
     def __getitem__(self, idx: int):
         obs_idx = self.indices[idx]
         storage_idx = self.storage_idx[idx]
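The two new properties, continuing the sketch above (shapes are illustrative):

```python
print(mc.shape)            # (total n_obs, n_vars of the virtual join)
print(mc.original_shapes)  # per-file shapes, e.g. [(1000, 2000), (1500, 1800)]
```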
@@ -232,17 +275,28 @@ class MappedCollection:
             var_idxs_join = None

         with _Connect(self.storages[storage_idx]) as store:
-            out = {"x": self._get_data_idx(store, obs_idx, var_idxs_join)}
-            out["_storage_idx"] = storage_idx
-            if self.label_keys is not None:
-                for label in self.label_keys:
+            out = {}
+            for layers_key in self.layers_keys:
+                lazy_data = (
+                    store["X"] if layers_key == "X" else store["layers"][layers_key]
+                )
+                out[layers_key] = self._get_data_idx(
+                    lazy_data, obs_idx, self.join_vars, var_idxs_join, self.n_vars
+                )
+            if self.obsm_keys is not None:
+                for obsm_key in self.obsm_keys:
+                    lazy_data = store["obsm"][obsm_key]
+                    out[f"obsm_{obsm_key}"] = self._get_data_idx(lazy_data, obs_idx)
+            out["_store_idx"] = storage_idx
+            if self.obs_keys is not None:
+                for label in self.obs_keys:
                     if label in self._cache_cats:
                         cats = self._cache_cats[label][storage_idx]
                         if cats is None:
                             cats = []
                     else:
                         cats = None
-                    label_idx = self._get_label_idx(store, obs_idx, label, cats)
+                    label_idx = self._get_obs_idx(store, obs_idx, label, cats)
                     if label in self.encoders:
                         label_idx = self.encoders[label][label_idx]
                     out[label] = label_idx
@@ -250,46 +304,46 @@ class MappedCollection:

     def _get_data_idx(
         self,
-        storage: StorageType,  # type: ignore
+        lazy_data: ArrayType | GroupType,  # type: ignore
         idx: int,
+        join_vars: Literal["inner", "outer"] | None = None,
         var_idxs_join: list | None = None,
-
+        n_vars_out: int | None = None,
     ):
         """Get the index for the data."""
-        layer = storage["X"]
-        if isinstance(layer, ArrayTypes):  # type: ignore
-            layer_idx = layer[idx]
-            if self.join_vars is None:
-                result = layer_idx
+        if isinstance(lazy_data, ArrayTypes):  # type: ignore
+            lazy_data_idx = lazy_data[idx]  # type: ignore
+            if join_vars is None:
+                result = lazy_data_idx
             if self._dtype is not None:
                 result = result.astype(self._dtype, copy=False)
-            elif self.join_vars == "outer":
-                dtype = layer_idx.dtype if self._dtype is None else self._dtype
-                result = np.zeros(len(self.var_joint), dtype=dtype)
-                result[var_idxs_join] = layer_idx
+            elif join_vars == "outer":
+                dtype = lazy_data_idx.dtype if self._dtype is None else self._dtype
+                result = np.zeros(n_vars_out, dtype=dtype)
+                result[var_idxs_join] = lazy_data_idx
             else:  # inner join
-                result = layer_idx[var_idxs_join]
+                result = lazy_data_idx[var_idxs_join]
                 if self._dtype is not None:
                     result = result.astype(self._dtype, copy=False)
             return result
         else:  # assume csr_matrix here
-            data = layer["data"]
-            indices = layer["indices"]
-            indptr = layer["indptr"]
+            data = lazy_data["data"]  # type: ignore
+            indices = lazy_data["indices"]  # type: ignore
+            indptr = lazy_data["indptr"]  # type: ignore
             s = slice(*(indptr[idx : idx + 2]))
             data_s = data[s]
             dtype = data_s.dtype if self._dtype is None else self._dtype
-            if self.join_vars == "outer":
-                layer_idx = np.zeros(len(self.var_joint), dtype=dtype)
-                layer_idx[var_idxs_join[indices[s]]] = data_s
+            if join_vars == "outer":
+                lazy_data_idx = np.zeros(n_vars_out, dtype=dtype)
+                lazy_data_idx[var_idxs_join[indices[s]]] = data_s
             else:
-                layer_idx = np.zeros(layer.attrs["shape"][1], dtype=dtype)
-                layer_idx[indices[s]] = data_s
-                if self.join_vars == "inner":
-                    layer_idx = layer_idx[var_idxs_join]
-            return layer_idx
+                lazy_data_idx = np.zeros(lazy_data.attrs["shape"][1], dtype=dtype)  # type: ignore
+                lazy_data_idx[indices[s]] = data_s
+                if join_vars == "inner":
+                    lazy_data_idx = lazy_data_idx[var_idxs_join]
+            return lazy_data_idx

-    def _get_label_idx(
+    def _get_obs_idx(
         self,
         storage: StorageType,
         idx: int,
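The `"outer"`-join zero-fill that `_get_data_idx` performs, illustrated on a toy dense row; all names here are local to the example:

```python
import numpy as np

row = np.array([1.0, 2.0, 3.0])      # one observation from a store with 3 vars
var_idxs_join = np.array([0, 2, 4])  # where those vars sit in the joint var space
n_vars_out = 5                       # size of the joint var space

result = np.zeros(n_vars_out, dtype=row.dtype)
result[var_idxs_join] = row
print(result)  # [1. 0. 2. 0. 3.]
```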
@@ -317,12 +371,12 @@ class MappedCollection:
             label = label.decode("utf-8")
         return label

-    def get_label_weights(self, label_keys: str | list[str]):
+    def get_label_weights(self, obs_keys: str | list[str]):
         """Get all weights for the given label keys."""
-        if isinstance(label_keys, str):
-            label_keys = [label_keys]
+        if isinstance(obs_keys, str):
+            obs_keys = [obs_keys]
         labels_list = []
-        for label_key in label_keys:
+        for label_key in obs_keys:
             labels_to_str = self.get_merged_labels(label_key).astype(str).astype("O")
             labels_list.append(labels_to_str)
         if len(labels_list) > 1:
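A sketch of the renamed `get_label_weights` driving oversampling, assuming `torch` is installed and `mc` from the snippets above:

```python
from torch.utils.data import DataLoader, WeightedRandomSampler

weights = mc.get_label_weights("cell_type")  # one weight per observation
sampler = WeightedRandomSampler(weights, num_samples=len(mc))
loader = DataLoader(mc, batch_size=128, sampler=sampler)
```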
lamindb/core/_run_context.py
CHANGED
lamindb/core/_settings.py
CHANGED
@@ -64,7 +64,7 @@ class Settings:
     FAQ: :doc:`/faq/idempotency`
     """
     track_run_inputs: bool = True
-    """Track files as input upon `.load()`, `.stage()` and `.backed()`.
+    """Track files as input upon `.load()`, `.cache()` and `.backed()`.

     Requires a global run context with :func:`~lamindb.track` was created!

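A sketch of the intended flow around this setting; the artifact lookup is hypothetical:

```python
import lamindb as ln

ln.settings.track_run_inputs = True  # default
ln.track()  # create the global run context (e.g., in a notebook)

artifact = ln.Artifact.filter(description="my dataset").one()  # hypothetical
adata = artifact.load()  # now recorded as an input of the current run
```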
lamindb/core/datasets/_core.py
CHANGED
@@ -299,7 +299,7 @@ def anndata_human_immune_cells(
     adata = sc.read('Global.h5ad')
     adata.obs = adata.obs[['donor_id', 'tissue', 'cell_type', 'assay', 'tissue_ontology_term_id', 'cell_type_ontology_term_id', 'assay_ontology_term_id']].copy()
     sc.pp.subsample(adata, fraction=0.005)
-    del adata.uns["development_stage_ontology_term_id_colors"]
+    del adata.uns["development_cache_ontology_term_id_colors"]
     del adata.uns["sex_ontology_term_id_colors"]
     adata.write('human_immune.h5ad')
     """
@@ -401,7 +401,47 @@ def mudata_papalexi21_subset():  # pragma: no cover
         "papalexi21_subset.h5mu",
     )

-    return md.read_h5mu(filepath)
+    mdata = md.read_h5mu(filepath)
+    for mod in ["rna", "adt", "hto", "gdo"]:
+        mdata[mod].obs.drop(
+            mdata[mod].obs.columns, axis=1, inplace=True
+        )  # Drop all columns
+    for col in mdata.obs.columns:
+        for mod in ["rna", "adt", "hto", "gdo"]:
+            if col.endswith(f"_{mod.upper()}"):
+                new_col = col.replace(f"{mod}:", "")
+                if new_col != col:
+                    mdata[mod].obs[new_col] = mdata.obs.pop(col)
+            else:
+                new_col = col.replace(f"{mod}:", "")
+                if new_col not in mdata.obs.columns and col in mdata.obs.columns:
+                    mdata.obs[new_col] = mdata.obs.pop(col)
+
+    for col in mdata.obs.columns:
+        for mod in ["rna", "adt", "hto", "gdo"]:
+            if col.endswith(f"_{mod.upper()}"):
+                del mdata.obs[col]
+
+    for col in [
+        "orig.ident",
+        "MULTI_ID",
+        "NT",
+        "S.Score",
+        "G2M.Score",
+        "Phase",
+        "gene_target",
+        "guide_ID",
+        "HTO_classification",
+    ]:
+        del mdata.obs[col]
+    mdata.update()
+
+    mdata["rna"].obs["percent.mito"] = mdata.obs.pop("percent.mito")
+    mdata["hto"].obs["technique"] = "cell hashing"
+    mdata["hto"].obs["technique"] = mdata["hto"].obs["technique"].astype("category")
+    mdata.update()
+
+    return mdata


 def df_iris() -> pd.DataFrame:
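Calling the reworked loader (downloads the subset on first use and requires `mudata` to be installed):

```python
import lamindb as ln

mdata = ln.core.datasets.mudata_papalexi21_subset()
print(mdata)  # MuData with "rna", "adt", "hto", "gdo" modalities
print(mdata["hto"].obs["technique"])  # categorical column set by the loader
```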
lamindb/core/storage/_backed_access.py
CHANGED
@@ -100,7 +100,7 @@ def _records_to_df(obj):
         return obj


-class Registry:
+class AccessRegistry:
     def __init__(self):
         self._registry = {}
         self._openers = {}
@@ -141,7 +141,7 @@ class Registry:


 # storage specific functions should be registered and called through the registry
-registry = Registry()
+registry = AccessRegistry()


 @registry.register_open("h5py")
@@ -176,8 +176,12 @@ def safer_read_partial(elem, indices):
     indices_increasing = []
     indices_inverse = []
     for indices_dim in indices:
-        if isinstance(indices_dim, np.ndarray) and not np.all(
-            np.diff(indices_dim) > 0
+        # should be integer or bool
+        # ignore bool or increasing unique integers
+        if (
+            isinstance(indices_dim, np.ndarray)
+            and indices_dim.dtype != "bool"
+            and not np.all(np.diff(indices_dim) > 0)
         ):
             idx_unique, idx_inverse = np.unique(indices_dim, return_inverse=True)
             indices_increasing.append(idx_unique)
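The `np.unique` round-trip that non-increasing integer selections are routed through, on a toy array standing in for an h5py dataset (which only accepts increasing, unique fancy indices):

```python
import numpy as np

requested = np.array([7, 2, 2, 5])  # non-increasing, with duplicates
idx_unique, idx_inverse = np.unique(requested, return_inverse=True)

dataset = np.arange(10) * 10        # stand-in for an h5py dataset
read = dataset[idx_unique]          # increasing unique indices: safe to read
restored = read[idx_inverse]        # restore the requested order
print(restored)  # [70 20 20 50]
```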
lamindb/core/storage/file.py
CHANGED
@@ -22,6 +22,7 @@ from lnschema_core.models import Artifact, Storage
 from lamindb.core._settings import settings

 if TYPE_CHECKING:
+    import mudata as md
     from lamindb_setup.core.types import UPathStr

 try:
@@ -170,6 +171,13 @@ def read_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
     return pd.read_csv(path_sanitized, sep="\t", **kwargs)


+def read_mdata_h5mu(filepath: UPathStr, **kwargs) -> md.MuData:
+    import mudata as md
+
+    path_sanitized = Path(filepath)
+    return md.read_h5mu(path_sanitized, **kwargs)
+
+
 def load_html(path: UPathStr):
     if is_run_from_ipython:
         with open(path, encoding="utf-8") as f:
@@ -224,6 +232,7 @@ def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
         ".zrad": read_adata_zarr,
         ".html": load_html,
         ".json": load_json,
+        ".h5mu": read_mdata_h5mu,
     }

     reader = READER_FUNCS.get(filepath.suffix)
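With the reader registered, `.h5mu` files flow through the generic loader; a sketch with a hypothetical local path, assuming `load_to_memory` is importable from `lamindb.core.storage`:

```python
from lamindb.core.storage import load_to_memory

mdata = load_to_memory("papalexi21_subset.h5mu")  # dispatches to read_mdata_h5mu
```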
lamindb/core/storage/object.py
CHANGED
@@ -9,6 +9,14 @@ if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr


+def _mudata_is_installed():
+    try:
+        import mudata
+    except ImportError:
+        return False
+    return True
+
+
 def infer_suffix(dmem, adata_format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
@@ -25,6 +33,11 @@ def infer_suffix(dmem, adata_format: str | None = None):
     elif isinstance(dmem, DataFrame):
         return ".parquet"
     else:
+        if _mudata_is_installed():
+            from mudata import MuData
+
+            if isinstance(dmem, MuData):
+                return ".h5mu"
         raise NotImplementedError


@@ -34,4 +47,10 @@ def write_to_file(dmem, filepath: UPathStr):
     elif isinstance(dmem, DataFrame):
         dmem.to_parquet(filepath)
     else:
+        if _mudata_is_installed():
+            from mudata import MuData
+
+            if isinstance(dmem, MuData):
+                dmem.write(filepath)
+                return
         raise NotImplementedError
lamindb/integrations/_vitessce.py
CHANGED
@@ -2,39 +2,48 @@ from __future__ import annotations

 import json
 from datetime import datetime, timezone
+from typing import TYPE_CHECKING

 import lamindb_setup as ln_setup
 from lamin_utils import logger

 from lamindb._artifact import Artifact

+if TYPE_CHECKING:
+    from vitessce import VitessceConfig
+

-# tested in lamin-spatial
-
-def save_vitessce_config(vitessce_config, description: str) -> Artifact:
+# tested & context in https://github.com/laminlabs/lamin-spatial
+def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> Artifact:
     """Takes a ``VitessceConfig`` object and saves it as an artifact.

     Args:
         vitessce_config (``VitessceConfig``): A VitessceConfig object.
         description: A description for the artifact.
     """
+    # can't assume vitessce is installed
     from vitessce import VitessceConfig

-    assert isinstance(vitessce_config, VitessceConfig)
+    # create a local _data export_ in a folder
     timestamp = datetime.now(timezone.utc).isoformat().split(".")[0]
-    filepath = f"./vitessce_export_{timestamp}.vitessce"
-    vitessce_config.export(to="files", base_url="", out_dir=filepath)
-    logger.important(f"local export: {filepath}")
-    artifact = Artifact(filepath, description=description)
+    vitesse_export_folder = f"./vitessce_export_{timestamp}.vitessce"
+    vitessce_config.export(to="files", base_url="", out_dir=vitesse_export_folder)
+    logger.important(f"local export: {vitesse_export_folder}")
+    # create an artifact and store the local export in th cloud
+    artifact = Artifact(vitesse_export_folder, description=description)
     artifact.save()
+    # create a JSON export that points to the data in the cloud
     config_dict = vitessce_config.to_dict(base_url=artifact.path.to_url())
     logger.important(f"base url: {artifact.path.to_url()}")
+    # manually place that JSON export into the local data export folder
     config_filename = "vitessce_config.json"
-    config_file_local_path = f"{filepath}/{config_filename}"
+    config_file_local_path = f"{vitesse_export_folder}/{config_filename}"
     with open(config_file_local_path, "w") as file:
         json.dump(config_dict, file)
+    # manually place that JSON export into the previously registered artifact folder
     config_file_path = artifact.path / config_filename
     config_file_path.upload_from(config_file_local_path)
+    # log the the URLs
     logger.important(f"config url: {config_file_path.to_url()}")
     slug = ln_setup.settings.instance.slug
     logger.important(f"go to: https://lamin.ai/{slug}/artifact/{artifact.uid}")
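Calling the annotated function; a sketch in which the config construction is only indicated, since it is dataset-specific (see the vitessce documentation):

```python
import lamindb as ln
from vitessce import VitessceConfig

vc = VitessceConfig(schema_version="1.0.15", name="my-config")
# ... add datasets and views to vc ...
artifact = ln.integrations.save_vitessce_config(vc, description="Vitessce view")
```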
{lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.69.9
+Version: 0.70.0
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,15 +9,15 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.
-Requires-Dist: lamindb_setup==0.69.
+Requires-Dist: lnschema_core==0.65.0
+Requires-Dist: lamindb_setup==0.69.2
 Requires-Dist: lamin_utils==0.13.1
-Requires-Dist: lamin_cli==0.12.
+Requires-Dist: lamin_cli==0.12.3
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
 Requires-Dist: python-dateutil
-Requires-Dist: anndata>=0.8.0,<0.10.
+Requires-Dist: anndata>=0.8.0,<0.10.8
 Requires-Dist: scipy<1.13.0rc1
 Requires-Dist: fsspec
 Requires-Dist: pandas
@@ -27,7 +27,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
 Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
 Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
 Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
-Requires-Dist: bionty==0.42.
+Requires-Dist: bionty==0.42.8 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
 Requires-Dist: nox ; extra == "dev"
@@ -37,7 +37,6 @@ Requires-Dist: coverage ; extra == "dev"
 Requires-Dist: pytest-cov ; extra == "dev"
 Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
 Requires-Dist: faker-biology ; extra == "dev"
-Requires-Dist: lamin_vault ; extra == "dev"
 Requires-Dist: django-schema-graph ; extra == "erdiagram"
 Requires-Dist: readfcs>=1.1.7 ; extra == "fcs"
 Requires-Dist: fsspec[gs]==2023.12.2 ; extra == "gcp"
@@ -65,7 +64,7 @@ Provides-Extra: zarr
 - Track data lineage across notebooks & pipelines.
 - Integrate registries for experimental metadata & in-house ontologies.
 - Validate, standardize & annotate.
-- Collaborate across distributed
+- Collaborate across distributed databases.

 ## Documentation
