lamindb 1.7.1__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_finish.py +8 -3
- lamindb/core/_context.py +32 -17
- lamindb/core/_settings.py +31 -5
- lamindb/core/datasets/_small.py +50 -20
- lamindb/core/storage/_backed_access.py +36 -28
- lamindb/core/storage/_polars_lazy_df.py +44 -5
- lamindb/core/storage/_tiledbsoma.py +1 -2
- lamindb/curators/__init__.py +0 -6
- lamindb/curators/_legacy.py +1 -579
- lamindb/curators/core.py +53 -14
- lamindb/examples/__init__.py +2 -0
- lamindb/examples/cellxgene/__init__.py +11 -0
- lamindb/examples/cellxgene/_cellxgene.py +238 -0
- lamindb/{curators/_cellxgene_schemas/schema_versions.csv → examples/cellxgene/cxg_schema_versions.csv} +11 -0
- lamindb/models/_describe.py +69 -56
- lamindb/models/_django.py +80 -53
- lamindb/models/_feature_manager.py +37 -34
- lamindb/models/artifact.py +44 -71
- lamindb/models/can_curate.py +3 -1
- lamindb/models/feature.py +43 -28
- lamindb/models/schema.py +37 -21
- lamindb/models/sqlrecord.py +48 -46
- lamindb/models/storage.py +83 -34
- lamindb-1.9.0.dist-info/METADATA +144 -0
- {lamindb-1.7.1.dist-info → lamindb-1.9.0.dist-info}/RECORD +28 -27
- lamindb/curators/_cellxgene_schemas/__init__.py +0 -198
- lamindb-1.7.1.dist-info/METADATA +0 -68
- {lamindb-1.7.1.dist-info → lamindb-1.9.0.dist-info}/LICENSE +0 -0
- {lamindb-1.7.1.dist-info → lamindb-1.9.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_finish.py
CHANGED
```diff
@@ -260,9 +260,9 @@ def save_context_core(
     is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
     source_code_path = filepath
     report_path: Path | None = None
-    save_source_code_and_report = True
+    save_source_code_and_report = filepath.exists()
     if (
-        is_run_from_ipython and notebook_runner != "nbconvert"
+        is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
     ):  # python notebooks in interactive session
         import nbproject
 
@@ -281,7 +281,7 @@ def save_context_core(
             logger.warning(
                 "the notebook on disk wasn't saved within the last 10 sec"
             )
-    if is_ipynb:  # could be from CLI outside interactive session
+    if is_ipynb and filepath.exists():  # could be from CLI outside interactive session
         try:
             import jupytext  # noqa: F401
             from nbproject.dev import (
@@ -315,6 +315,8 @@ def save_context_core(
                 ".ipynb", ".py"
             )
             notebook_to_script(transform.description, filepath, source_code_path)
+    elif is_ipynb and not filepath.exists():
+        logger.warning("notebook file does not exist in compute environment")
    elif is_r_notebook:
        if filepath.with_suffix(".nb.html").exists():
            report_path = filepath.with_suffix(".nb.html")
@@ -365,6 +367,9 @@ def save_context_core(
     base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
     paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
     existing_paths = [path for path in paths if path.exists()]
+    if len(existing_paths) == 2:
+        # let's not store the python environment for an R session for now
+        existing_paths = [base_path / "r_pak_lockfile.json"]
 
     if existing_paths:
         overwrite_env = True
```
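Net effect of these hunks: source code and report are only saved when the notebook file actually exists on the machine running the kernel, and a dual Python/R environment capture now keeps only the R lockfile. A hedged sketch of the user-visible behavior:

```python
import lamindb as ln

ln.track()   # e.g. a notebook whose .ipynb file is not present on the compute node
# ... analysis ...
ln.finish()  # now warns "notebook file does not exist in compute environment"
             # instead of failing while trying to save source code and report
```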
lamindb/core/_context.py
CHANGED
```diff
@@ -17,20 +17,18 @@ from lamin_utils import logger
 from lamindb_setup.core import deprecated
 from lamindb_setup.core.hashing import hash_file
 
-from
-from lamindb.base.ids import base62_12
-from lamindb.models import Run, Transform, format_field_value
-
-from ..core._settings import settings
+from ..base.ids import base62_12
 from ..errors import (
     InvalidArgument,
     TrackNotCalled,
     UpdateContext,
 )
+from ..models import Run, Transform, format_field_value
 from ..models._is_versioned import bump_version as bump_version_function
 from ..models._is_versioned import (
     increment_base62,
 )
+from ._settings import is_read_only_connection, settings
 from ._sync_git import get_transform_reference_from_git_repo
 from ._track_environment import track_python_environment
```
```diff
@@ -324,6 +322,7 @@ class Context:
         params: dict | None = None,
         new_run: bool | None = None,
         path: str | None = None,
+        pypackages: bool | None = None,
     ) -> None:
         """Track a run of your notebook or script.
 
@@ -343,6 +342,7 @@ class Context:
                 (default notebook), if `True`, creates new run (default non-notebook).
             path: Filepath of notebook or script. Only needed if it can't be
                 automatically detected.
+            pypackages: If `True` or `None`, infers Python packages used in a notebook.
 
         Examples:
 
```
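The new `pypackages` flag is threaded through to `_track_notebook()` in the hunks below. A minimal usage sketch (the transform uid is a hypothetical placeholder):

```python
import lamindb as ln

# opt out of scanning the notebook for imported packages
ln.track("FPnfDtJz8qbE0000", pypackages=False)  # hypothetical uid
```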
```diff
@@ -365,10 +365,8 @@ class Context:
             save_context_core,
         )
 
-        instance_settings = ln_setup.settings.instance
         # similar logic here: https://github.com/laminlabs/lamindb/pull/2527
-
-        if instance_settings.dialect == "postgresql" and "read" in instance_settings.db:
+        if is_read_only_connection():
             logger.warning("skipping track(), connected in read-only mode")
             return None
         if project is None:
@@ -428,7 +426,9 @@ class Context:
         if transform is None:
             description = None
             if is_run_from_ipython:
-                self._path, description = self._track_notebook(path_str=path)
+                self._path, description = self._track_notebook(
+                    path_str=path, pypackages=pypackages
+                )
                 transform_type = "notebook"
                 transform_ref = None
                 transform_ref_type = None
```
```diff
@@ -591,11 +591,14 @@ class Context:
         self,
         *,
         path_str: str | None,
+        pypackages: bool | None = None,
     ) -> tuple[Path, str | None]:
         if path_str is None:
             path, self._notebook_runner = get_notebook_path()
         else:
             path = Path(path_str)
+        if pypackages is None:
+            pypackages = True
         description = None
         path_str = path.as_posix()
         if path_str.endswith("Untitled.ipynb"):
@@ -616,10 +619,11 @@ class Context:
             if nbproject_title is not None:
                 description = nbproject_title
 
-            self._logging_message_imports += (
-                "notebook imports:"
-                f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
-            )
+            if pypackages:
+                self._logging_message_imports += (
+                    "notebook imports:"
+                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
+                )
         except Exception:
             logger.debug("reading the notebook file failed")
             pass
```
```diff
@@ -689,10 +693,21 @@ class Context:
         source_code_path = ln_setup.settings.cache_dir / self._path.name.replace(
             ".ipynb", ".py"
         )
-        notebook_to_script(description, self._path, source_code_path)
-        transform_hash, _ = hash_file(source_code_path)
+        if (
+            self._path.exists()
+        ):  # notebook kernel might be running on a different machine
+            notebook_to_script(description, self._path, source_code_path)
+            transform_hash, _ = hash_file(source_code_path)
+        else:
+            logger.debug(
+                "skipping notebook hash comparison, notebook kernel running on a different machine"
+            )
+            transform_hash = None
         # see whether we find a transform with the exact same hash
-        aux_transform = Transform.filter(hash=transform_hash).one_or_none()
+        if transform_hash is not None:
+            aux_transform = Transform.filter(hash=transform_hash).one_or_none()
+        else:
+            aux_transform = None
         # if the user did not pass a uid and there is no matching aux_transform
         # need to search for the transform based on the filename
         if self.uid is None and aux_transform is None:
```
```diff
@@ -856,7 +871,7 @@ class Context:
             and not transform_was_saved
         ):
             raise UpdateContext(
-                f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* key and `ln.track("{
+                f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* key and `ln.track("{base62_12()}0000")`.'
             )
         # check whether transform source code was already saved
         if transform_was_saved:
```
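For reference, the uid arithmetic that the corrected error message suggests, sketched with the helpers imported in the first hunk of this file:

```python
from lamindb.base.ids import base62_12
from lamindb.models._is_versioned import increment_base62

uid = "FPnfDtJz8qbE0000"  # hypothetical: 12-char stem + 4-char version suffix
revision_uid = uid[:-4] + increment_base62(uid[-4:])  # bumps "0000" -> "0001"
fresh_uid = base62_12() + "0000"  # a new transform under a different stem
```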
lamindb/core/_settings.py
CHANGED
```diff
@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 import os
+import sys
 from typing import TYPE_CHECKING
 
 import lamindb_setup as ln_setup
 from lamin_utils import colors, logger
+from lamindb_setup import settings as setup_settings
 from lamindb_setup._set_managed_storage import set_managed_storage
-from lamindb_setup.core
+from lamindb_setup.core import deprecated
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
 from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
```
```diff
@@ -19,6 +21,15 @@ if TYPE_CHECKING:
     from lamindb_setup.core._settings_storage import StorageSettings
     from upath import UPath
 
+
+def is_read_only_connection() -> bool:
+    instance = setup_settings.instance
+    if instance.dialect == "postgresql":
+        db_url = instance.db
+        return "read" in db_url or "public" in db_url
+    return False
+
+
 VERBOSITY_TO_INT = {
     "error": 0,  # 40
     "warning": 1,  # 30
```
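`is_read_only_connection()` centralizes the heuristic that `track()` previously inlined: a Postgres connection string containing `read` or `public` is treated as read-only. A usage sketch:

```python
from lamindb.core._settings import is_read_only_connection

if is_read_only_connection():
    # e.g. connected to a public read-only instance; write operations
    # such as ln.track() are skipped with a warning
    ...
```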
```diff
@@ -44,6 +55,9 @@ class Settings:
         self._sync_git_repo: str | None = None
 
     def __repr__(self) -> str:  # pragma: no cover
+        if "sphinx" in sys.modules:
+            return object.__repr__(self)
+
         cls_name = colors.green(self.__class__.__name__)
         verbosity_color = colors.yellow if self.verbosity == "warning" else colors.green
         verbosity_str = verbosity_color(self.verbosity)
```
```diff
@@ -181,6 +195,8 @@ class Settings:
     def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
         if isinstance(path_kwargs, tuple):
             path, kwargs = path_kwargs
+            if isinstance(kwargs, str):
+                kwargs = {"host": kwargs}
         else:
             path, kwargs = path_kwargs, {}
         set_managed_storage(path, **kwargs)
```
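With this change the second tuple element may be a plain string, shorthand for the `host` keyword of `set_managed_storage`. A sketch (root path and host name are made up):

```python
import lamindb as ln

# previously: ln.settings.storage = ("/mnt/shared/storage", {"host": "hpc-node-1"})
ln.settings.storage = ("/mnt/shared/storage", "hpc-node-1")  # now equivalent
```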
```diff
@@ -196,18 +212,28 @@ class Settings:
         return ln_setup.settings.cache_dir
 
     @property
-    def storage_local(self) -> StorageSettings:
+    def local_storage(self) -> StorageSettings:
         """An additional local default storage (a path to its root).
 
         Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
 
         Guide: :doc:`faq/keep-artifacts-local`
         """
-        return ln_setup.settings.instance.storage_local
+        return ln_setup.settings.instance.local_storage
+
+    @local_storage.setter
+    def local_storage(self, local_root: Path):
+        ln_setup.settings.instance.local_storage = local_root
+
+    @property
+    @deprecated("local_storage")
+    def storage_local(self) -> StorageSettings:
+        return self.local_storage
 
     @storage_local.setter
-    def storage_local(self, local_root_host: tuple[Path | str, str]):
-        ln_setup.settings.instance.storage_local = local_root_host
+    @deprecated("local_storage")
+    def storage_local(self, local_root_host: tuple[Path | str, str]):
+        self.local_storage = local_root_host  # type: ignore
 
     @property
     def verbosity(self) -> str:
```
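`storage_local` survives as a deprecated alias, so existing code keeps working with a deprecation hint. A sketch of the renamed setting, assuming an instance initialized with `keep_artifacts_local`:

```python
import lamindb as ln

ln.settings.local_storage = "./our_local_storage"  # new spelling
ln.settings.local_storage.root                     # root of the local default storage
ln.settings.storage_local                          # old spelling, warns but still works
```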
lamindb/core/datasets/_small.py
CHANGED
```diff
@@ -9,35 +9,65 @@ import pandas as pd
 
 def small_dataset3_cellxgene(
     otype: Literal["DataFrame", "AnnData"] = "AnnData",
+    with_obs_defaults: bool = False,
+    with_obs_typo: bool = False,
 ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
     # TODO: consider other ids for other organisms
     # "ENSMUSG00002076988"
     var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
+
+    lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
+    obs_df = pd.DataFrame(
+        {
+            "disease_ontology_term_id": [
+                "MONDO:0004975",
+                "MONDO:0004980",
+                "MONDO:0004980",
+            ],
+            "development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
+            "organism": ["human", "human", "human"],
+            "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
+            "tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
+            "cell_type": ["T cell", "B cell", "B cell"],
+            "self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
+            "donor_id": ["-1", "1", "2"],
+            "is_primary_data": [False, False, False],
+            "suspension_type": ["cell", "cell", "cell"],
+            "tissue_type": ["tissue", "tissue", "tissue"],
+        },
         index=["barcode1", "barcode2", "barcode3"],
     )
-        obs=dataset_df[[key for key in dataset_dict if key not in var_ids]],
+
+    var_df = pd.DataFrame(
+        index=var_ids, data={"feature_is_filtered": [False, False, False]}
     )
+
+    X = pd.DataFrame(
+        {
+            var_ids[0]: [2, 3, 3],
+            var_ids[1]: [3, 4, 5],
+            var_ids[2]: [4, 2, 3],
+        },
+        index=["barcode1", "barcode2", "barcode3"],
+        dtype="float32",
+    )
+
+    obs_df["donor_id"] = obs_df["donor_id"].astype("category")
+
     if otype == "DataFrame":
-        return dataset_df, dataset_dict
+        return pd.concat([X, obs_df], axis=1)
     else:
+        adata = ad.AnnData(X=X, obs=obs_df, var=var_df)
+        adata.uns["title"] = "CELLxGENE example"
+        adata.obsm["X_pca"] = np.array(
+            [[-1.2, 0.8], [0.5, -0.3], [0.7, -0.5]], dtype="float32"
+        )
+        # CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
+        adata.raw = adata.copy()
+        adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
+        if with_obs_defaults:
+            adata.obs["assay"] = "single-cell RNA sequencing"
+        return adata
 
 
 def anndata_with_obs() -> ad.AnnData:
```
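A sketch of exercising the extended fixture; `with_obs_typo` deliberately corrupts the lung ontology id so that curator error paths can be tested:

```python
from lamindb.core.datasets import small_dataset3_cellxgene

df = small_dataset3_cellxgene(otype="DataFrame")  # X columns concatenated with obs columns
adata = small_dataset3_cellxgene(with_obs_typo=True)  # tissue id becomes "UBERON:0002048XXX"
adata = small_dataset3_cellxgene(with_obs_defaults=True)  # adds a default "assay" obs column
```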
lamindb/core/storage/_backed_access.py
CHANGED
```diff
@@ -163,6 +163,11 @@ def _open_dataframe(
     engine: Literal["pyarrow", "polars"] = "pyarrow",
     **kwargs,
 ) -> PyArrowDataset | Iterator[PolarsLazyFrame]:
+    if engine not in {"pyarrow", "polars"}:
+        raise ValueError(
+            f"Unknown engine: {engine}. It should be 'pyarrow' or 'polars'."
+        )
+
     df_suffix: str
     if suffix is None:
         df_suffixes = _flat_suffixes(paths)
@@ -175,34 +180,37 @@ def _open_dataframe(
     else:
         df_suffix = suffix
 
-    if engine == "pyarrow":
-        if df_suffix not in PYARROW_SUFFIXES:
-            raise ValueError(
-                f"{df_suffix} files are not supported by pyarrow, "
-                f"they should have one of these formats: {', '.join(PYARROW_SUFFIXES)}."
-            )
-        # this checks that the filesystem is the same for all paths
-        # this is a requirement of pyarrow.dataset.dataset
-        if not isinstance(paths, Path):  # is a list then
-            fs = getattr(paths[0], "fs", None)
-            for path in paths[1:]:
-                # this assumes that the filesystems are cached by fsspec
-                if getattr(path, "fs", None) is not fs:
-                    raise ValueError(
-                        "The collection has artifacts with different filesystems, "
-                        "this is not supported by pyarrow."
-                    )
-        dataframe = _open_pyarrow_dataset(paths, **kwargs)
-    elif engine == "polars":
-        if df_suffix not in POLARS_SUFFIXES:
-            raise ValueError(
-                f"{df_suffix} files are not supported by polars, "
-                f"they should have one of these formats: {', '.join(POLARS_SUFFIXES)}."
-            )
-        dataframe = _open_polars_lazy_df(paths, **kwargs)
-    else:
+    if engine == "pyarrow" and df_suffix not in PYARROW_SUFFIXES:
         raise ValueError(
-            f"Unknown engine: {engine}. It should be 'pyarrow' or 'polars'."
+            f"{df_suffix} files are not supported by pyarrow, "
+            f"they should have one of these formats: {', '.join(PYARROW_SUFFIXES)}."
+        )
+    elif engine == "polars" and df_suffix not in POLARS_SUFFIXES:
+        raise ValueError(
+            f"{df_suffix} files are not supported by polars, "
+            f"they should have one of these formats: {', '.join(POLARS_SUFFIXES)}."
         )
 
-
+    polars_without_fsspec = engine == "polars" and not kwargs.get("use_fsspec", False)
+    if (engine == "pyarrow" or polars_without_fsspec) and not isinstance(paths, Path):
+        # this checks that the filesystem is the same for all paths
+        # this is a requirement of pyarrow.dataset.dataset
+        fs = getattr(paths[0], "fs", None)
+        for path in paths[1:]:
+            # this assumes that the filesystems are cached by fsspec
+            if getattr(path, "fs", None) is not fs:
+                engine_msg = (
+                    "polars engine without passing `use_fsspec=True`"
+                    if engine == "polars"
+                    else "pyarrow engine"
+                )
+                raise ValueError(
+                    "The collection has artifacts with different filesystems, "
+                    f"this is not supported for {engine_msg}."
+                )
+
+    return (
+        _open_pyarrow_dataset(paths, **kwargs)
+        if engine == "pyarrow"
+        else _open_polars_lazy_df(paths, **kwargs)
+    )
```
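These code paths back `.open()` on tabular artifacts and collections. A hedged sketch of how the reworked engine dispatch is reached (the collection key is made up):

```python
import lamindb as ln

collection = ln.Collection.get(key="my-parquet-collection")  # hypothetical key

dataset = collection.open(engine="pyarrow")  # default: a pyarrow dataset

# polars yields a LazyFrame inside a context manager; use_fsspec=True keeps
# the previous behavior of passing fsspec file objects to polars
with collection.open(engine="polars", use_fsspec=True) as lazy_df:
    df = lazy_df.collect()
```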
lamindb/core/storage/_polars_lazy_df.py
CHANGED
```diff
@@ -4,6 +4,8 @@ from contextlib import contextmanager
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+from lamindb_setup.core.upath import get_storage_region
+
 if TYPE_CHECKING:
     from collections.abc import Iterator
 
@@ -13,9 +15,35 @@ if TYPE_CHECKING:
 POLARS_SUFFIXES = (".parquet", ".csv", ".ndjson", ".ipc")
 
 
+def _polars_storage_options(storepath: UPath) -> dict[str, str | bool]:
+    storage_options: dict[str, str | bool] = {}
+    s3fs_options = storepath.storage_options
+
+    endpoint_url = s3fs_options.get("endpoint_url", None)
+    if endpoint_url is not None:
+        storage_options["aws_virtual_hosted_style_request"] = False
+        storage_options["aws_endpoint_url"] = endpoint_url
+        if endpoint_url.startswith("http://"):
+            storage_options["aws_allow_http"] = True
+    else:
+        storage_options["aws_region"] = get_storage_region(storepath)
+
+    if s3fs_options.get("anon", False):
+        storage_options["aws_skip_signature"] = True
+    else:
+        if "key" in s3fs_options:
+            storage_options["aws_access_key_id"] = s3fs_options["key"]
+        if "secret" in s3fs_options:
+            storage_options["aws_secret_access_key"] = s3fs_options["secret"]
+        if "token" in s3fs_options:
+            storage_options["aws_session_token"] = s3fs_options["token"]
+
+    return storage_options
+
+
 @contextmanager
 def _open_polars_lazy_df(
-    paths: UPath | list[UPath], **kwargs
+    paths: UPath | list[UPath], use_fsspec: bool = False, **kwargs
 ) -> Iterator[PolarsLazyFrame]:
     try:
         import polars as pl
@@ -38,14 +66,25 @@ def _open_polars_lazy_df(
             path_list += [p for p in path.rglob("*") if p.suffix != ""]
         else:
             path_list.append(path)
+    # assume the filesystem is the same for all
+    # it is checked in _open_dataframe
+    path0 = path_list[0]
+    storage_options = None
+    if not use_fsspec:
+        storage_options = kwargs.pop("storage_options", None)
+        if path0.protocol == "s3" and storage_options is None:
+            storage_options = _polars_storage_options(path0)
 
     open_files = []
 
     try:
         for path in path_list:
-            open_files.append(path.open(mode="rb"))
+            open_files.append(path.open(mode="rb") if use_fsspec else path.as_posix())
 
-        yield scans[path_list[0].suffix](open_files, **kwargs)
+        yield scans[path_list[0].suffix](
+            open_files, storage_options=storage_options, **kwargs
+        )
     finally:
-        for open_file in open_files:
-            open_file.close()
+        if use_fsspec:
+            for open_file in open_files:
+                open_file.close()
```
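`_polars_storage_options` translates fsspec/s3fs-style options into the object-store keys that polars' native readers expect, which is what lets the new default path (`use_fsspec=False`) hand plain S3 URLs to polars. An illustration (bucket and output are made up):

```python
from upath import UPath

storepath = UPath("s3://my-bucket/data.parquet", anon=True)  # hypothetical bucket
_polars_storage_options(storepath)
# e.g. {"aws_region": "us-east-1", "aws_skip_signature": True}
```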
lamindb/core/storage/_tiledbsoma.py
CHANGED
```diff
@@ -8,8 +8,7 @@ import pyarrow as pa
 from anndata import AnnData, read_h5ad
 from lamin_utils import logger
 from lamindb_setup import settings as setup_settings
-from lamindb_setup.core.
-from lamindb_setup.core.upath import LocalPathClasses, create_path
+from lamindb_setup.core.upath import LocalPathClasses, create_path, get_storage_region
 from packaging import version
 
 if TYPE_CHECKING:
```
lamindb/curators/__init__.py
CHANGED
```diff
@@ -18,10 +18,6 @@ Modules.
 
 """
 
-from ._legacy import (  # backward compat
-    CellxGeneAnnDataCatManager,
-    PertAnnDataCatManager,
-)
 from .core import (
     AnnDataCurator,
     DataFrameCurator,
@@ -31,8 +27,6 @@ from .core import (
 )
 
 __all__ = [
-    "CellxGeneAnnDataCatManager",
-    "PertAnnDataCatManager",
     "AnnDataCurator",
     "DataFrameCurator",
     "MuDataCurator",
```