lamindb 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_finish.py +8 -3
- lamindb/core/_context.py +27 -8
- lamindb/core/datasets/_small.py +50 -20
- lamindb/core/storage/_polars_lazy_df.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +1 -2
- lamindb/curators/__init__.py +0 -6
- lamindb/curators/_legacy.py +1 -579
- lamindb/curators/core.py +35 -12
- lamindb/examples/__init__.py +2 -0
- lamindb/examples/cellxgene/__init__.py +11 -0
- lamindb/examples/cellxgene/_cellxgene.py +238 -0
- lamindb/{curators/_cellxgene_schemas/schema_versions.csv → examples/cellxgene/cxg_schema_versions.csv} +11 -0
- lamindb/models/_describe.py +69 -56
- lamindb/models/_django.py +55 -47
- lamindb/models/_feature_manager.py +37 -34
- lamindb/models/artifact.py +40 -36
- lamindb/models/can_curate.py +3 -1
- lamindb/models/feature.py +29 -8
- lamindb/models/schema.py +37 -21
- lamindb/models/sqlrecord.py +48 -46
- lamindb-1.9.0.dist-info/METADATA +144 -0
- {lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/RECORD +25 -24
- lamindb/curators/_cellxgene_schemas/__init__.py +0 -198
- lamindb-1.8.0.dist-info/METADATA +0 -68
- {lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/LICENSE +0 -0
- {lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_finish.py
CHANGED
@@ -260,9 +260,9 @@ def save_context_core(
|
|
260
260
|
is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
|
261
261
|
source_code_path = filepath
|
262
262
|
report_path: Path | None = None
|
263
|
-
save_source_code_and_report =
|
263
|
+
save_source_code_and_report = filepath.exists()
|
264
264
|
if (
|
265
|
-
is_run_from_ipython and notebook_runner != "nbconvert"
|
265
|
+
is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
|
266
266
|
): # python notebooks in interactive session
|
267
267
|
import nbproject
|
268
268
|
|
@@ -281,7 +281,7 @@ def save_context_core(
|
|
281
281
|
logger.warning(
|
282
282
|
"the notebook on disk wasn't saved within the last 10 sec"
|
283
283
|
)
|
284
|
-
if is_ipynb: # could be from CLI outside interactive session
|
284
|
+
if is_ipynb and filepath.exists(): # could be from CLI outside interactive session
|
285
285
|
try:
|
286
286
|
import jupytext # noqa: F401
|
287
287
|
from nbproject.dev import (
|
@@ -315,6 +315,8 @@ def save_context_core(
|
|
315
315
|
".ipynb", ".py"
|
316
316
|
)
|
317
317
|
notebook_to_script(transform.description, filepath, source_code_path)
|
318
|
+
elif is_ipynb and not filepath.exists():
|
319
|
+
logger.warning("notebook file does not exist in compute environment")
|
318
320
|
elif is_r_notebook:
|
319
321
|
if filepath.with_suffix(".nb.html").exists():
|
320
322
|
report_path = filepath.with_suffix(".nb.html")
|
@@ -365,6 +367,9 @@ def save_context_core(
|
|
365
367
|
base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
|
366
368
|
paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
|
367
369
|
existing_paths = [path for path in paths if path.exists()]
|
370
|
+
if len(existing_paths) == 2:
|
371
|
+
# let's not store the python environment for an R session for now
|
372
|
+
existing_paths = [base_path / "r_pak_lockfile.json"]
|
368
373
|
|
369
374
|
if existing_paths:
|
370
375
|
overwrite_env = True
|
lamindb/core/_context.py
CHANGED
@@ -322,6 +322,7 @@ class Context:
|
|
322
322
|
params: dict | None = None,
|
323
323
|
new_run: bool | None = None,
|
324
324
|
path: str | None = None,
|
325
|
+
pypackages: bool | None = None,
|
325
326
|
) -> None:
|
326
327
|
"""Track a run of your notebook or script.
|
327
328
|
|
@@ -341,6 +342,7 @@ class Context:
|
|
341
342
|
(default notebook), if `True`, creates new run (default non-notebook).
|
342
343
|
path: Filepath of notebook or script. Only needed if it can't be
|
343
344
|
automatically detected.
|
345
|
+
pypackages: If `True` or `None`, infers Python packages used in a notebook.
|
344
346
|
|
345
347
|
Examples:
|
346
348
|
|
@@ -424,7 +426,9 @@ class Context:
|
|
424
426
|
if transform is None:
|
425
427
|
description = None
|
426
428
|
if is_run_from_ipython:
|
427
|
-
self._path, description = self._track_notebook(
|
429
|
+
self._path, description = self._track_notebook(
|
430
|
+
path_str=path, pypackages=pypackages
|
431
|
+
)
|
428
432
|
transform_type = "notebook"
|
429
433
|
transform_ref = None
|
430
434
|
transform_ref_type = None
|
@@ -587,11 +591,14 @@ class Context:
|
|
587
591
|
self,
|
588
592
|
*,
|
589
593
|
path_str: str | None,
|
594
|
+
pypackages: bool | None = None,
|
590
595
|
) -> tuple[Path, str | None]:
|
591
596
|
if path_str is None:
|
592
597
|
path, self._notebook_runner = get_notebook_path()
|
593
598
|
else:
|
594
599
|
path = Path(path_str)
|
600
|
+
if pypackages is None:
|
601
|
+
pypackages = True
|
595
602
|
description = None
|
596
603
|
path_str = path.as_posix()
|
597
604
|
if path_str.endswith("Untitled.ipynb"):
|
@@ -612,10 +619,11 @@ class Context:
|
|
612
619
|
if nbproject_title is not None:
|
613
620
|
description = nbproject_title
|
614
621
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
622
|
+
if pypackages:
|
623
|
+
self._logging_message_imports += (
|
624
|
+
"notebook imports:"
|
625
|
+
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
626
|
+
)
|
619
627
|
except Exception:
|
620
628
|
logger.debug("reading the notebook file failed")
|
621
629
|
pass
|
@@ -685,10 +693,21 @@ class Context:
|
|
685
693
|
source_code_path = ln_setup.settings.cache_dir / self._path.name.replace(
|
686
694
|
".ipynb", ".py"
|
687
695
|
)
|
688
|
-
|
689
|
-
|
696
|
+
if (
|
697
|
+
self._path.exists()
|
698
|
+
): # notebook kernel might be running on a different machine
|
699
|
+
notebook_to_script(description, self._path, source_code_path)
|
700
|
+
transform_hash, _ = hash_file(source_code_path)
|
701
|
+
else:
|
702
|
+
logger.debug(
|
703
|
+
"skipping notebook hash comparison, notebook kernel running on a different machine"
|
704
|
+
)
|
705
|
+
transform_hash = None
|
690
706
|
# see whether we find a transform with the exact same hash
|
691
|
-
|
707
|
+
if transform_hash is not None:
|
708
|
+
aux_transform = Transform.filter(hash=transform_hash).one_or_none()
|
709
|
+
else:
|
710
|
+
aux_transform = None
|
692
711
|
# if the user did not pass a uid and there is no matching aux_transform
|
693
712
|
# need to search for the transform based on the filename
|
694
713
|
if self.uid is None and aux_transform is None:
|
lamindb/core/datasets/_small.py
CHANGED
@@ -9,35 +9,65 @@ import pandas as pd
|
|
9
9
|
|
10
10
|
def small_dataset3_cellxgene(
|
11
11
|
otype: Literal["DataFrame", "AnnData"] = "AnnData",
|
12
|
+
with_obs_defaults: bool = False,
|
13
|
+
with_obs_typo: bool = False,
|
12
14
|
) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
|
13
15
|
# TODO: consider other ids for other organisms
|
14
16
|
# "ENSMUSG00002076988"
|
15
17
|
var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
18
|
+
|
19
|
+
lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
|
20
|
+
obs_df = pd.DataFrame(
|
21
|
+
{
|
22
|
+
"disease_ontology_term_id": [
|
23
|
+
"MONDO:0004975",
|
24
|
+
"MONDO:0004980",
|
25
|
+
"MONDO:0004980",
|
26
|
+
],
|
27
|
+
"development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
|
28
|
+
"organism": ["human", "human", "human"],
|
29
|
+
"sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
|
30
|
+
"tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
|
31
|
+
"cell_type": ["T cell", "B cell", "B cell"],
|
32
|
+
"self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
|
33
|
+
"donor_id": ["-1", "1", "2"],
|
34
|
+
"is_primary_data": [False, False, False],
|
35
|
+
"suspension_type": ["cell", "cell", "cell"],
|
36
|
+
"tissue_type": ["tissue", "tissue", "tissue"],
|
37
|
+
},
|
29
38
|
index=["barcode1", "barcode2", "barcode3"],
|
30
39
|
)
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
obs=dataset_df[[key for key in dataset_dict if key not in var_ids]],
|
40
|
+
|
41
|
+
var_df = pd.DataFrame(
|
42
|
+
index=var_ids, data={"feature_is_filtered": [False, False, False]}
|
35
43
|
)
|
44
|
+
|
45
|
+
X = pd.DataFrame(
|
46
|
+
{
|
47
|
+
var_ids[0]: [2, 3, 3],
|
48
|
+
var_ids[1]: [3, 4, 5],
|
49
|
+
var_ids[2]: [4, 2, 3],
|
50
|
+
},
|
51
|
+
index=["barcode1", "barcode2", "barcode3"],
|
52
|
+
dtype="float32",
|
53
|
+
)
|
54
|
+
|
55
|
+
obs_df["donor_id"] = obs_df["donor_id"].astype("category")
|
56
|
+
|
36
57
|
if otype == "DataFrame":
|
37
|
-
return
|
58
|
+
return pd.concat([X, obs_df], axis=1)
|
38
59
|
else:
|
39
|
-
|
40
|
-
|
60
|
+
adata = ad.AnnData(X=X, obs=obs_df, var=var_df)
|
61
|
+
adata.uns["title"] = "CELLxGENE example"
|
62
|
+
adata.obsm["X_pca"] = np.array(
|
63
|
+
[[-1.2, 0.8], [0.5, -0.3], [0.7, -0.5]], dtype="float32"
|
64
|
+
)
|
65
|
+
# CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
|
66
|
+
adata.raw = adata.copy()
|
67
|
+
adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
|
68
|
+
if with_obs_defaults:
|
69
|
+
adata.obs["assay"] = "single-cell RNA sequencing"
|
70
|
+
return adata
|
41
71
|
|
42
72
|
|
43
73
|
def anndata_with_obs() -> ad.AnnData:
|
@@ -4,7 +4,7 @@ from contextlib import contextmanager
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import TYPE_CHECKING
|
6
6
|
|
7
|
-
from lamindb_setup.core.
|
7
|
+
from lamindb_setup.core.upath import get_storage_region
|
8
8
|
|
9
9
|
if TYPE_CHECKING:
|
10
10
|
from collections.abc import Iterator
|
@@ -8,8 +8,7 @@ import pyarrow as pa
|
|
8
8
|
from anndata import AnnData, read_h5ad
|
9
9
|
from lamin_utils import logger
|
10
10
|
from lamindb_setup import settings as setup_settings
|
11
|
-
from lamindb_setup.core.
|
12
|
-
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
11
|
+
from lamindb_setup.core.upath import LocalPathClasses, create_path, get_storage_region
|
13
12
|
from packaging import version
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
lamindb/curators/__init__.py
CHANGED
@@ -18,10 +18,6 @@ Modules.
|
|
18
18
|
|
19
19
|
"""
|
20
20
|
|
21
|
-
from ._legacy import ( # backward compat
|
22
|
-
CellxGeneAnnDataCatManager,
|
23
|
-
PertAnnDataCatManager,
|
24
|
-
)
|
25
21
|
from .core import (
|
26
22
|
AnnDataCurator,
|
27
23
|
DataFrameCurator,
|
@@ -31,8 +27,6 @@ from .core import (
|
|
31
27
|
)
|
32
28
|
|
33
29
|
__all__ = [
|
34
|
-
"CellxGeneAnnDataCatManager",
|
35
|
-
"PertAnnDataCatManager",
|
36
30
|
"AnnDataCurator",
|
37
31
|
"DataFrameCurator",
|
38
32
|
"MuDataCurator",
|