lamindb 1.8.0__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -108,7 +108,7 @@ Backwards compatibility.
108
108
 
109
109
  # ruff: noqa: I001
110
110
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
111
- __version__ = "1.8.0"
111
+ __version__ = "1.9.1"
112
112
 
113
113
  import warnings
114
114
 
lamindb/_finish.py CHANGED
@@ -260,9 +260,9 @@ def save_context_core(
260
260
  is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
261
261
  source_code_path = filepath
262
262
  report_path: Path | None = None
263
- save_source_code_and_report = True
263
+ save_source_code_and_report = filepath.exists()
264
264
  if (
265
- is_run_from_ipython and notebook_runner != "nbconvert"
265
+ is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
266
266
  ): # python notebooks in interactive session
267
267
  import nbproject
268
268
 
@@ -281,7 +281,7 @@ def save_context_core(
281
281
  logger.warning(
282
282
  "the notebook on disk wasn't saved within the last 10 sec"
283
283
  )
284
- if is_ipynb: # could be from CLI outside interactive session
284
+ if is_ipynb and filepath.exists(): # could be from CLI outside interactive session
285
285
  try:
286
286
  import jupytext # noqa: F401
287
287
  from nbproject.dev import (
@@ -315,6 +315,8 @@ def save_context_core(
315
315
  ".ipynb", ".py"
316
316
  )
317
317
  notebook_to_script(transform.description, filepath, source_code_path)
318
+ elif is_ipynb and not filepath.exists():
319
+ logger.warning("notebook file does not exist in compute environment")
318
320
  elif is_r_notebook:
319
321
  if filepath.with_suffix(".nb.html").exists():
320
322
  report_path = filepath.with_suffix(".nb.html")
@@ -365,6 +367,9 @@ def save_context_core(
365
367
  base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
366
368
  paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
367
369
  existing_paths = [path for path in paths if path.exists()]
370
+ if len(existing_paths) == 2:
371
+ # let's not store the python environment for an R session for now
372
+ existing_paths = [base_path / "r_pak_lockfile.json"]
368
373
 
369
374
  if existing_paths:
370
375
  overwrite_env = True
lamindb/core/_context.py CHANGED
@@ -322,6 +322,7 @@ class Context:
322
322
  params: dict | None = None,
323
323
  new_run: bool | None = None,
324
324
  path: str | None = None,
325
+ pypackages: bool | None = None,
325
326
  ) -> None:
326
327
  """Track a run of your notebook or script.
327
328
 
@@ -341,6 +342,7 @@ class Context:
341
342
  (default notebook), if `True`, creates new run (default non-notebook).
342
343
  path: Filepath of notebook or script. Only needed if it can't be
343
344
  automatically detected.
345
+ pypackages: If `True` or `None`, infers Python packages used in a notebook.
344
346
 
345
347
  Examples:
346
348
 
@@ -424,7 +426,9 @@ class Context:
424
426
  if transform is None:
425
427
  description = None
426
428
  if is_run_from_ipython:
427
- self._path, description = self._track_notebook(path_str=path)
429
+ self._path, description = self._track_notebook(
430
+ path_str=path, pypackages=pypackages
431
+ )
428
432
  transform_type = "notebook"
429
433
  transform_ref = None
430
434
  transform_ref_type = None
@@ -587,11 +591,14 @@ class Context:
587
591
  self,
588
592
  *,
589
593
  path_str: str | None,
594
+ pypackages: bool | None = None,
590
595
  ) -> tuple[Path, str | None]:
591
596
  if path_str is None:
592
597
  path, self._notebook_runner = get_notebook_path()
593
598
  else:
594
599
  path = Path(path_str)
600
+ if pypackages is None:
601
+ pypackages = True
595
602
  description = None
596
603
  path_str = path.as_posix()
597
604
  if path_str.endswith("Untitled.ipynb"):
@@ -612,10 +619,11 @@ class Context:
612
619
  if nbproject_title is not None:
613
620
  description = nbproject_title
614
621
 
615
- self._logging_message_imports += (
616
- "notebook imports:"
617
- f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
618
- )
622
+ if pypackages:
623
+ self._logging_message_imports += (
624
+ "notebook imports:"
625
+ f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
626
+ )
619
627
  except Exception:
620
628
  logger.debug("reading the notebook file failed")
621
629
  pass
@@ -685,10 +693,21 @@ class Context:
685
693
  source_code_path = ln_setup.settings.cache_dir / self._path.name.replace(
686
694
  ".ipynb", ".py"
687
695
  )
688
- notebook_to_script(description, self._path, source_code_path)
689
- transform_hash, _ = hash_file(source_code_path)
696
+ if (
697
+ self._path.exists()
698
+ ): # notebook kernel might be running on a different machine
699
+ notebook_to_script(description, self._path, source_code_path)
700
+ transform_hash, _ = hash_file(source_code_path)
701
+ else:
702
+ logger.debug(
703
+ "skipping notebook hash comparison, notebook kernel running on a different machine"
704
+ )
705
+ transform_hash = None
690
706
  # see whether we find a transform with the exact same hash
691
- aux_transform = Transform.filter(hash=transform_hash).one_or_none()
707
+ if transform_hash is not None:
708
+ aux_transform = Transform.filter(hash=transform_hash).one_or_none()
709
+ else:
710
+ aux_transform = None
692
711
  # if the user did not pass a uid and there is no matching aux_transform
693
712
  # need to search for the transform based on the filename
694
713
  if self.uid is None and aux_transform is None:
@@ -9,35 +9,65 @@ import pandas as pd
9
9
 
10
10
  def small_dataset3_cellxgene(
11
11
  otype: Literal["DataFrame", "AnnData"] = "AnnData",
12
+ with_obs_defaults: bool = False,
13
+ with_obs_typo: bool = False,
12
14
  ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
13
15
  # TODO: consider other ids for other organisms
14
16
  # "ENSMUSG00002076988"
15
17
  var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
16
- dataset_dict = {
17
- var_ids[0]: [2, 3, 3],
18
- var_ids[1]: [3, 4, 5],
19
- var_ids[2]: [4, 2, 3],
20
- "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
21
- "organism": ["human", "human", "human"],
22
- "sex": ["female", "male", "unknown"],
23
- "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
24
- "tissue": ["lungg", "lungg", "heart"],
25
- "donor": ["-1", "1", "2"],
26
- }
27
- dataset_df = pd.DataFrame(
28
- dataset_dict,
18
+
19
+ lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
20
+ obs_df = pd.DataFrame(
21
+ {
22
+ "disease_ontology_term_id": [
23
+ "MONDO:0004975",
24
+ "MONDO:0004980",
25
+ "MONDO:0004980",
26
+ ],
27
+ "development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
28
+ "organism": ["human", "human", "human"],
29
+ "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
30
+ "tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
31
+ "cell_type": ["T cell", "B cell", "B cell"],
32
+ "self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
33
+ "donor_id": ["-1", "1", "2"],
34
+ "is_primary_data": [False, False, False],
35
+ "suspension_type": ["cell", "cell", "cell"],
36
+ "tissue_type": ["tissue", "tissue", "tissue"],
37
+ },
29
38
  index=["barcode1", "barcode2", "barcode3"],
30
39
  )
31
- dataset_df["tissue"] = dataset_df["tissue"].astype("category")
32
- ad.AnnData(
33
- dataset_df[var_ids],
34
- obs=dataset_df[[key for key in dataset_dict if key not in var_ids]],
40
+
41
+ var_df = pd.DataFrame(
42
+ index=var_ids, data={"feature_is_filtered": [False, False, False]}
35
43
  )
44
+
45
+ X = pd.DataFrame(
46
+ {
47
+ var_ids[0]: [2, 3, 3],
48
+ var_ids[1]: [3, 4, 5],
49
+ var_ids[2]: [4, 2, 3],
50
+ },
51
+ index=["barcode1", "barcode2", "barcode3"],
52
+ dtype="float32",
53
+ )
54
+
55
+ obs_df["donor_id"] = obs_df["donor_id"].astype("category")
56
+
36
57
  if otype == "DataFrame":
37
- return dataset_df
58
+ return pd.concat([X, obs_df], axis=1)
38
59
  else:
39
- dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:])
40
- return dataset_ad
60
+ adata = ad.AnnData(X=X, obs=obs_df, var=var_df)
61
+ adata.uns["title"] = "CELLxGENE example"
62
+ adata.obsm["X_pca"] = np.array(
63
+ [[-1.2, 0.8], [0.5, -0.3], [0.7, -0.5]], dtype="float32"
64
+ )
65
+ # CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
66
+ adata.raw = adata.copy()
67
+ adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
68
+ if with_obs_defaults:
69
+ adata.obs["assay"] = "single-cell RNA sequencing"
70
+ return adata
41
71
 
42
72
 
43
73
  def anndata_with_obs() -> ad.AnnData:
@@ -4,7 +4,7 @@ from contextlib import contextmanager
4
4
  from pathlib import Path
5
5
  from typing import TYPE_CHECKING
6
6
 
7
- from lamindb_setup.core._settings_storage import get_storage_region
7
+ from lamindb_setup.core.upath import get_storage_region
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from collections.abc import Iterator
@@ -8,8 +8,7 @@ import pyarrow as pa
8
8
  from anndata import AnnData, read_h5ad
9
9
  from lamin_utils import logger
10
10
  from lamindb_setup import settings as setup_settings
11
- from lamindb_setup.core._settings_storage import get_storage_region
12
- from lamindb_setup.core.upath import LocalPathClasses, create_path
11
+ from lamindb_setup.core.upath import LocalPathClasses, create_path, get_storage_region
13
12
  from packaging import version
14
13
 
15
14
  if TYPE_CHECKING:
@@ -18,10 +18,6 @@ Modules.
18
18
 
19
19
  """
20
20
 
21
- from ._legacy import ( # backward compat
22
- CellxGeneAnnDataCatManager,
23
- PertAnnDataCatManager,
24
- )
25
21
  from .core import (
26
22
  AnnDataCurator,
27
23
  DataFrameCurator,
@@ -31,8 +27,6 @@ from .core import (
31
27
  )
32
28
 
33
29
  __all__ = [
34
- "CellxGeneAnnDataCatManager",
35
- "PertAnnDataCatManager",
36
30
  "AnnDataCurator",
37
31
  "DataFrameCurator",
38
32
  "MuDataCurator",