pertpy-1.0.0-py3-none-any.whl → pertpy-1.0.2-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
pertpy/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  __author__ = "Lukas Heumos"
4
4
  __email__ = "lukas.heumos@posteo.net"
5
- __version__ = "1.0.0"
5
+ __version__ = "1.0.2"
6
6
 
7
7
  import warnings
8
8
 
@@ -49,66 +49,69 @@ def _download( # pragma: no cover
49
49
  Path(output_path).mkdir(parents=True, exist_ok=True)
50
50
  lock_path = Path(output_path) / f"{output_file_name}.lock"
51
51
 
52
- with FileLock(lock_path, timeout=300):
53
- if Path(download_to_path).exists() and not overwrite:
54
- logger.warning(f"File {download_to_path} already exists!")
55
- return download_to_path
56
-
57
- temp_file_name = Path(f"{download_to_path}.part")
58
-
59
- retry_count = 0
60
- while retry_count <= max_retries:
61
- try:
62
- head_response = requests.head(url, timeout=timeout)
63
- head_response.raise_for_status()
64
- content_length = int(head_response.headers.get("content-length", 0))
65
-
66
- free_space = shutil.disk_usage(output_path).free
67
- if content_length > free_space:
68
- raise OSError(
69
- f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
70
- )
71
-
72
- response = requests.get(url, stream=True)
73
- response.raise_for_status()
74
- total = int(response.headers.get("content-length", 0))
75
-
76
- with Progress(refresh_per_second=5) as progress:
77
- task = progress.add_task("[red]Downloading...", total=total)
78
- with Path(temp_file_name).open("wb") as file:
79
- for data in response.iter_content(block_size):
80
- file.write(data)
81
- progress.update(task, advance=len(data))
82
- progress.update(task, completed=total, refresh=True)
83
-
84
- Path(temp_file_name).replace(download_to_path)
85
-
86
- if is_zip:
87
- with ZipFile(download_to_path, "r") as zip_obj:
88
- zip_obj.extractall(path=output_path)
89
- return Path(output_path)
90
-
52
+ try:
53
+ with FileLock(lock_path, timeout=300):
54
+ if Path(download_to_path).exists() and not overwrite:
55
+ logger.warning(f"File {download_to_path} already exists!")
91
56
  return download_to_path
92
- except (OSError, RequestException) as e:
93
- retry_count += 1
94
- if retry_count <= max_retries:
95
- logger.warning(
96
- f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
97
- )
98
- time.sleep(retry_delay)
99
- else:
100
- logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
57
+
58
+ temp_file_name = Path(f"{download_to_path}.part")
59
+
60
+ retry_count = 0
61
+ while retry_count <= max_retries:
62
+ try:
63
+ head_response = requests.head(url, timeout=timeout)
64
+ head_response.raise_for_status()
65
+ content_length = int(head_response.headers.get("content-length", 0))
66
+
67
+ free_space = shutil.disk_usage(output_path).free
68
+ if content_length > free_space:
69
+ raise OSError(
70
+ f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
71
+ )
72
+
73
+ response = requests.get(url, stream=True)
74
+ response.raise_for_status()
75
+ total = int(response.headers.get("content-length", 0))
76
+
77
+ with Progress(refresh_per_second=5) as progress:
78
+ task = progress.add_task("[red]Downloading...", total=total)
79
+ with Path(temp_file_name).open("wb") as file:
80
+ for data in response.iter_content(block_size):
81
+ file.write(data)
82
+ progress.update(task, advance=len(data))
83
+ progress.update(task, completed=total, refresh=True)
84
+
85
+ Path(temp_file_name).replace(download_to_path)
86
+
87
+ if is_zip:
88
+ with ZipFile(download_to_path, "r") as zip_obj:
89
+ zip_obj.extractall(path=output_path)
90
+ return Path(output_path)
91
+
92
+ return download_to_path
93
+ except (OSError, RequestException) as e:
94
+ retry_count += 1
95
+ if retry_count <= max_retries:
96
+ logger.warning(
97
+ f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
98
+ )
99
+ time.sleep(retry_delay)
100
+ else:
101
+ logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
102
+ if Path(temp_file_name).exists():
103
+ Path(temp_file_name).unlink(missing_ok=True)
104
+ raise
105
+
106
+ except Exception as e:
107
+ logger.error(f"Download failed: {str(e)}")
101
108
  if Path(temp_file_name).exists():
102
109
  Path(temp_file_name).unlink(missing_ok=True)
103
110
  raise
111
+ finally:
112
+ if Path(temp_file_name).exists():
113
+ Path(temp_file_name).unlink(missing_ok=True)
114
+ finally:
115
+ lock_path.unlink(missing_ok=True)
104
116
 
105
- except Exception as e:
106
- logger.error(f"Download failed: {str(e)}")
107
- if Path(temp_file_name).exists():
108
- Path(temp_file_name).unlink(missing_ok=True)
109
- raise
110
- finally:
111
- if Path(temp_file_name).exists():
112
- Path(temp_file_name).unlink(missing_ok=True)
113
-
114
- return Path(download_to_path)
117
+ return Path(download_to_path)
@@ -195,7 +195,9 @@ class CellLine(MetaData):
195
195
  block_size=4096,
196
196
  is_zip=False,
197
197
  )
198
- df = pd.read_csv(drug_response_prism_file_path, index_col=0)[["depmap_id", "name", "ic50", "ec50", "auc"]]
198
+ df = pd.read_csv(
199
+ drug_response_prism_file_path, index_col=0, usecols=["broad_id", "depmap_id", "name", "ic50", "ec50", "auc"]
200
+ )
199
201
  df = df.dropna(subset=["depmap_id", "name"])
200
202
  df = df.groupby(["depmap_id", "name"]).mean().reset_index()
201
203
  self.drug_response_prism = df
@@ -568,7 +570,9 @@ class CellLine(MetaData):
568
570
  verbosity=verbosity,
569
571
  )
570
572
 
571
- old_index_name = "index" if adata.obs.index.name is None else adata.obs.index.name
573
+ if adata.obs.index.name is None:
574
+ adata.obs.index.name = "original_index"
575
+ old_index_name = adata.obs.index.name
572
576
  adata.obs = (
573
577
  adata.obs.reset_index()
574
578
  .set_index([query_id, query_perturbation])
@@ -635,7 +639,9 @@ class CellLine(MetaData):
635
639
  verbosity=verbosity,
636
640
  )
637
641
 
638
- old_index_name = "index" if adata.obs.index.name is None else adata.obs.index.name
642
+ if adata.obs.index.name is None:
643
+ adata.obs.index.name = "original_index"
644
+ old_index_name = adata.obs.index.name
639
645
  adata.obs = (
640
646
  adata.obs.reset_index()
641
647
  .set_index([query_id, "perturbation_lower"])
pertpy/tools/__init__.py CHANGED
@@ -1,24 +1,5 @@
1
1
  from importlib import import_module
2
2
 
3
-
4
- def lazy_import(module_path: str, class_name: str, extras: list[str]):
5
- try:
6
- for extra in extras:
7
- import_module(extra)
8
- module = import_module(module_path)
9
- return getattr(module, class_name)
10
- except ImportError:
11
-
12
- class Placeholder:
13
- def __init__(self, *args, **kwargs):
14
- raise ImportError(
15
- f"Extra dependencies required: {', '.join(extras)}. "
16
- f"Please install with: pip install {' '.join(extras)}"
17
- )
18
-
19
- return Placeholder
20
-
21
-
22
3
  from pertpy.tools._augur import Augur
23
4
  from pertpy.tools._cinemaot import Cinemaot
24
5
  from pertpy.tools._coda._sccoda import Sccoda
@@ -42,15 +23,25 @@ from pertpy.tools._perturbation_space._simple import (
42
23
  )
43
24
  from pertpy.tools._scgen import Scgen
44
25
 
45
- CODA_EXTRAS = ["toytree", "ete4"] # also "pyqt6" but it cannot be imported
46
- Tasccoda = lazy_import("pertpy.tools._coda._tasccoda", "Tasccoda", CODA_EXTRAS)
47
26
 
48
- DE_EXTRAS = ["formulaic", "pydeseq2"]
49
- EdgeR = lazy_import("pertpy.tools._differential_gene_expression", "EdgeR", DE_EXTRAS) # edgeR will be imported via rpy2
50
- PyDESeq2 = lazy_import("pertpy.tools._differential_gene_expression", "PyDESeq2", DE_EXTRAS)
51
- Statsmodels = lazy_import("pertpy.tools._differential_gene_expression", "Statsmodels", DE_EXTRAS + ["statsmodels"])
52
- TTest = lazy_import("pertpy.tools._differential_gene_expression", "TTest", DE_EXTRAS)
53
- WilcoxonTest = lazy_import("pertpy.tools._differential_gene_expression", "WilcoxonTest", DE_EXTRAS)
27
+ def __getattr__(name: str):
28
+ if name == "Tasccoda":
29
+ try:
30
+ for extra in ["toytree", "ete4"]:
31
+ import_module(extra)
32
+ module = import_module("pertpy.tools._coda._tasccoda")
33
+ return module.Tasccoda
34
+ except ImportError:
35
+ raise ImportError(
36
+ "Extra dependencies required: toytree, ete4. Please install with: pip install toytree ete4"
37
+ ) from None
38
+
39
+ elif name in ["EdgeR", "PyDESeq2", "Statsmodels", "TTest", "WilcoxonTest"]:
40
+ module = import_module("pertpy.tools._differential_gene_expression")
41
+ return getattr(module, name)
42
+
43
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
44
+
54
45
 
55
46
  __all__ = [
56
47
  "Augur",
@@ -1181,7 +1181,7 @@ class CompositionalModel2(ABC):
1181
1181
  r,
1182
1182
  bars,
1183
1183
  bottom=cum_bars,
1184
- color=palette(n % palette.N),
1184
+ color=palette(n % palette.N), # type: ignore
1185
1185
  width=barwidth,
1186
1186
  label=type_names[n],
1187
1187
  linewidth=0,
@@ -1377,6 +1377,7 @@ class CompositionalModel2(ABC):
1377
1377
  plot_df.columns = covariate_names
1378
1378
  plot_df = pd.melt(plot_df, ignore_index=False, var_name="Covariate")
1379
1379
 
1380
+ plot_df.index.name = "Cell Type"
1380
1381
  plot_df = plot_df.reset_index()
1381
1382
 
1382
1383
  if len(covariate_names_zero) != 0 and plot_facets and plot_zero_covariate and not plot_zero_cell_type:
@@ -1472,6 +1473,7 @@ class CompositionalModel2(ABC):
1472
1473
  if return_fig and not plot_facets:
1473
1474
  return plt.gcf()
1474
1475
  plt.show()
1476
+
1475
1477
  return None
1476
1478
 
1477
1479
  @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1823,6 +1825,7 @@ class CompositionalModel2(ABC):
1823
1825
  if return_fig:
1824
1826
  return plt.gcf()
1825
1827
  plt.show()
1828
+
1826
1829
  return None
1827
1830
 
1828
1831
  @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1881,7 +1884,7 @@ class CompositionalModel2(ABC):
1881
1884
  from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
1882
1885
  except ImportError:
1883
1886
  raise ImportError(
1884
- "To use tasccoda please install additional dependencies with `pip install pertpy[coda]`"
1887
+ "To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
1885
1888
  ) from None
1886
1889
 
1887
1890
  if isinstance(data, MuData):
@@ -1902,8 +1905,8 @@ class CompositionalModel2(ABC):
1902
1905
  tree.render(save, tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi) # type: ignore
1903
1906
  if return_fig:
1904
1907
  return tree, tree_style
1908
+
1905
1909
  return tree.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi) # type: ignore
1906
- return None
1907
1910
 
1908
1911
  @_doc_params(common_plot_args=doc_common_plot_args)
1909
1912
  def plot_draw_effects( # pragma: no cover # noqa: D417
@@ -1969,7 +1972,7 @@ class CompositionalModel2(ABC):
1969
1972
  from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
1970
1973
  except ImportError:
1971
1974
  raise ImportError(
1972
- "To use tasccoda please install additional dependencies as `pip install pertpy[coda]`"
1975
+ "To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
1973
1976
  ) from None
1974
1977
 
1975
1978
  if isinstance(data, MuData):
@@ -2207,6 +2210,7 @@ class CompositionalModel2(ABC):
2207
2210
  if return_fig:
2208
2211
  return fig
2209
2212
  plt.show()
2213
+
2210
2214
  return None
2211
2215
 
2212
2216
 
@@ -2325,6 +2329,7 @@ def df2newick(df: pd.DataFrame, levels: list[str], inner_label: bool = True) ->
2325
2329
  strs = [traverse(df_tax, a, 0, inner_label) for a in alevel]
2326
2330
 
2327
2331
  newick = f"({','.join(strs)});"
2332
+
2328
2333
  return newick
2329
2334
 
2330
2335
 
@@ -2562,6 +2567,7 @@ def from_scanpy(
2562
2567
  covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
2563
2568
 
2564
2569
  if isinstance(sample_identifier, list):
2570
+ adata.obs = adata.obs.copy()
2565
2571
  adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
2566
2572
  sample_identifier = "scCODA_sample_id"
2567
2573
 
@@ -303,7 +303,7 @@ class Sccoda(CompositionalModel2):
303
303
  self,
304
304
  data: AnnData | MuData,
305
305
  modality_key: str = "coda",
306
- rng_key=None,
306
+ rng_key: int | None = None,
307
307
  num_prior_samples: int = 500,
308
308
  use_posterior_predictive: bool = True,
309
309
  ) -> az.InferenceData:
@@ -381,6 +381,8 @@ class Sccoda(CompositionalModel2):
381
381
  if rng_key is None:
382
382
  rng = np.random.default_rng()
383
383
  rng_key = random.key(rng.integers(0, 10000))
384
+ else:
385
+ rng_key = random.key(rng_key)
384
386
 
385
387
  if use_posterior_predictive:
386
388
  posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
@@ -391,6 +393,15 @@ class Sccoda(CompositionalModel2):
391
393
  ref_index=ref_index,
392
394
  sample_adata=sample_adata,
393
395
  )
396
+ # Remove problematic posterior predictive arrays with wrong dimensions
397
+ if posterior_predictive and "counts" in posterior_predictive:
398
+ counts_shape = posterior_predictive["counts"].shape
399
+ expected_dims = 2 # ['sample', 'cell_type']
400
+ if len(counts_shape) != expected_dims:
401
+ posterior_predictive = {k: v for k, v in posterior_predictive.items() if k != "counts"}
402
+ logger.warning(
403
+ f"Removed 'counts' from posterior_predictive due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
404
+ )
394
405
  else:
395
406
  posterior_predictive = None
396
407
 
@@ -403,6 +414,15 @@ class Sccoda(CompositionalModel2):
403
414
  ref_index=ref_index,
404
415
  sample_adata=sample_adata,
405
416
  )
417
+ # Remove problematic prior arrays with wrong dimensions
418
+ if prior and "counts" in prior:
419
+ counts_shape = prior["counts"].shape
420
+ expected_dims = 2 # ['sample', 'cell_type']
421
+ if len(counts_shape) != expected_dims:
422
+ prior = {k: v for k, v in prior.items() if k != "counts"}
423
+ logger.warning(
424
+ f"Removed 'counts' from prior due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
425
+ )
406
426
  else:
407
427
  prior = None
408
428
 
@@ -426,76 +446,84 @@ class Sccoda(CompositionalModel2):
426
446
  *args,
427
447
  **kwargs,
428
448
  ):
429
- """Examples:
430
- >>> import pertpy as pt
431
- >>> haber_cells = pt.dt.haber_2017_regions()
432
- >>> sccoda = pt.tl.Sccoda()
433
- >>> mdata = sccoda.load(haber_cells,
434
- >>> type="cell_level",
435
- >>> generate_sample_level=True,
436
- >>> cell_type_identifier="cell_label",
437
- >>> sample_identifier="batch",
438
- >>> covariate_obs=["condition"])
439
- >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
440
- >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
441
- """ # noqa: D205
449
+ """
450
+
451
+ Examples:
452
+ >>> import pertpy as pt
453
+ >>> haber_cells = pt.dt.haber_2017_regions()
454
+ >>> sccoda = pt.tl.Sccoda()
455
+ >>> mdata = sccoda.load(haber_cells,
456
+ >>> type="cell_level",
457
+ >>> generate_sample_level=True,
458
+ >>> cell_type_identifier="cell_label",
459
+ >>> sample_identifier="batch",
460
+ >>> covariate_obs=["condition"])
461
+ >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
462
+ >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
463
+ """ # noqa: D205, D212
442
464
  return super().run_nuts(data, modality_key, num_samples, num_warmup, rng_key, copy, *args, **kwargs)
443
465
 
444
466
  run_nuts.__doc__ = CompositionalModel2.run_nuts.__doc__ + run_nuts.__doc__
445
467
 
446
468
  def credible_effects(self, data: AnnData | MuData, modality_key: str = "coda", est_fdr: float = None) -> pd.Series:
447
- """Examples:
448
- >>> import pertpy as pt
449
- >>> haber_cells = pt.dt.haber_2017_regions()
450
- >>> sccoda = pt.tl.Sccoda()
451
- >>> mdata = sccoda.load(haber_cells,
452
- >>> type="cell_level",
453
- >>> generate_sample_level=True,
454
- >>> cell_type_identifier="cell_label",
455
- >>> sample_identifier="batch",
456
- >>> covariate_obs=["condition"])
457
- >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
458
- >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
459
- >>> credible_effects = sccoda.credible_effects(mdata).
460
- """ # noqa: D205
469
+ """
470
+
471
+ Examples:
472
+ >>> import pertpy as pt
473
+ >>> haber_cells = pt.dt.haber_2017_regions()
474
+ >>> sccoda = pt.tl.Sccoda()
475
+ >>> mdata = sccoda.load(haber_cells,
476
+ >>> type="cell_level",
477
+ >>> generate_sample_level=True,
478
+ >>> cell_type_identifier="cell_label",
479
+ >>> sample_identifier="batch",
480
+ >>> covariate_obs=["condition"])
481
+ >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
482
+ >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
483
+ >>> credible_effects = sccoda.credible_effects(mdata).
484
+ """ # noqa: D205, D212
461
485
  return super().credible_effects(data, modality_key, est_fdr)
462
486
 
463
487
  credible_effects.__doc__ = CompositionalModel2.credible_effects.__doc__ + credible_effects.__doc__
464
488
 
465
489
  def summary(self, data: AnnData | MuData, extended: bool = False, modality_key: str = "coda", *args, **kwargs):
466
- """Examples:
467
- >>> import pertpy as pt
468
- >>> haber_cells = pt.dt.haber_2017_regions()
469
- >>> sccoda = pt.tl.Sccoda()
470
- >>> mdata = sccoda.load(haber_cells,
471
- >>> type="cell_level",
472
- >>> generate_sample_level=True,
473
- >>> cell_type_identifier="cell_label",
474
- >>> sample_identifier="batch",
475
- >>> covariate_obs=["condition"])
476
- >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
477
- >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
478
- >>> sccoda.summary(mdata).
479
- """ # noqa: D205
490
+ """
491
+
492
+ Examples:
493
+ >>> import pertpy as pt
494
+ >>> haber_cells = pt.dt.haber_2017_regions()
495
+ >>> sccoda = pt.tl.Sccoda()
496
+ >>> mdata = sccoda.load(haber_cells,
497
+ >>> type="cell_level",
498
+ >>> generate_sample_level=True,
499
+ >>> cell_type_identifier="cell_label",
500
+ >>> sample_identifier="batch",
501
+ >>> covariate_obs=["condition"])
502
+ >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
503
+ >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
504
+ >>> sccoda.summary(mdata).
505
+ """ # noqa: D205, D212
480
506
  return super().summary(data, extended, modality_key, *args, **kwargs)
481
507
 
482
508
  summary.__doc__ = CompositionalModel2.summary.__doc__ + summary.__doc__
483
509
 
484
510
  def set_fdr(self, data: AnnData | MuData, est_fdr: float, modality_key: str = "coda", *args, **kwargs):
485
- """Examples:
486
- >>> import pertpy as pt
487
- >>> haber_cells = pt.dt.haber_2017_regions()
488
- >>> sccoda = pt.tl.Sccoda()
489
- >>> mdata = sccoda.load(haber_cells,
490
- >>> type="cell_level",
491
- >>> generate_sample_level=True,
492
- >>> cell_type_identifier="cell_label",
493
- >>> sample_identifier="batch",
494
- >>> covariate_obs=["condition"])
495
- >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
496
- >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
497
- >>> sccoda.set_fdr(mdata, est_fdr=0.4).
498
- """ # noqa: D205
511
+ """
512
+
513
+ Examples:
514
+ >>> import pertpy as pt
515
+ >>> haber_cells = pt.dt.haber_2017_regions()
516
+ >>> sccoda = pt.tl.Sccoda()
517
+ >>> mdata = sccoda.load(haber_cells,
518
+ >>> type="cell_level",
519
+ >>> generate_sample_level=True,
520
+ >>> cell_type_identifier="cell_label",
521
+ >>> sample_identifier="batch",
522
+ >>> covariate_obs=["condition"])
523
+ >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
524
+ >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
525
+ >>> sccoda.set_fdr(mdata, est_fdr=0.4).
526
+ """ # noqa: D205, D212
499
527
  return super().set_fdr(data, est_fdr, modality_key, *args, **kwargs)
500
528
 
501
529
  set_fdr.__doc__ = CompositionalModel2.set_fdr.__doc__ + set_fdr.__doc__