pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. pertpy/__init__.py +4 -2
  2. pertpy/data/__init__.py +66 -1
  3. pertpy/data/_dataloader.py +28 -26
  4. pertpy/data/_datasets.py +261 -92
  5. pertpy/metadata/__init__.py +6 -0
  6. pertpy/metadata/_cell_line.py +795 -0
  7. pertpy/metadata/_compound.py +128 -0
  8. pertpy/metadata/_drug.py +238 -0
  9. pertpy/metadata/_look_up.py +569 -0
  10. pertpy/metadata/_metadata.py +70 -0
  11. pertpy/metadata/_moa.py +125 -0
  12. pertpy/plot/__init__.py +0 -13
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +89 -6
  15. pertpy/tools/__init__.py +48 -15
  16. pertpy/tools/_augur.py +329 -32
  17. pertpy/tools/_cinemaot.py +145 -6
  18. pertpy/tools/_coda/_base_coda.py +1237 -116
  19. pertpy/tools/_coda/_sccoda.py +66 -36
  20. pertpy/tools/_coda/_tasccoda.py +46 -39
  21. pertpy/tools/_dialogue.py +180 -77
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +29 -24
  32. pertpy/tools/_distances/_distances.py +584 -98
  33. pertpy/tools/_enrichment.py +460 -0
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +406 -49
  36. pertpy/tools/_mixscape.py +677 -55
  37. pertpy/tools/_perturbation_space/_clustering.py +10 -3
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
  41. pertpy/tools/_perturbation_space/_simple.py +52 -11
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +706 -0
  45. pertpy/tools/_scgen/_utils.py +3 -5
  46. pertpy/tools/decoupler_LICENSE +674 -0
  47. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
  48. pertpy-0.8.0.dist-info/RECORD +57 -0
  49. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  50. pertpy/plot/_augur.py +0 -234
  51. pertpy/plot/_cinemaot.py +0 -81
  52. pertpy/plot/_coda.py +0 -1001
  53. pertpy/plot/_dialogue.py +0 -91
  54. pertpy/plot/_guide_rna.py +0 -82
  55. pertpy/plot/_milopy.py +0 -284
  56. pertpy/plot/_mixscape.py +0 -594
  57. pertpy/plot/_scgen.py +0 -337
  58. pertpy/tools/_differential_gene_expression.py +0 -99
  59. pertpy/tools/_metadata/__init__.py +0 -0
  60. pertpy/tools/_metadata/_cell_line.py +0 -613
  61. pertpy/tools/_metadata/_look_up.py +0 -342
  62. pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
  63. pertpy/tools/_scgen/_jax_scgen.py +0 -370
  64. pertpy-0.6.0.dist-info/RECORD +0 -50
  65. /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
  66. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/data/_datasets.py CHANGED
@@ -40,7 +40,7 @@ def papalexi_2021() -> MuData: # pragma: no cover
40
40
  import muon as mu
41
41
 
42
42
  output_file_name = "papalexi_2021.h5mu"
43
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
43
+ output_file_path = settings.datasetdir / output_file_name
44
44
  if not Path(output_file_path).exists():
45
45
  _download(
46
46
  url="https://figshare.com/ndownloader/files/36509460",
@@ -63,7 +63,7 @@ def sc_sim_augur() -> AnnData: # pragma: no cover
63
63
  :class:`~anndata.AnnData` object of a simulated single-cell RNA seq dataset
64
64
  """
65
65
  output_file_name = "sc_sim_augur.h5ad"
66
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
66
+ output_file_path = settings.datasetdir / output_file_name
67
67
  if not Path(output_file_path).exists():
68
68
  _download(
69
69
  url="https://figshare.com/ndownloader/files/31645886",
@@ -93,7 +93,7 @@ def bhattacherjee() -> AnnData: # pragma: no cover
93
93
  :class:`~anndata.AnnData` object of a single-cell RNA seq dataset
94
94
  """
95
95
  output_file_name = "bhattacherjee_rna.h5ad"
96
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
96
+ output_file_path = settings.datasetdir / output_file_name
97
97
  if not Path(output_file_path).exists():
98
98
  _download(
99
99
  url="https://figshare.com/ndownloader/files/34526528",
@@ -110,9 +110,7 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
110
110
  """Raw sciplex3 perturbation dataset curated for perturbation modeling.
111
111
 
112
112
  References:
113
- Srivatsan SR, McFaline-Figueroa JL, Ramani V, Saunders L, Cao J, Packer J,
114
- Pliner HA, Jackson DL, Daza RM, Christiansen L, Zhang F, Steemers F,
115
- Shendure J, Trapnell C. Massively multiplex chemical transcriptomics at
113
+ Srivatsan SR et al., Trapnell C. Massively multiplex chemical transcriptomics at
116
114
  single-cell resolution. Science. 2020 Jan 3;367(6473):45-51.
117
115
  doi: 10.1126/science.aax6234. Epub 2019 Dec 5. PMID: 31806696; PMCID: PMC7289078.
118
116
 
@@ -120,7 +118,7 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
120
118
  :class:`~anndata.AnnData` object of a single-cell RNA seq dataset
121
119
  """
122
120
  output_file_name = "sciplex3.h5ad"
123
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
121
+ output_file_path = settings.datasetdir / output_file_name
124
122
  if not Path(output_file_path).exists():
125
123
  _download(
126
124
  url="https://figshare.com/ndownloader/files/33979517",
@@ -133,8 +131,12 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
133
131
  return adata
134
132
 
135
133
 
136
- def smillie() -> AnnData: # pragma: no cover
137
- """scRNA-seq data of the small intestine of mice under Ulcerative Colitis.
134
+ def tasccoda_example() -> AnnData: # pragma: no cover
135
+ """Example for the coda part of a mudata object.
136
+
137
+ Resulting AnnData object (mudata['coda']) when preparing a dataset for processing with tascCODA.
138
+ Created using the smillie dataset, which comprises scRNA-seq data of the small intestine of mice under Ulcerative Colitis.
139
+ The full dataset containing the actual count data can be obtained via smillie_2019().
138
140
 
139
141
  References:
140
142
  Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.”
@@ -143,8 +145,8 @@ def smillie() -> AnnData: # pragma: no cover
143
145
  Returns:
144
146
  :class:`~anndata.AnnData` object of the dataset.
145
147
  """
146
- output_file_name = "smillie.h5ad"
147
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
148
+ output_file_name = "tasccoda_smillie.h5ad"
149
+ output_file_path = settings.datasetdir / output_file_name
148
150
  if not Path(output_file_path).exists():
149
151
  _download(
150
152
  url="https://figshare.com/ndownloader/files/38648585",
@@ -173,7 +175,7 @@ def frangieh_2021() -> AnnData: # pragma: no cover
173
175
  :class:`~anndata.AnnData` object of the Perturb-CITE-seq data.
174
176
  """
175
177
  output_file_name = "frangieh_2021.h5ad"
176
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
178
+ output_file_path = settings.datasetdir / output_file_name
177
179
  if not Path(output_file_path).exists():
178
180
  _download(
179
181
  url="https://figshare.com/ndownloader/files/34013717",
@@ -202,7 +204,7 @@ def frangieh_2021_raw() -> AnnData: # pragma: no cover
202
204
  :class:`~anndata.AnnData` object of raw Perturb-CITE-seq data.
203
205
  """
204
206
  output_file_name = "frangieh_2021_raw.h5ad"
205
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
207
+ output_file_path = settings.datasetdir / output_file_name
206
208
  if not Path(output_file_path).exists():
207
209
  _download(
208
210
  url="https://figshare.com/ndownloader/files/34012565",
@@ -231,7 +233,7 @@ def dixit_2016_raw() -> AnnData: # pragma: no cover
231
233
  :class:`~anndata.AnnData` object of raw Perturb-seq data.
232
234
  """
233
235
  output_file_name = "dixit_2016_raw.h5ad"
234
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
236
+ output_file_path = settings.datasetdir / output_file_name
235
237
  if not Path(output_file_path).exists():
236
238
  _download(
237
239
  url="https://figshare.com/ndownloader/files/34012565",
@@ -260,7 +262,7 @@ def dixit_2016() -> AnnData: # pragma: no cover
260
262
  :class:`~anndata.AnnData` object of Perturb-seq data
261
263
  """
262
264
  output_file_name = "dixit_2016.h5ad"
263
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
265
+ output_file_path = settings.datasetdir / output_file_name
264
266
  if not Path(output_file_path).exists():
265
267
  _download(
266
268
  url="https://figshare.com/ndownloader/files/34014608",
@@ -288,7 +290,7 @@ def norman_2019() -> AnnData: # pragma: no cover
288
290
  :class:`~anndata.AnnData` object of single-cell pooled CRISPR screening.
289
291
  """
290
292
  output_file_name = "norman_2019.h5ad"
291
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
293
+ output_file_path = settings.datasetdir / output_file_name
292
294
  if not Path(output_file_path).exists():
293
295
  _download(
294
296
  url="https://figshare.com/ndownloader/files/34027562",
@@ -316,7 +318,7 @@ def norman_2019_raw() -> AnnData: # pragma: no cover
316
318
  :class:`~anndata.AnnData` object of raw single-cell pooled CRISPR screening
317
319
  """
318
320
  output_file_name = "norman_2019_raw.h5ad"
319
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
321
+ output_file_path = settings.datasetdir / output_file_name
320
322
  if not Path(output_file_path).exists():
321
323
  _download(
322
324
  url="https://figshare.com/ndownloader/files/34002548",
@@ -339,10 +341,10 @@ def dialogue_example() -> AnnData: # pragma: no cover
339
341
  :class:`~anndata.AnnData` object
340
342
  """
341
343
  output_file_name = "dialogue_example.h5ad"
342
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
344
+ output_file_path = settings.datasetdir / output_file_name
343
345
  if not Path(output_file_path).exists():
344
346
  _download(
345
- url="https://figshare.com/ndownloader/files/34490714",
347
+ url="https://figshare.com/ndownloader/files/43462662",
346
348
  output_file_name=output_file_name,
347
349
  output_path=settings.datasetdir,
348
350
  is_zip=False,
@@ -361,7 +363,7 @@ def distance_example() -> AnnData: # pragma: no cover
361
363
  :class:`~anndata.AnnData` object
362
364
  """
363
365
  output_file_name = "distances_example_data.h5ad"
364
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
366
+ output_file_path = settings.datasetdir / output_file_name
365
367
  if not Path(output_file_path).exists():
366
368
  _download(
367
369
  url="https://figshare.com/ndownloader/files/39561379",
@@ -392,7 +394,7 @@ def kang_2018() -> AnnData: # pragma: no cover
392
394
  :class:`~anndata.AnnData` object of droplet-based single cell RNA-sequencing
393
395
  """
394
396
  output_file_name = "kang_2018.h5ad"
395
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
397
+ output_file_path = settings.datasetdir / output_file_name
396
398
  if not Path(output_file_path).exists():
397
399
  _download(
398
400
  url="https://figshare.com/ndownloader/files/34464122",
@@ -421,7 +423,7 @@ def stephenson_2021_subsampled() -> AnnData: # pragma: no cover
421
423
  :class:`~anndata.AnnData` object of scRNA-seq profiles
422
424
  """
423
425
  output_file_name = "stephenson_2021_subsampled.h5ad"
424
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
426
+ output_file_path = settings.datasetdir / output_file_name
425
427
  if not Path(output_file_path).exists():
426
428
  _download(
427
429
  url="https://figshare.com/ndownloader/files/38171703",
@@ -448,7 +450,7 @@ def haber_2017_regions() -> AnnData: # pragma: no cover
448
450
  :class:`~anndata.AnnData` object
449
451
  """
450
452
  output_file_name = "haber_2017_regions.h5ad"
451
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
453
+ output_file_path = settings.datasetdir / output_file_name
452
454
  if not Path(output_file_path).exists():
453
455
  _download(
454
456
  url="https://figshare.com/ndownloader/files/38169900",
@@ -475,10 +477,10 @@ def adamson_2016_pilot() -> AnnData: # pragma: no cover
475
477
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
476
478
  """
477
479
  output_file_name = "adamson_2016_pilot.h5ad"
478
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
480
+ output_file_path = settings.datasetdir / output_file_name
479
481
  if not Path(output_file_path).exists():
480
482
  _download(
481
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406675_10X001.h5ad?download=1",
483
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406675_10X001.h5ad?download=1",
482
484
  output_file_name=output_file_name,
483
485
  output_path=settings.datasetdir,
484
486
  is_zip=False,
@@ -506,10 +508,10 @@ def adamson_2016_upr_epistasis() -> AnnData: # pragma: no cover
506
508
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
507
509
  """
508
510
  output_file_name = "adamson_2016_upr_epistasis.h5ad"
509
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
511
+ output_file_path = settings.datasetdir / output_file_name
510
512
  if not Path(output_file_path).exists():
511
513
  _download(
512
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406677_10X005.h5ad?download=1",
514
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406677_10X005.h5ad?download=1",
513
515
  output_file_name=output_file_name,
514
516
  output_path=settings.datasetdir,
515
517
  is_zip=False,
@@ -535,10 +537,10 @@ def adamson_2016_upr_perturb_seq() -> AnnData: # pragma: no cover
535
537
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
536
538
  """
537
539
  output_file_name = "adamson_2016_upr_perturb_seq.h5ad"
538
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
540
+ output_file_path = settings.datasetdir / output_file_name
539
541
  if not Path(output_file_path).exists():
540
542
  _download(
541
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406681_10X010.h5ad?download=1",
543
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406681_10X010.h5ad?download=1",
542
544
  output_file_name=output_file_name,
543
545
  output_path=settings.datasetdir,
544
546
  is_zip=False,
@@ -562,10 +564,10 @@ def aissa_2021() -> AnnData: # pragma: no cover
562
564
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
563
565
  """
564
566
  output_file_name = "aissa_2021.h5ad"
565
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
567
+ output_file_path = settings.datasetdir / output_file_name
566
568
  if not Path(output_file_path).exists():
567
569
  _download(
568
- url="https://zenodo.org/record/7278143/files/AissaBenevolenskaya2021.h5ad?download=1",
570
+ url="https://zenodo.org/record/10044268/files/AissaBenevolenskaya2021.h5ad?download=1",
569
571
  output_file_name=output_file_name,
570
572
  output_path=settings.datasetdir,
571
573
  is_zip=False,
@@ -591,10 +593,10 @@ def chang_2021() -> AnnData: # pragma: no cover
591
593
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
592
594
  """
593
595
  output_file_name = "chang_2021.h5ad"
594
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
596
+ output_file_path = settings.datasetdir / output_file_name
595
597
  if not Path(output_file_path).exists():
596
598
  _download(
597
- url="https://zenodo.org/record/7278143/files/ChangYe2021.h5ad?download=1",
599
+ url="https://zenodo.org/record/10044268/files/ChangYe2021.h5ad?download=1",
598
600
  output_file_name=output_file_name,
599
601
  output_path=settings.datasetdir,
600
602
  is_zip=False,
@@ -622,10 +624,10 @@ def datlinger_2017() -> AnnData: # pragma: no cover
622
624
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
623
625
  """
624
626
  output_file_name = "datlinger_2017.h5ad"
625
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
627
+ output_file_path = settings.datasetdir / output_file_name
626
628
  if not Path(output_file_path).exists():
627
629
  _download(
628
- url="https://zenodo.org/record/7278143/files/DatlingerBock2017.h5ad?download=1",
630
+ url="https://zenodo.org/record/10044268/files/DatlingerBock2017.h5ad?download=1",
629
631
  output_file_name=output_file_name,
630
632
  output_path=settings.datasetdir,
631
633
  is_zip=False,
@@ -652,10 +654,10 @@ def datlinger_2021() -> AnnData: # pragma: no cover
652
654
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
653
655
  """
654
656
  output_file_name = "datlinger_2021.h5ad"
655
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
657
+ output_file_path = settings.datasetdir / output_file_name
656
658
  if not Path(output_file_path).exists():
657
659
  _download(
658
- url="https://zenodo.org/record/7278143/files/DatlingerBock2021.h5ad?download=1",
660
+ url="https://zenodo.org/record/10044268/files/DatlingerBock2021.h5ad?download=1",
659
661
  output_file_name=output_file_name,
660
662
  output_path=settings.datasetdir,
661
663
  is_zip=False,
@@ -683,10 +685,10 @@ def frangieh_2021_protein() -> AnnData: # pragma: no cover
683
685
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
684
686
  """
685
687
  output_file_name = "frangieh_2021_protein.h5ad"
686
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
688
+ output_file_path = settings.datasetdir / output_file_name
687
689
  if not Path(output_file_path).exists():
688
690
  _download(
689
- url="https://zenodo.org/record/7278143/files/FrangiehIzar2021_protein.h5ad?download=1",
691
+ url="https://zenodo.org/record/10044268/files/FrangiehIzar2021_protein.h5ad?download=1",
690
692
  output_file_name=output_file_name,
691
693
  output_path=settings.datasetdir,
692
694
  is_zip=False,
@@ -714,10 +716,10 @@ def frangieh_2021_rna() -> AnnData: # pragma: no cover
714
716
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
715
717
  """
716
718
  output_file_name = "frangieh_2021_rna.h5ad"
717
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
719
+ output_file_path = settings.datasetdir / output_file_name
718
720
  if not Path(output_file_path).exists():
719
721
  _download(
720
- url="https://zenodo.org/record/7278143/files/FrangiehIzar2021_RNA.h5ad?download=1",
722
+ url="https://zenodo.org/record/10044268/files/FrangiehIzar2021_RNA.h5ad?download=1",
721
723
  output_file_name=output_file_name,
722
724
  output_path=settings.datasetdir,
723
725
  is_zip=False,
@@ -742,10 +744,10 @@ def gasperini_2019_atscale() -> AnnData: # pragma: no cover
742
744
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
743
745
  """
744
746
  output_file_name = "gasperini_2019_atscale.h5ad"
745
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
747
+ output_file_path = settings.datasetdir / output_file_name
746
748
  if not Path(output_file_path).exists():
747
749
  _download(
748
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_atscale.h5ad?download=1",
750
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_atscale.h5ad?download=1",
749
751
  output_file_name=output_file_name,
750
752
  output_path=settings.datasetdir,
751
753
  is_zip=False,
@@ -771,10 +773,10 @@ def gasperini_2019_highmoi() -> AnnData: # pragma: no cover
771
773
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
772
774
  """
773
775
  output_file_name = "gasperini_2019_highmoi.h5ad"
774
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
776
+ output_file_path = settings.datasetdir / output_file_name
775
777
  if not Path(output_file_path).exists():
776
778
  _download(
777
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_highMOI.h5ad?download=1",
779
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_highMOI.h5ad?download=1",
778
780
  output_file_name=output_file_name,
779
781
  output_path=settings.datasetdir,
780
782
  is_zip=False,
@@ -800,10 +802,10 @@ def gasperini_2019_lowmoi() -> AnnData: # pragma: no cover
800
802
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
801
803
  """
802
804
  output_file_name = "gasperini_2019_lowmoi.h5ad"
803
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
805
+ output_file_path = settings.datasetdir / output_file_name
804
806
  if not Path(output_file_path).exists():
805
807
  _download(
806
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_lowMOI.h5ad?download=1",
808
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_lowMOI.h5ad?download=1",
807
809
  output_file_name=output_file_name,
808
810
  output_path=settings.datasetdir,
809
811
  is_zip=False,
@@ -828,10 +830,10 @@ def gehring_2019() -> AnnData: # pragma: no cover
828
830
  :class:`~anndata.AnnData` object of a scPerturb prepared single-cell dataset
829
831
  """
830
832
  output_file_name = "gehring_2019.h5ad"
831
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
833
+ output_file_path = settings.datasetdir / output_file_name
832
834
  if not Path(output_file_path).exists():
833
835
  _download(
834
- url="https://zenodo.org/record/7278143/files/GehringPachter2019.h5ad?download=1",
836
+ url="https://zenodo.org/record/10044268/files/GehringPachter2019.h5ad?download=1",
835
837
  output_file_name=output_file_name,
836
838
  output_path=settings.datasetdir,
837
839
  is_zip=False,
@@ -857,10 +859,10 @@ def mcfarland_2020() -> AnnData: # pragma: no cover
857
859
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
858
860
  """
859
861
  output_file_name = "mcfarland_2020.h5ad"
860
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
862
+ output_file_path = settings.datasetdir / output_file_name
861
863
  if not Path(output_file_path).exists():
862
864
  _download(
863
- url="https://zenodo.org/record/7278143/files/McFarlandTsherniak2020.h5ad?download=1",
865
+ url="https://zenodo.org/record/10044268/files/McFarlandTsherniak2020.h5ad?download=1",
864
866
  output_file_name=output_file_name,
865
867
  output_path=settings.datasetdir,
866
868
  is_zip=False,
@@ -886,10 +888,10 @@ def replogle_2022_k562_essential() -> AnnData: # pragma: no cover
886
888
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
887
889
  """
888
890
  output_file_name = "replogle_2022_k562_essential.h5ad"
889
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
891
+ output_file_path = settings.datasetdir / output_file_name
890
892
  if not Path(output_file_path).exists():
891
893
  _download(
892
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_K562_essential.h5ad?download=1",
894
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_K562_essential.h5ad?download=1",
893
895
  output_file_name=output_file_name,
894
896
  output_path=settings.datasetdir,
895
897
  is_zip=False,
@@ -900,7 +902,7 @@ def replogle_2022_k562_essential() -> AnnData: # pragma: no cover
900
902
 
901
903
 
902
904
  def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
903
- """K562 cells transduced with CRISPRi (day 8 after transcduction).
905
+ """K562 cells transduced with CRISPRi (day 8 after transduction).
904
906
 
905
907
  Here, the authors used a compact, multiplexed CRISPR interference (CRISPRi) library
906
908
  to assay thousands of loss-of-function genetic perturbations with single-cell RNA sequencing
@@ -917,10 +919,10 @@ def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
917
919
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
918
920
  """
919
921
  output_file_name = "replogle_2022_k562_gwps.h5ad"
920
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
922
+ output_file_path = settings.datasetdir / output_file_name
921
923
  if not Path(output_file_path).exists():
922
924
  _download(
923
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_K562_gwps.h5ad?download=1",
925
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_K562_gwps.h5ad?download=1",
924
926
  output_file_name=output_file_name,
925
927
  output_path=settings.datasetdir,
926
928
  is_zip=False,
@@ -931,7 +933,7 @@ def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
931
933
 
932
934
 
933
935
  def replogle_2022_rpe1() -> AnnData: # pragma: no cover
934
- """RPE1 cells transduced with CRISPRi (day 7 after transcduction).
936
+ """RPE1 cells transduced with CRISPRi (day 7 after transduction).
935
937
 
936
938
  For day 7 essential-scale Perturb-seq experiment in retinal pigment epithelial (RPE1)
937
939
  cell lines, library lentivirus was packaged into lentivirus in 293T cells and
@@ -947,10 +949,10 @@ def replogle_2022_rpe1() -> AnnData: # pragma: no cover
947
949
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
948
950
  """
949
951
  output_file_name = "replogle_2022_rpe1.h5ad"
950
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
952
+ output_file_path = settings.datasetdir / output_file_name
951
953
  if not Path(output_file_path).exists():
952
954
  _download(
953
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_rpe1.h5ad?download=1",
955
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_rpe1.h5ad?download=1",
954
956
  output_file_name=output_file_name,
955
957
  output_path=settings.datasetdir,
956
958
  is_zip=False,
@@ -978,10 +980,10 @@ def schiebinger_2019_16day() -> AnnData: # pragma: no cover
978
980
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
979
981
  """
980
982
  output_file_name = "schiebinger_2019_16day.h5ad"
981
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
983
+ output_file_path = settings.datasetdir / output_file_name
982
984
  if not Path(output_file_path).exists():
983
985
  _download(
984
- url="https://zenodo.org/record/7278143/files/SchiebingerLander2019_GSE106340.h5ad?download=1",
986
+ url="https://zenodo.org/record/10044268/files/SchiebingerLander2019_GSE106340.h5ad?download=1",
985
987
  output_file_name=output_file_name,
986
988
  output_path=settings.datasetdir,
987
989
  is_zip=False,
@@ -1007,10 +1009,10 @@ def schiebinger_2019_18day() -> AnnData: # pragma: no cover
1007
1009
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1008
1010
  """
1009
1011
  output_file_name = "Schiebinger_2019_18day.h5ad"
1010
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1012
+ output_file_path = settings.datasetdir / output_file_name
1011
1013
  if not Path(output_file_path).exists():
1012
1014
  _download(
1013
- url="https://zenodo.org/record/7278143/files/SchiebingerLander2019_GSE115943.h5ad?download=1",
1015
+ url="https://zenodo.org/record/10044268/files/SchiebingerLander2019_GSE115943.h5ad?download=1",
1014
1016
  output_file_name=output_file_name,
1015
1017
  output_path=settings.datasetdir,
1016
1018
  is_zip=False,
@@ -1036,10 +1038,10 @@ def schraivogel_2020_tap_screen_chr11() -> AnnData: # pragma: no cover
1036
1038
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1037
1039
  """
1038
1040
  output_file_name = "schraivogel_2020_tap_screen_chr11.h5ad"
1039
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1041
+ output_file_path = settings.datasetdir / output_file_name
1040
1042
  if not Path(output_file_path).exists():
1041
1043
  _download(
1042
- url="https://zenodo.org/record/7278143/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_11_screen.h5ad?download=1",
1044
+ url="https://zenodo.org/record/10044268/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_11_screen.h5ad?download=1",
1043
1045
  output_file_name=output_file_name,
1044
1046
  output_path=settings.datasetdir,
1045
1047
  is_zip=False,
@@ -1065,10 +1067,10 @@ def schraivogel_2020_tap_screen_chr8() -> AnnData: # pragma: no cover
1065
1067
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1066
1068
  """
1067
1069
  output_file_name = "schraivogel_2020_tap_screen_chr8.h5ad"
1068
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1070
+ output_file_path = settings.datasetdir / output_file_name
1069
1071
  if not Path(output_file_path).exists():
1070
1072
  _download(
1071
- url="https://zenodo.org/record/7278143/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_8_screen.h5ad?download=1",
1073
+ url="https://zenodo.org/record/10044268/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_8_screen.h5ad?download=1",
1072
1074
  output_file_name=output_file_name,
1073
1075
  output_path=settings.datasetdir,
1074
1076
  is_zip=False,
@@ -1095,10 +1097,10 @@ def shifrut_2018() -> AnnData: # pragma: no cover
1095
1097
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1096
1098
  """
1097
1099
  output_file_name = "shifrut_2018.h5ad"
1098
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1100
+ output_file_path = settings.datasetdir / output_file_name
1099
1101
  if not Path(output_file_path).exists():
1100
1102
  _download(
1101
- url="https://zenodo.org/record/7278143/files/ShifrutMarson2018.h5ad?download=1",
1103
+ url="https://zenodo.org/record/10044268/files/ShifrutMarson2018.h5ad?download=1",
1102
1104
  output_file_name=output_file_name,
1103
1105
  output_path=settings.datasetdir,
1104
1106
  is_zip=False,
@@ -1126,10 +1128,10 @@ def srivatsan_2020_sciplex2() -> AnnData: # pragma: no cover
1126
1128
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1127
1129
  """
1128
1130
  output_file_name = "srivatsan_2020_sciplex2.h5ad"
1129
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1131
+ output_file_path = settings.datasetdir / output_file_name
1130
1132
  if not Path(output_file_path).exists():
1131
1133
  _download(
1132
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex2.h5ad?download=1",
1134
+ url="https://zenodo.org/record/10044268/files/SrivatsanTrapnell2020_sciplex2.h5ad?download=1",
1133
1135
  output_file_name=output_file_name,
1134
1136
  output_path=settings.datasetdir,
1135
1137
  is_zip=False,
@@ -1155,10 +1157,10 @@ def srivatsan_2020_sciplex3() -> AnnData: # pragma: no cover
1155
1157
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1156
1158
  """
1157
1159
  output_file_name = "srivatsan_2020_sciplex3.h5ad"
1158
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1160
+ output_file_path = settings.datasetdir / output_file_name
1159
1161
  if not Path(output_file_path).exists():
1160
1162
  _download(
1161
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex3.h5ad?download=1",
1163
+ url="https://zenodo.org/records/10044268/files/SrivatsanTrapnell2020_sciplex3.h5ad?download=1",
1162
1164
  output_file_name=output_file_name,
1163
1165
  output_path=settings.datasetdir,
1164
1166
  is_zip=False,
@@ -1187,10 +1189,10 @@ def srivatsan_2020_sciplex4() -> AnnData: # pragma: no cover
1187
1189
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1188
1190
  """
1189
1191
  output_file_name = "srivatsan_2020_sciplex4.h5ad"
1190
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1192
+ output_file_path = settings.datasetdir / output_file_name
1191
1193
  if not Path(output_file_path).exists():
1192
1194
  _download(
1193
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex4.h5ad?download=1",
1195
+ url="https://zenodo.org/records/10044268/files/SrivatsanTrapnell2020_sciplex4.h5ad?download=1",
1194
1196
  output_file_name=output_file_name,
1195
1197
  output_path=settings.datasetdir,
1196
1198
  is_zip=False,
@@ -1219,10 +1221,10 @@ def tian_2019_day7neuron() -> AnnData: # pragma: no cover
1219
1221
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1220
1222
  """
1221
1223
  output_file_name = "tian_2019_day7neuron.h5ad"
1222
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1224
+ output_file_path = settings.datasetdir / output_file_name
1223
1225
  if not Path(output_file_path).exists():
1224
1226
  _download(
1225
- url="https://zenodo.org/record/7278143/files/TianKampmann2019_day7neuron.h5ad?download=1",
1227
+ url="https://zenodo.org/records/10044268/files/TianKampmann2019_day7neuron.h5ad?download=1",
1226
1228
  output_file_name=output_file_name,
1227
1229
  output_path=settings.datasetdir,
1228
1230
  is_zip=False,
@@ -1251,10 +1253,10 @@ def tian_2019_ipsc() -> AnnData: # pragma: no cover
1251
1253
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1252
1254
  """
1253
1255
  output_file_name = "tian_2019_iPSC.h5ad"
1254
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1256
+ output_file_path = settings.datasetdir / output_file_name
1255
1257
  if not Path(output_file_path).exists():
1256
1258
  _download(
1257
- url="https://zenodo.org/record/7278143/files/TianKampmann2019_iPSC.h5ad?download=1",
1259
+ url="https://zenodo.org/records/10044268/files/TianKampmann2019_iPSC.h5ad?download=1",
1258
1260
  output_file_name=output_file_name,
1259
1261
  output_path=settings.datasetdir,
1260
1262
  is_zip=False,
@@ -1281,10 +1283,10 @@ def tian_2021_crispra() -> AnnData: # pragma: no cover
1281
1283
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1282
1284
  """
1283
1285
  output_file_name = "tian_2021_crispra.h5ad"
1284
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1286
+ output_file_path = settings.datasetdir / output_file_name
1285
1287
  if not Path(output_file_path).exists():
1286
1288
  _download(
1287
- url="https://zenodo.org/record/7278143/files/TianKampmann2021_CRISPRa.h5ad?download=1",
1289
+ url="https://zenodo.org/records/10044268/files/TianKampmann2021_CRISPRa.h5ad?download=1",
1288
1290
  output_file_name=output_file_name,
1289
1291
  output_path=settings.datasetdir,
1290
1292
  is_zip=False,
@@ -1311,10 +1313,10 @@ def tian_2021_crispri() -> AnnData: # pragma: no cover
1311
1313
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1312
1314
  """
1313
1315
  output_file_name = "tian_2021_crispri.h5ad"
1314
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1316
+ output_file_path = settings.datasetdir / output_file_name
1315
1317
  if not Path(output_file_path).exists():
1316
1318
  _download(
1317
- url="https://zenodo.org/record/7278143/files/TianKampmann2021_CRISPRi.h5ad?download=1",
1319
+ url="https://zenodo.org/records/10044268/files/TianKampmann2021_CRISPRi.h5ad?download=1",
1318
1320
  output_file_name=output_file_name,
1319
1321
  output_path=settings.datasetdir,
1320
1322
  is_zip=False,
@@ -1338,10 +1340,10 @@ def weinreb_2020() -> AnnData: # pragma: no cover
1338
1340
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1339
1341
  """
1340
1342
  output_file_name = "weinreb_2020.h5ad"
1341
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1343
+ output_file_path = settings.datasetdir / output_file_name
1342
1344
  if not Path(output_file_path).exists():
1343
1345
  _download(
1344
- url="https://zenodo.org/record/7278143/files/WeinrebKlein2020.h5ad?download=1",
1346
+ url="https://zenodo.org/records/10044268/files/WeinrebKlein2020.h5ad?download=1",
1345
1347
  output_file_name=output_file_name,
1346
1348
  output_path=settings.datasetdir,
1347
1349
  is_zip=False,
@@ -1366,10 +1368,10 @@ def xie_2017() -> AnnData: # pragma: no cover
1366
1368
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1367
1369
  """
1368
1370
  output_file_name = "xie_2017.h5ad"
1369
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1371
+ output_file_path = settings.datasetdir / output_file_name
1370
1372
  if not Path(output_file_path).exists():
1371
1373
  _download(
1372
- url="https://zenodo.org/record/7278143/files/XieHon2017.h5ad?download=1",
1374
+ url="https://zenodo.org/records/10044268/files/XieHon2017.h5ad?download=1",
1373
1375
  output_file_name=output_file_name,
1374
1376
  output_path=settings.datasetdir,
1375
1377
  is_zip=False,
@@ -1396,10 +1398,10 @@ def zhao_2021() -> AnnData: # pragma: no cover
1396
1398
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1397
1399
  """
1398
1400
  output_file_name = "zhaoSims2021.h5ad"
1399
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1401
+ output_file_path = settings.datasetdir / output_file_name
1400
1402
  if not Path(output_file_path).exists():
1401
1403
  _download(
1402
- url="https://zenodo.org/record/7278143/files/ZhaoSims2021.h5ad?download=1",
1404
+ url="https://zenodo.org/records/10044268/files/ZhaoSims2021.h5ad?download=1",
1403
1405
  output_file_name=output_file_name,
1404
1406
  output_path=settings.datasetdir,
1405
1407
  is_zip=False,
@@ -1410,15 +1412,18 @@ def zhao_2021() -> AnnData: # pragma: no cover
1410
1412
 
1411
1413
 
1412
1414
  def cinemaot_example() -> AnnData: # pragma: no cover:
1413
- """CINEMA-OT Example dataset.
1415
+ """Subsampled CINEMA-OT example dataset.
1416
+
1417
+ Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon. This is a subsampled
1418
+ dataset containing 1000 cells, either without stimulation or stimulated with IFNb. The full dataset is available
1419
+ via the dong_2023() function.
1414
1420
 
1415
- Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon.
1416
1421
 
1417
1422
  Returns:
1418
1423
  :class:`~anndata.AnnData` object of PBMCs stimulated with interferon.
1419
1424
  """
1420
1425
  output_file_name = "cinemaot_example.h5ad"
1421
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1426
+ output_file_path = settings.datasetdir / output_file_name
1422
1427
  if not Path(output_file_path).exists():
1423
1428
  _download(
1424
1429
  url="https://figshare.com/ndownloader/files/42362796?private_link=270b0d2c7f1ea57c366d",
@@ -1429,3 +1434,167 @@ def cinemaot_example() -> AnnData: # pragma: no cover:
1429
1434
  adata = sc.read_h5ad(output_file_path)
1430
1435
 
1431
1436
  return adata
1437
+
1438
+
1439
+ def dong_2023() -> AnnData: # pragma: no cover
1440
+ """Complete CINEMA-OT dataset.
1441
+
1442
+ Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon. This is the full dataset
1443
+ containing 9209 cells that were stimulated with IFNb, IFNg, IFNb+IFNg, or left unstimulated. A subsampled version
1444
+ of the dataset is available via cinemaot_example().
1445
+
1446
+ References:
1447
+ Preprint: https://doi.org/10.1101/2022.07.31.502173
1448
+ Dataset available here: https://datadryad.org/stash/dataset/doi:10.5061/dryad.4xgxd25g1
1449
+
1450
+ Returns:
1451
+ :class:`~anndata.AnnData` object of PBMCs stimulated with interferon.
1452
+ """
1453
+ output_file_name = "dong_2023.h5ad"
1454
+ output_file_path = settings.datasetdir / output_file_name
1455
+ if not Path(output_file_path).exists():
1456
+ _download(
1457
+ url="https://figshare.com/ndownloader/files/43068190",
1458
+ output_file_name=output_file_name,
1459
+ output_path=settings.datasetdir,
1460
+ is_zip=False,
1461
+ )
1462
+ adata = sc.read_h5ad(output_file_path)
1463
+
1464
+ return adata
1465
+
1466
+
1467
+ def smillie_2019() -> AnnData: # pragma: no cover
1468
+ """scRNA-seq data of the human colon during Ulcerative Colitis.
1469
+
1470
+ The resulting AnnData when preparing this dataset for processing with tascCODA is available via tasccoda_example().
1471
+
1472
+ References:
1473
+ Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.”
1474
+ Cell vol. 178,3 (2019): 714-730.e22. doi:10.1016/j.cell.2019.06.029
1475
+
1476
+ Returns:
1477
+ :class:`~anndata.AnnData` object of the dataset.
1478
+ """
1479
+ output_file_name = "smillie_2019.h5ad.zip"
1480
+ output_file_path = settings.datasetdir / Path(output_file_name).with_suffix("")
1481
+ if not Path(output_file_path).exists():
1482
+ _download(
1483
+ url="https://figshare.com/ndownloader/files/43317285",
1484
+ output_file_name=output_file_name,
1485
+ output_path=settings.datasetdir,
1486
+ is_zip=True,
1487
+ )
1488
+ adata = sc.read_h5ad(output_file_path)
1489
+
1490
+ return adata
1491
+
1492
+
1493
+ def combosciplex() -> AnnData: # pragma: no cover
1494
+ """scRNA-seq subset of the combinatorial experiment of sciplex3.
1495
+
1496
+ References:
1497
+ Srivatsan SR et al., Trapnell C. Massively multiplex chemical transcriptomics at
1498
+ single-cell resolution. Science. 2020 Jan 3;367(6473):45-51.
1499
+ doi: 10.1126/science.aax6234. Epub 2019 Dec 5. PMID: 31806696; PMCID: PMC7289078.
1500
+
1501
+ Returns:
1502
+ :class:`~anndata.AnnData` object of the dataset.
1503
+ """
1504
+ output_file_name = "combosciplex.h5ad"
1505
+ output_file_path = settings.datasetdir / output_file_name
1506
+ if not Path(output_file_path).exists():
1507
+ _download(
1508
+ url="https://figshare.com/ndownloader/files/44229635",
1509
+ output_file_name=output_file_name,
1510
+ output_path=settings.datasetdir,
1511
+ is_zip=False,
1512
+ )
1513
+ adata = sc.read_h5ad(output_file_path)
1514
+
1515
+ return adata
1516
+
1517
+
1518
+ def sciplex_gxe1() -> AnnData: # pragma: no cover
1519
+ """sci-Plex-GxE combined chemical and genetic profiling of A172 dCas9-KRAB cells
1520
+ genetically perturbed for HPRT1 or mismatch repair genes exposed to 6-thioguanine and temozolomide,
1521
+ respectively, and A172 dCas9-SunTag cells genetically perturbed for HPRT1 exposed to 6-thioguanine.
1522
+
1523
+ References:
1524
+ McFaline-Figueroa JL et al., Trapnell C. Multiplex single-cell chemical genomics reveals
1525
+ the kinase dependence of the response to targeted therapy. Cell Genomics. 2024 Volume 4, Issue 2.
1526
+ doi: 10.1016/j.xgen.2023.100487
1527
+
1528
+ Returns:
1529
+ :class:`~anndata.AnnData` object of the dataset.
1530
+ """
1531
+ output_file_name = "sciPlexGxE_1_GSM7056148.h5ad"
1532
+ output_file_path = settings.datasetdir / output_file_name
1533
+ if not Path(output_file_path).exists():
1534
+ _download(
1535
+ url="https://figshare.com/ndownloader/files/45372454",
1536
+ output_file_name=output_file_name,
1537
+ output_path=settings.datasetdir,
1538
+ is_zip=False,
1539
+ )
1540
+ adata = sc.read_h5ad(output_file_path)
1541
+
1542
+ return adata
1543
+
1544
+
1545
+ def zhang_2021() -> AnnData: # pragma: no cover
1546
+ """Single-cell RNA-seq of TNBC patients' immune cells exposed to paclitaxel alone or combined with the anti-PD-L1 atezolizumab.
1547
+
1548
+ This analysis, involving 22 patients, identifies immune subtypes predictive of therapeutic
1549
+ responses and underscores potential limitations of combining paclitaxel with atezolizumab in treatment protocols.
1550
+
1551
+ The script that generated this specific AnnData object:
1552
+ https://github.com/tessadgreen/ThesisCode/blob/main/Chapter3/drug_response/import_zhang_data.ipynb
1553
+
1554
+ This dataset does not contain the single-cell ATAC-seq data that was also measured for the paper.
1555
+
1556
+ References:
1557
+ Zhang Y et al., Liu Z. Single-cell analyses reveal key immune cell subsets associated with response to PD-L1 blockade in triple-negative breast cancer.
1558
+ Cancer Cell. 2021 Volume 39, Issue 12. doi: https://doi.org/10.1016/j.ccell.2021.09.010
1559
+
1560
+ Returns:
1561
+ :class:`~anndata.AnnData` object of the dataset.
1562
+ """
1563
+ output_file_name = "zhang_2021.h5ad"
1564
+ output_file_path = settings.datasetdir / output_file_name
1565
+ if not Path(output_file_path).exists():
1566
+ _download(
1567
+ url="https://figshare.com/ndownloader/files/46457872",
1568
+ output_file_name=output_file_name,
1569
+ output_path=settings.datasetdir,
1570
+ is_zip=False,
1571
+ )
1572
+ adata = sc.read_h5ad(output_file_path)
1573
+
1574
+ return adata
1575
+
1576
+
1577
+ def hagai_2018() -> AnnData: # pragma: no cover
1578
+ """Cross-species analysis of primary dermal fibroblasts and bone marrow-derived phagocytes, stimulated with dsRNA and IFNB.
1579
+
1580
+ The study explores immune response variations across humans, macaques, mice, and rats.
1581
+
1582
+ References:
1583
+ Hagai, T., Chen, X., Miragaia, R.J. et al. Gene expression variability across cells and species shapes innate immunity.
1584
+ Nature 563, 197–202 (2018). https://doi.org/10.1038/s41586-018-0657-2
1585
+
1586
+ Returns:
1587
+ :class:`~anndata.AnnData` object of the dataset.
1588
+ """
1589
+ output_file_name = "hagai_2018.h5ad"
1590
+ output_file_path = settings.datasetdir / output_file_name
1591
+ if not Path(output_file_path).exists():
1592
+ _download(
1593
+ url="https://figshare.com/ndownloader/files/46978846",
1594
+ output_file_name=output_file_name,
1595
+ output_path=settings.datasetdir,
1596
+ is_zip=False,
1597
+ )
1598
+ adata = sc.read_h5ad(output_file_path)
1599
+
1600
+ return adata