pertpy-0.6.0-py3-none-any.whl → pertpy-0.8.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66)
  1. pertpy/__init__.py +4 -2
  2. pertpy/data/__init__.py +66 -1
  3. pertpy/data/_dataloader.py +28 -26
  4. pertpy/data/_datasets.py +261 -92
  5. pertpy/metadata/__init__.py +6 -0
  6. pertpy/metadata/_cell_line.py +795 -0
  7. pertpy/metadata/_compound.py +128 -0
  8. pertpy/metadata/_drug.py +238 -0
  9. pertpy/metadata/_look_up.py +569 -0
  10. pertpy/metadata/_metadata.py +70 -0
  11. pertpy/metadata/_moa.py +125 -0
  12. pertpy/plot/__init__.py +0 -13
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +89 -6
  15. pertpy/tools/__init__.py +48 -15
  16. pertpy/tools/_augur.py +329 -32
  17. pertpy/tools/_cinemaot.py +145 -6
  18. pertpy/tools/_coda/_base_coda.py +1237 -116
  19. pertpy/tools/_coda/_sccoda.py +66 -36
  20. pertpy/tools/_coda/_tasccoda.py +46 -39
  21. pertpy/tools/_dialogue.py +180 -77
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +29 -24
  32. pertpy/tools/_distances/_distances.py +584 -98
  33. pertpy/tools/_enrichment.py +460 -0
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +406 -49
  36. pertpy/tools/_mixscape.py +677 -55
  37. pertpy/tools/_perturbation_space/_clustering.py +10 -3
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
  41. pertpy/tools/_perturbation_space/_simple.py +52 -11
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +706 -0
  45. pertpy/tools/_scgen/_utils.py +3 -5
  46. pertpy/tools/decoupler_LICENSE +674 -0
  47. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
  48. pertpy-0.8.0.dist-info/RECORD +57 -0
  49. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  50. pertpy/plot/_augur.py +0 -234
  51. pertpy/plot/_cinemaot.py +0 -81
  52. pertpy/plot/_coda.py +0 -1001
  53. pertpy/plot/_dialogue.py +0 -91
  54. pertpy/plot/_guide_rna.py +0 -82
  55. pertpy/plot/_milopy.py +0 -284
  56. pertpy/plot/_mixscape.py +0 -594
  57. pertpy/plot/_scgen.py +0 -337
  58. pertpy/tools/_differential_gene_expression.py +0 -99
  59. pertpy/tools/_metadata/__init__.py +0 -0
  60. pertpy/tools/_metadata/_cell_line.py +0 -613
  61. pertpy/tools/_metadata/_look_up.py +0 -342
  62. pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
  63. pertpy/tools/_scgen/_jax_scgen.py +0 -370
  64. pertpy-0.6.0.dist-info/RECORD +0 -50
  65. pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
  66. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/data/_datasets.py CHANGED
@@ -40,7 +40,7 @@ def papalexi_2021() -> MuData: # pragma: no cover
40
40
  import muon as mu
41
41
 
42
42
  output_file_name = "papalexi_2021.h5mu"
43
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
43
+ output_file_path = settings.datasetdir / output_file_name
44
44
  if not Path(output_file_path).exists():
45
45
  _download(
46
46
  url="https://figshare.com/ndownloader/files/36509460",
@@ -63,7 +63,7 @@ def sc_sim_augur() -> AnnData: # pragma: no cover
63
63
  :class:`~anndata.AnnData` object of a simulated single-cell RNA seq dataset
64
64
  """
65
65
  output_file_name = "sc_sim_augur.h5ad"
66
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
66
+ output_file_path = settings.datasetdir / output_file_name
67
67
  if not Path(output_file_path).exists():
68
68
  _download(
69
69
  url="https://figshare.com/ndownloader/files/31645886",
@@ -93,7 +93,7 @@ def bhattacherjee() -> AnnData: # pragma: no cover
93
93
  :class:`~anndata.AnnData` object of a single-cell RNA seq dataset
94
94
  """
95
95
  output_file_name = "bhattacherjee_rna.h5ad"
96
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
96
+ output_file_path = settings.datasetdir / output_file_name
97
97
  if not Path(output_file_path).exists():
98
98
  _download(
99
99
  url="https://figshare.com/ndownloader/files/34526528",
@@ -110,9 +110,7 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
110
110
  """Raw sciplex3 perturbation dataset curated for perturbation modeling.
111
111
 
112
112
  References:
113
- Srivatsan SR, McFaline-Figueroa JL, Ramani V, Saunders L, Cao J, Packer J,
114
- Pliner HA, Jackson DL, Daza RM, Christiansen L, Zhang F, Steemers F,
115
- Shendure J, Trapnell C. Massively multiplex chemical transcriptomics at
113
+ Srivatsan SR et al., Trapnell C. Massively multiplex chemical transcriptomics at
116
114
  single-cell resolution. Science. 2020 Jan 3;367(6473):45-51.
117
115
  doi: 10.1126/science.aax6234. Epub 2019 Dec 5. PMID: 31806696; PMCID: PMC7289078.
118
116
 
@@ -120,7 +118,7 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
120
118
  :class:`~anndata.AnnData` object of a single-cell RNA seq dataset
121
119
  """
122
120
  output_file_name = "sciplex3.h5ad"
123
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
121
+ output_file_path = settings.datasetdir / output_file_name
124
122
  if not Path(output_file_path).exists():
125
123
  _download(
126
124
  url="https://figshare.com/ndownloader/files/33979517",
@@ -133,8 +131,12 @@ def sciplex3_raw() -> AnnData: # pragma: no cover
133
131
  return adata
134
132
 
135
133
 
136
- def smillie() -> AnnData: # pragma: no cover
137
- """scRNA-seq data of the small intestine of mice under Ulcerative Colitis.
134
+ def tasccoda_example() -> AnnData: # pragma: no cover
135
+ """Example for the coda part of a mudata object.
136
+
137
+ Resulting AnnData object (mudata['coda']) when preparing a dataset for processing with tascCODA.
138
+ Created using the smillie dataset, which comprises scRNA-seq data of the small intestine of mice under Ulcerative Colitis.
139
+ The full dataset containing the actual count data can be obtained via smillie_2019().
138
140
 
139
141
  References:
140
142
  Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.”
@@ -143,8 +145,8 @@ def smillie() -> AnnData: # pragma: no cover
143
145
  Returns:
144
146
  :class:`~anndata.AnnData` object of the dataset.
145
147
  """
146
- output_file_name = "smillie.h5ad"
147
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
148
+ output_file_name = "tasccoda_smillie.h5ad"
149
+ output_file_path = settings.datasetdir / output_file_name
148
150
  if not Path(output_file_path).exists():
149
151
  _download(
150
152
  url="https://figshare.com/ndownloader/files/38648585",
@@ -173,7 +175,7 @@ def frangieh_2021() -> AnnData: # pragma: no cover
173
175
  :class:`~anndata.AnnData` object of the Perturb-CITE-seq data.
174
176
  """
175
177
  output_file_name = "frangieh_2021.h5ad"
176
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
178
+ output_file_path = settings.datasetdir / output_file_name
177
179
  if not Path(output_file_path).exists():
178
180
  _download(
179
181
  url="https://figshare.com/ndownloader/files/34013717",
@@ -202,7 +204,7 @@ def frangieh_2021_raw() -> AnnData: # pragma: no cover
202
204
  :class:`~anndata.AnnData` object of raw Perturb-CITE-seq data.
203
205
  """
204
206
  output_file_name = "frangieh_2021_raw.h5ad"
205
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
207
+ output_file_path = settings.datasetdir / output_file_name
206
208
  if not Path(output_file_path).exists():
207
209
  _download(
208
210
  url="https://figshare.com/ndownloader/files/34012565",
@@ -231,7 +233,7 @@ def dixit_2016_raw() -> AnnData: # pragma: no cover
231
233
  :class:`~anndata.AnnData` object of raw Perturb-seq data.
232
234
  """
233
235
  output_file_name = "dixit_2016_raw.h5ad"
234
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
236
+ output_file_path = settings.datasetdir / output_file_name
235
237
  if not Path(output_file_path).exists():
236
238
  _download(
237
239
  url="https://figshare.com/ndownloader/files/34012565",
@@ -260,7 +262,7 @@ def dixit_2016() -> AnnData: # pragma: no cover
260
262
  :class:`~anndata.AnnData` object of Perturb-seq data
261
263
  """
262
264
  output_file_name = "dixit_2016.h5ad"
263
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
265
+ output_file_path = settings.datasetdir / output_file_name
264
266
  if not Path(output_file_path).exists():
265
267
  _download(
266
268
  url="https://figshare.com/ndownloader/files/34014608",
@@ -288,7 +290,7 @@ def norman_2019() -> AnnData: # pragma: no cover
288
290
  :class:`~anndata.AnnData` object of single-cell pooled CRISPR screening.
289
291
  """
290
292
  output_file_name = "norman_2019.h5ad"
291
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
293
+ output_file_path = settings.datasetdir / output_file_name
292
294
  if not Path(output_file_path).exists():
293
295
  _download(
294
296
  url="https://figshare.com/ndownloader/files/34027562",
@@ -316,7 +318,7 @@ def norman_2019_raw() -> AnnData: # pragma: no cover
316
318
  :class:`~anndata.AnnData` object of raw single-cell pooled CRISPR screening
317
319
  """
318
320
  output_file_name = "norman_2019_raw.h5ad"
319
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
321
+ output_file_path = settings.datasetdir / output_file_name
320
322
  if not Path(output_file_path).exists():
321
323
  _download(
322
324
  url="https://figshare.com/ndownloader/files/34002548",
@@ -339,10 +341,10 @@ def dialogue_example() -> AnnData: # pragma: no cover
339
341
  :class:`~anndata.AnnData` object
340
342
  """
341
343
  output_file_name = "dialogue_example.h5ad"
342
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
344
+ output_file_path = settings.datasetdir / output_file_name
343
345
  if not Path(output_file_path).exists():
344
346
  _download(
345
- url="https://figshare.com/ndownloader/files/34490714",
347
+ url="https://figshare.com/ndownloader/files/43462662",
346
348
  output_file_name=output_file_name,
347
349
  output_path=settings.datasetdir,
348
350
  is_zip=False,
@@ -361,7 +363,7 @@ def distance_example() -> AnnData: # pragma: no cover
361
363
  :class:`~anndata.AnnData` object
362
364
  """
363
365
  output_file_name = "distances_example_data.h5ad"
364
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
366
+ output_file_path = settings.datasetdir / output_file_name
365
367
  if not Path(output_file_path).exists():
366
368
  _download(
367
369
  url="https://figshare.com/ndownloader/files/39561379",
@@ -392,7 +394,7 @@ def kang_2018() -> AnnData: # pragma: no cover
392
394
  :class:`~anndata.AnnData` object of droplet-based single cell RNA-sequencing
393
395
  """
394
396
  output_file_name = "kang_2018.h5ad"
395
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
397
+ output_file_path = settings.datasetdir / output_file_name
396
398
  if not Path(output_file_path).exists():
397
399
  _download(
398
400
  url="https://figshare.com/ndownloader/files/34464122",
@@ -421,7 +423,7 @@ def stephenson_2021_subsampled() -> AnnData: # pragma: no cover
421
423
  :class:`~anndata.AnnData` object of scRNA-seq profiles
422
424
  """
423
425
  output_file_name = "stephenson_2021_subsampled.h5ad"
424
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
426
+ output_file_path = settings.datasetdir / output_file_name
425
427
  if not Path(output_file_path).exists():
426
428
  _download(
427
429
  url="https://figshare.com/ndownloader/files/38171703",
@@ -448,7 +450,7 @@ def haber_2017_regions() -> AnnData: # pragma: no cover
448
450
  :class:`~anndata.AnnData` object
449
451
  """
450
452
  output_file_name = "haber_2017_regions.h5ad"
451
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
453
+ output_file_path = settings.datasetdir / output_file_name
452
454
  if not Path(output_file_path).exists():
453
455
  _download(
454
456
  url="https://figshare.com/ndownloader/files/38169900",
@@ -475,10 +477,10 @@ def adamson_2016_pilot() -> AnnData: # pragma: no cover
475
477
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
476
478
  """
477
479
  output_file_name = "adamson_2016_pilot.h5ad"
478
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
480
+ output_file_path = settings.datasetdir / output_file_name
479
481
  if not Path(output_file_path).exists():
480
482
  _download(
481
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406675_10X001.h5ad?download=1",
483
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406675_10X001.h5ad?download=1",
482
484
  output_file_name=output_file_name,
483
485
  output_path=settings.datasetdir,
484
486
  is_zip=False,
@@ -506,10 +508,10 @@ def adamson_2016_upr_epistasis() -> AnnData: # pragma: no cover
506
508
  :class:`~anndata.AnnData` object of scPerturb preparedsingle-cell perturbation data
507
509
  """
508
510
  output_file_name = "adamson_2016_upr_epistasis.h5ad"
509
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
511
+ output_file_path = settings.datasetdir / output_file_name
510
512
  if not Path(output_file_path).exists():
511
513
  _download(
512
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406677_10X005.h5ad?download=1",
514
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406677_10X005.h5ad?download=1",
513
515
  output_file_name=output_file_name,
514
516
  output_path=settings.datasetdir,
515
517
  is_zip=False,
@@ -535,10 +537,10 @@ def adamson_2016_upr_perturb_seq() -> AnnData: # pragma: no cover
535
537
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
536
538
  """
537
539
  output_file_name = "adamson_2016_upr_perturb_seq.h5ad"
538
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
540
+ output_file_path = settings.datasetdir / output_file_name
539
541
  if not Path(output_file_path).exists():
540
542
  _download(
541
- url="https://zenodo.org/record/7278143/files/AdamsonWeissman2016_GSM2406681_10X010.h5ad?download=1",
543
+ url="https://zenodo.org/record/10044268/files/AdamsonWeissman2016_GSM2406681_10X010.h5ad?download=1",
542
544
  output_file_name=output_file_name,
543
545
  output_path=settings.datasetdir,
544
546
  is_zip=False,
@@ -562,10 +564,10 @@ def aissa_2021() -> AnnData: # pragma: no cover
562
564
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
563
565
  """
564
566
  output_file_name = "aissa_2021.h5ad"
565
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
567
+ output_file_path = settings.datasetdir / output_file_name
566
568
  if not Path(output_file_path).exists():
567
569
  _download(
568
- url="https://zenodo.org/record/7278143/files/AissaBenevolenskaya2021.h5ad?download=1",
570
+ url="https://zenodo.org/record/10044268/files/AissaBenevolenskaya2021.h5ad?download=1",
569
571
  output_file_name=output_file_name,
570
572
  output_path=settings.datasetdir,
571
573
  is_zip=False,
@@ -591,10 +593,10 @@ def chang_2021() -> AnnData: # pragma: no cover
591
593
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
592
594
  """
593
595
  output_file_name = "chang_2021.h5ad"
594
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
596
+ output_file_path = settings.datasetdir / output_file_name
595
597
  if not Path(output_file_path).exists():
596
598
  _download(
597
- url="https://zenodo.org/record/7278143/files/ChangYe2021.h5ad?download=1",
599
+ url="https://zenodo.org/record/10044268/files/ChangYe2021.h5ad?download=1",
598
600
  output_file_name=output_file_name,
599
601
  output_path=settings.datasetdir,
600
602
  is_zip=False,
@@ -622,10 +624,10 @@ def datlinger_2017() -> AnnData: # pragma: no cover
622
624
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
623
625
  """
624
626
  output_file_name = "datlinger_2017.h5ad"
625
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
627
+ output_file_path = settings.datasetdir / output_file_name
626
628
  if not Path(output_file_path).exists():
627
629
  _download(
628
- url="https://zenodo.org/record/7278143/files/DatlingerBock2017.h5ad?download=1",
630
+ url="https://zenodo.org/record/10044268/files/DatlingerBock2017.h5ad?download=1",
629
631
  output_file_name=output_file_name,
630
632
  output_path=settings.datasetdir,
631
633
  is_zip=False,
@@ -652,10 +654,10 @@ def datlinger_2021() -> AnnData: # pragma: no cover
652
654
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
653
655
  """
654
656
  output_file_name = "datlinger_2021.h5ad"
655
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
657
+ output_file_path = settings.datasetdir / output_file_name
656
658
  if not Path(output_file_path).exists():
657
659
  _download(
658
- url="https://zenodo.org/record/7278143/files/DatlingerBock2021.h5ad?download=1",
660
+ url="https://zenodo.org/record/10044268/files/DatlingerBock2021.h5ad?download=1",
659
661
  output_file_name=output_file_name,
660
662
  output_path=settings.datasetdir,
661
663
  is_zip=False,
@@ -683,10 +685,10 @@ def frangieh_2021_protein() -> AnnData: # pragma: no cover
683
685
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
684
686
  """
685
687
  output_file_name = "frangieh_2021_protein.h5ad"
686
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
688
+ output_file_path = settings.datasetdir / output_file_name
687
689
  if not Path(output_file_path).exists():
688
690
  _download(
689
- url="https://zenodo.org/record/7278143/files/FrangiehIzar2021_protein.h5ad?download=1",
691
+ url="https://zenodo.org/record/10044268/files/FrangiehIzar2021_protein.h5ad?download=1",
690
692
  output_file_name=output_file_name,
691
693
  output_path=settings.datasetdir,
692
694
  is_zip=False,
@@ -714,10 +716,10 @@ def frangieh_2021_rna() -> AnnData: # pragma: no cover
714
716
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
715
717
  """
716
718
  output_file_name = "frangieh_2021_rna.h5ad"
717
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
719
+ output_file_path = settings.datasetdir / output_file_name
718
720
  if not Path(output_file_path).exists():
719
721
  _download(
720
- url="https://zenodo.org/record/7278143/files/FrangiehIzar2021_RNA.h5ad?download=1",
722
+ url="https://zenodo.org/record/10044268/files/FrangiehIzar2021_RNA.h5ad?download=1",
721
723
  output_file_name=output_file_name,
722
724
  output_path=settings.datasetdir,
723
725
  is_zip=False,
@@ -742,10 +744,10 @@ def gasperini_2019_atscale() -> AnnData: # pragma: no cover
742
744
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
743
745
  """
744
746
  output_file_name = "gasperini_2019_atscale.h5ad"
745
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
747
+ output_file_path = settings.datasetdir / output_file_name
746
748
  if not Path(output_file_path).exists():
747
749
  _download(
748
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_atscale.h5ad?download=1",
750
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_atscale.h5ad?download=1",
749
751
  output_file_name=output_file_name,
750
752
  output_path=settings.datasetdir,
751
753
  is_zip=False,
@@ -771,10 +773,10 @@ def gasperini_2019_highmoi() -> AnnData: # pragma: no cover
771
773
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
772
774
  """
773
775
  output_file_name = "gasperini_2019_highmoi.h5ad"
774
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
776
+ output_file_path = settings.datasetdir / output_file_name
775
777
  if not Path(output_file_path).exists():
776
778
  _download(
777
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_highMOI.h5ad?download=1",
779
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_highMOI.h5ad?download=1",
778
780
  output_file_name=output_file_name,
779
781
  output_path=settings.datasetdir,
780
782
  is_zip=False,
@@ -800,10 +802,10 @@ def gasperini_2019_lowmoi() -> AnnData: # pragma: no cover
800
802
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
801
803
  """
802
804
  output_file_name = "gasperini_2019_lowmoi.h5ad"
803
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
805
+ output_file_path = settings.datasetdir / output_file_name
804
806
  if not Path(output_file_path).exists():
805
807
  _download(
806
- url="https://zenodo.org/record/7278143/files/GasperiniShendure2019_lowMOI.h5ad?download=1",
808
+ url="https://zenodo.org/record/10044268/files/GasperiniShendure2019_lowMOI.h5ad?download=1",
807
809
  output_file_name=output_file_name,
808
810
  output_path=settings.datasetdir,
809
811
  is_zip=False,
@@ -828,10 +830,10 @@ def gehring_2019() -> AnnData: # pragma: no cover
828
830
  :class:`~anndata.AnnData` object of a scPerturb prepared single-cell dataset
829
831
  """
830
832
  output_file_name = "gehring_2019.h5ad"
831
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
833
+ output_file_path = settings.datasetdir / output_file_name
832
834
  if not Path(output_file_path).exists():
833
835
  _download(
834
- url="https://zenodo.org/record/7278143/files/GehringPachter2019.h5ad?download=1",
836
+ url="https://zenodo.org/record/10044268/files/GehringPachter2019.h5ad?download=1",
835
837
  output_file_name=output_file_name,
836
838
  output_path=settings.datasetdir,
837
839
  is_zip=False,
@@ -857,10 +859,10 @@ def mcfarland_2020() -> AnnData: # pragma: no cover
857
859
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
858
860
  """
859
861
  output_file_name = "mcfarland_2020.h5ad"
860
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
862
+ output_file_path = settings.datasetdir / output_file_name
861
863
  if not Path(output_file_path).exists():
862
864
  _download(
863
- url="https://zenodo.org/record/7278143/files/McFarlandTsherniak2020.h5ad?download=1",
865
+ url="https://zenodo.org/record/10044268/files/McFarlandTsherniak2020.h5ad?download=1",
864
866
  output_file_name=output_file_name,
865
867
  output_path=settings.datasetdir,
866
868
  is_zip=False,
@@ -886,10 +888,10 @@ def replogle_2022_k562_essential() -> AnnData: # pragma: no cover
886
888
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
887
889
  """
888
890
  output_file_name = "replogle_2022_k562_essential.h5ad"
889
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
891
+ output_file_path = settings.datasetdir / output_file_name
890
892
  if not Path(output_file_path).exists():
891
893
  _download(
892
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_K562_essential.h5ad?download=1",
894
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_K562_essential.h5ad?download=1",
893
895
  output_file_name=output_file_name,
894
896
  output_path=settings.datasetdir,
895
897
  is_zip=False,
@@ -900,7 +902,7 @@ def replogle_2022_k562_essential() -> AnnData: # pragma: no cover
900
902
 
901
903
 
902
904
  def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
903
- """K562 cells transduced with CRISPRi (day 8 after transcduction).
905
+ """K562 cells transduced with CRISPRi (day 8 after transduction).
904
906
 
905
907
  Here, the authors used a compact, multiplexed CRISPR interference (CRISPRi) library
906
908
  to assay thousands of loss-of-function genetic perturbations with single-cell RNA sequencing
@@ -917,10 +919,10 @@ def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
917
919
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
918
920
  """
919
921
  output_file_name = "replogle_2022_k562_gwps.h5ad"
920
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
922
+ output_file_path = settings.datasetdir / output_file_name
921
923
  if not Path(output_file_path).exists():
922
924
  _download(
923
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_K562_gwps.h5ad?download=1",
925
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_K562_gwps.h5ad?download=1",
924
926
  output_file_name=output_file_name,
925
927
  output_path=settings.datasetdir,
926
928
  is_zip=False,
@@ -931,7 +933,7 @@ def replogle_2022_k562_gwps() -> AnnData: # pragma: no cover
931
933
 
932
934
 
933
935
  def replogle_2022_rpe1() -> AnnData: # pragma: no cover
934
- """RPE1 cells transduced with CRISPRi (day 7 after transcduction).
936
+ """RPE1 cells transduced with CRISPRi (day 7 after transduction).
935
937
 
936
938
  For day 7 essential-scale Perturb-seq experiment in retinal pigment epithelial (RPE1)
937
939
  cell lines, library lentivirus was packaged into lentivirus in 293T cells and
@@ -947,10 +949,10 @@ def replogle_2022_rpe1() -> AnnData: # pragma: no cover
947
949
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
948
950
  """
949
951
  output_file_name = "replogle_2022_rpe1.h5ad"
950
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
952
+ output_file_path = settings.datasetdir / output_file_name
951
953
  if not Path(output_file_path).exists():
952
954
  _download(
953
- url="https://zenodo.org/record/7278143/files/ReplogleWeissman2022_rpe1.h5ad?download=1",
955
+ url="https://zenodo.org/record/10044268/files/ReplogleWeissman2022_rpe1.h5ad?download=1",
954
956
  output_file_name=output_file_name,
955
957
  output_path=settings.datasetdir,
956
958
  is_zip=False,
@@ -978,10 +980,10 @@ def schiebinger_2019_16day() -> AnnData: # pragma: no cover
978
980
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
979
981
  """
980
982
  output_file_name = "schiebinger_2019_16day.h5ad"
981
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
983
+ output_file_path = settings.datasetdir / output_file_name
982
984
  if not Path(output_file_path).exists():
983
985
  _download(
984
- url="https://zenodo.org/record/7278143/files/SchiebingerLander2019_GSE106340.h5ad?download=1",
986
+ url="https://zenodo.org/record/10044268/files/SchiebingerLander2019_GSE106340.h5ad?download=1",
985
987
  output_file_name=output_file_name,
986
988
  output_path=settings.datasetdir,
987
989
  is_zip=False,
@@ -1007,10 +1009,10 @@ def schiebinger_2019_18day() -> AnnData: # pragma: no cover
1007
1009
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1008
1010
  """
1009
1011
  output_file_name = "Schiebinger_2019_18day.h5ad"
1010
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1012
+ output_file_path = settings.datasetdir / output_file_name
1011
1013
  if not Path(output_file_path).exists():
1012
1014
  _download(
1013
- url="https://zenodo.org/record/7278143/files/SchiebingerLander2019_GSE115943.h5ad?download=1",
1015
+ url="https://zenodo.org/record/10044268/files/SchiebingerLander2019_GSE115943.h5ad?download=1",
1014
1016
  output_file_name=output_file_name,
1015
1017
  output_path=settings.datasetdir,
1016
1018
  is_zip=False,
@@ -1036,10 +1038,10 @@ def schraivogel_2020_tap_screen_chr11() -> AnnData: # pragma: no cover
1036
1038
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1037
1039
  """
1038
1040
  output_file_name = "schraivogel_2020_tap_screen_chr11.h5ad"
1039
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1041
+ output_file_path = settings.datasetdir / output_file_name
1040
1042
  if not Path(output_file_path).exists():
1041
1043
  _download(
1042
- url="https://zenodo.org/record/7278143/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_11_screen.h5ad?download=1",
1044
+ url="https://zenodo.org/record/10044268/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_11_screen.h5ad?download=1",
1043
1045
  output_file_name=output_file_name,
1044
1046
  output_path=settings.datasetdir,
1045
1047
  is_zip=False,
@@ -1065,10 +1067,10 @@ def schraivogel_2020_tap_screen_chr8() -> AnnData: # pragma: no cover
1065
1067
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1066
1068
  """
1067
1069
  output_file_name = "schraivogel_2020_tap_screen_chr8.h5ad"
1068
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1070
+ output_file_path = settings.datasetdir / output_file_name
1069
1071
  if not Path(output_file_path).exists():
1070
1072
  _download(
1071
- url="https://zenodo.org/record/7278143/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_8_screen.h5ad?download=1",
1073
+ url="https://zenodo.org/record/10044268/files/SchraivogelSteinmetz2020_TAP_SCREEN__chromosome_8_screen.h5ad?download=1",
1072
1074
  output_file_name=output_file_name,
1073
1075
  output_path=settings.datasetdir,
1074
1076
  is_zip=False,
@@ -1095,10 +1097,10 @@ def shifrut_2018() -> AnnData: # pragma: no cover
1095
1097
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1096
1098
  """
1097
1099
  output_file_name = "shifrut_2018.h5ad"
1098
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1100
+ output_file_path = settings.datasetdir / output_file_name
1099
1101
  if not Path(output_file_path).exists():
1100
1102
  _download(
1101
- url="https://zenodo.org/record/7278143/files/ShifrutMarson2018.h5ad?download=1",
1103
+ url="https://zenodo.org/record/10044268/files/ShifrutMarson2018.h5ad?download=1",
1102
1104
  output_file_name=output_file_name,
1103
1105
  output_path=settings.datasetdir,
1104
1106
  is_zip=False,
@@ -1126,10 +1128,10 @@ def srivatsan_2020_sciplex2() -> AnnData: # pragma: no cover
1126
1128
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1127
1129
  """
1128
1130
  output_file_name = "srivatsan_2020_sciplex2.h5ad"
1129
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1131
+ output_file_path = settings.datasetdir / output_file_name
1130
1132
  if not Path(output_file_path).exists():
1131
1133
  _download(
1132
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex2.h5ad?download=1",
1134
+ url="https://zenodo.org/record/10044268/files/SrivatsanTrapnell2020_sciplex2.h5ad?download=1",
1133
1135
  output_file_name=output_file_name,
1134
1136
  output_path=settings.datasetdir,
1135
1137
  is_zip=False,
@@ -1155,10 +1157,10 @@ def srivatsan_2020_sciplex3() -> AnnData: # pragma: no cover
1155
1157
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1156
1158
  """
1157
1159
  output_file_name = "srivatsan_2020_sciplex3.h5ad"
1158
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1160
+ output_file_path = settings.datasetdir / output_file_name
1159
1161
  if not Path(output_file_path).exists():
1160
1162
  _download(
1161
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex3.h5ad?download=1",
1163
+ url="https://zenodo.org/records/10044268/files/SrivatsanTrapnell2020_sciplex3.h5ad?download=1",
1162
1164
  output_file_name=output_file_name,
1163
1165
  output_path=settings.datasetdir,
1164
1166
  is_zip=False,
@@ -1187,10 +1189,10 @@ def srivatsan_2020_sciplex4() -> AnnData: # pragma: no cover
1187
1189
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1188
1190
  """
1189
1191
  output_file_name = "srivatsan_2020_sciplex4.h5ad"
1190
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1192
+ output_file_path = settings.datasetdir / output_file_name
1191
1193
  if not Path(output_file_path).exists():
1192
1194
  _download(
1193
- url="https://zenodo.org/record/7278143/files/SrivatsanTrapnell2020_sciplex4.h5ad?download=1",
1195
+ url="https://zenodo.org/records/10044268/files/SrivatsanTrapnell2020_sciplex4.h5ad?download=1",
1194
1196
  output_file_name=output_file_name,
1195
1197
  output_path=settings.datasetdir,
1196
1198
  is_zip=False,
@@ -1219,10 +1221,10 @@ def tian_2019_day7neuron() -> AnnData: # pragma: no cover
1219
1221
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1220
1222
  """
1221
1223
  output_file_name = "tian_2019_day7neuron.h5ad"
1222
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1224
+ output_file_path = settings.datasetdir / output_file_name
1223
1225
  if not Path(output_file_path).exists():
1224
1226
  _download(
1225
- url="https://zenodo.org/record/7278143/files/TianKampmann2019_day7neuron.h5ad?download=1",
1227
+ url="https://zenodo.org/records/10044268/files/TianKampmann2019_day7neuron.h5ad?download=1",
1226
1228
  output_file_name=output_file_name,
1227
1229
  output_path=settings.datasetdir,
1228
1230
  is_zip=False,
@@ -1251,10 +1253,10 @@ def tian_2019_ipsc() -> AnnData: # pragma: no cover
1251
1253
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1252
1254
  """
1253
1255
  output_file_name = "tian_2019_iPSC.h5ad"
1254
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1256
+ output_file_path = settings.datasetdir / output_file_name
1255
1257
  if not Path(output_file_path).exists():
1256
1258
  _download(
1257
- url="https://zenodo.org/record/7278143/files/TianKampmann2019_iPSC.h5ad?download=1",
1259
+ url="https://zenodo.org/records/10044268/files/TianKampmann2019_iPSC.h5ad?download=1",
1258
1260
  output_file_name=output_file_name,
1259
1261
  output_path=settings.datasetdir,
1260
1262
  is_zip=False,
@@ -1281,10 +1283,10 @@ def tian_2021_crispra() -> AnnData: # pragma: no cover
1281
1283
  :class:`~anndata.AnnData` object of scPerturb single-cell perturbation data
1282
1284
  """
1283
1285
  output_file_name = "tian_2021_crispra.h5ad"
1284
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1286
+ output_file_path = settings.datasetdir / output_file_name
1285
1287
  if not Path(output_file_path).exists():
1286
1288
  _download(
1287
- url="https://zenodo.org/record/7278143/files/TianKampmann2021_CRISPRa.h5ad?download=1",
1289
+ url="https://zenodo.org/records/10044268/files/TianKampmann2021_CRISPRa.h5ad?download=1",
1288
1290
  output_file_name=output_file_name,
1289
1291
  output_path=settings.datasetdir,
1290
1292
  is_zip=False,
@@ -1311,10 +1313,10 @@ def tian_2021_crispri() -> AnnData: # pragma: no cover
1311
1313
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1312
1314
  """
1313
1315
  output_file_name = "tian_2021_crispri.h5ad"
1314
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1316
+ output_file_path = settings.datasetdir / output_file_name
1315
1317
  if not Path(output_file_path).exists():
1316
1318
  _download(
1317
- url="https://zenodo.org/record/7278143/files/TianKampmann2021_CRISPRi.h5ad?download=1",
1319
+ url="https://zenodo.org/records/10044268/files/TianKampmann2021_CRISPRi.h5ad?download=1",
1318
1320
  output_file_name=output_file_name,
1319
1321
  output_path=settings.datasetdir,
1320
1322
  is_zip=False,
@@ -1338,10 +1340,10 @@ def weinreb_2020() -> AnnData: # pragma: no cover
1338
1340
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1339
1341
  """
1340
1342
  output_file_name = "weinreb_2020.h5ad"
1341
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1343
+ output_file_path = settings.datasetdir / output_file_name
1342
1344
  if not Path(output_file_path).exists():
1343
1345
  _download(
1344
- url="https://zenodo.org/record/7278143/files/WeinrebKlein2020.h5ad?download=1",
1346
+ url="https://zenodo.org/records/10044268/files/WeinrebKlein2020.h5ad?download=1",
1345
1347
  output_file_name=output_file_name,
1346
1348
  output_path=settings.datasetdir,
1347
1349
  is_zip=False,
@@ -1366,10 +1368,10 @@ def xie_2017() -> AnnData: # pragma: no cover
1366
1368
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1367
1369
  """
1368
1370
  output_file_name = "xie_2017.h5ad"
1369
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1371
+ output_file_path = settings.datasetdir / output_file_name
1370
1372
  if not Path(output_file_path).exists():
1371
1373
  _download(
1372
- url="https://zenodo.org/record/7278143/files/XieHon2017.h5ad?download=1",
1374
+ url="https://zenodo.org/records/10044268/files/XieHon2017.h5ad?download=1",
1373
1375
  output_file_name=output_file_name,
1374
1376
  output_path=settings.datasetdir,
1375
1377
  is_zip=False,
@@ -1396,10 +1398,10 @@ def zhao_2021() -> AnnData: # pragma: no cover
1396
1398
  :class:`~anndata.AnnData` object of scPerturb prepared single-cell perturbation data
1397
1399
  """
1398
1400
  output_file_name = "zhaoSims2021.h5ad"
1399
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1401
+ output_file_path = settings.datasetdir / output_file_name
1400
1402
  if not Path(output_file_path).exists():
1401
1403
  _download(
1402
- url="https://zenodo.org/record/7278143/files/ZhaoSims2021.h5ad?download=1",
1404
+ url="https://zenodo.org/records/10044268/files/ZhaoSims2021.h5ad?download=1",
1403
1405
  output_file_name=output_file_name,
1404
1406
  output_path=settings.datasetdir,
1405
1407
  is_zip=False,
@@ -1410,15 +1412,18 @@ def zhao_2021() -> AnnData: # pragma: no cover
1410
1412
 
1411
1413
 
1412
1414
  def cinemaot_example() -> AnnData: # pragma: no cover:
1413
- """CINEMA-OT Example dataset.
1415
+ """Subsampled CINEMA-OT example dataset.
1416
+
1417
+ Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon. This is a subsampled
1418
+ dataset containing 1000 cells, either without stimulation or stimulated with IFNb. The full dataset is available
1419
+ via the dong_2023() function.
1414
1420
 
1415
- Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon.
1416
1421
 
1417
1422
  Returns:
1418
1423
  :class:`~anndata.AnnData` object of PBMCs stimulated with interferon.
1419
1424
  """
1420
1425
  output_file_name = "cinemaot_example.h5ad"
1421
- output_file_path = settings.datasetdir.__str__() + "/" + output_file_name
1426
+ output_file_path = settings.datasetdir / output_file_name
1422
1427
  if not Path(output_file_path).exists():
1423
1428
  _download(
1424
1429
  url="https://figshare.com/ndownloader/files/42362796?private_link=270b0d2c7f1ea57c366d",
@@ -1429,3 +1434,167 @@ def cinemaot_example() -> AnnData: # pragma: no cover:
1429
1434
  adata = sc.read_h5ad(output_file_path)
1430
1435
 
1431
1436
  return adata
1437
+
1438
+
1439
+ def dong_2023() -> AnnData: # pragma: no cover
1440
+ """Complete CINEMA-OT dataset.
1441
+
1442
+ Ex vivo stimulation of human peripheral blood mononuclear cells (PBMC) with interferon. This is the full dataset
1443
+ containing 9209 cells that were stimulated with IFNb, IFNg, IFNb+IFNg, or left unstimulated. A subsampled version
1444
+ of the dataset is available via cinemaot_example().
1445
+
1446
+ References:
1447
+ Preprint: https://doi.org/10.1101/2022.07.31.502173
1448
+ Dataset available here: https://datadryad.org/stash/dataset/doi:10.5061/dryad.4xgxd25g1
1449
+
1450
+ Returns:
1451
+ :class:`~anndata.AnnData` object of PBMCs stimulated with interferon.
1452
+ """
1453
+ output_file_name = "dong_2023.h5ad"
1454
+ output_file_path = settings.datasetdir / output_file_name
1455
+ if not Path(output_file_path).exists():
1456
+ _download(
1457
+ url="https://figshare.com/ndownloader/files/43068190",
1458
+ output_file_name=output_file_name,
1459
+ output_path=settings.datasetdir,
1460
+ is_zip=False,
1461
+ )
1462
+ adata = sc.read_h5ad(output_file_path)
1463
+
1464
+ return adata
1465
+
1466
+
1467
+ def smillie_2019() -> AnnData: # pragma: no cover
1468
+ """scRNA-seq data of the human colon under Ulcerative Colitis.
1469
+
1470
+ The resulting AnnData when preparing this dataset for processing with tascCODA is available via tasccoda_example().
1471
+
1472
+ References:
1473
+ Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.”
1474
+ Cell vol. 178,3 (2019): 714-730.e22. doi:10.1016/j.cell.2019.06.029
1475
+
1476
+ Returns:
1477
+ :class:`~anndata.AnnData` object of the dataset.
1478
+ """
1479
+ output_file_name = "smillie_2019.h5ad.zip"
1480
+ output_file_path = settings.datasetdir / Path(output_file_name).with_suffix("")
1481
+ if not Path(output_file_path).exists():
1482
+ _download(
1483
+ url="https://figshare.com/ndownloader/files/43317285",
1484
+ output_file_name=output_file_name,
1485
+ output_path=settings.datasetdir,
1486
+ is_zip=True,
1487
+ )
1488
+ adata = sc.read_h5ad(output_file_path)
1489
+
1490
+ return adata
1491
+
1492
+
1493
+ def combosciplex() -> AnnData: # pragma: no cover
1494
+ """scRNA-seq subset of the combinatorial experiment of sciplex3.
1495
+
1496
+ References:
1497
+ Srivatsan SR et al., Trapnell C. Massively multiplex chemical transcriptomics at
1498
+ single-cell resolution. Science. 2020 Jan 3;367(6473):45-51.
1499
+ doi: 10.1126/science.aax6234. Epub 2019 Dec 5. PMID: 31806696; PMCID: PMC7289078.
1500
+
1501
+ Returns:
1502
+ :class:`~anndata.AnnData` object of the dataset.
1503
+ """
1504
+ output_file_name = "combosciplex.h5ad"
1505
+ output_file_path = settings.datasetdir / output_file_name
1506
+ if not Path(output_file_path).exists():
1507
+ _download(
1508
+ url="https://figshare.com/ndownloader/files/44229635",
1509
+ output_file_name=output_file_name,
1510
+ output_path=settings.datasetdir,
1511
+ is_zip=False,
1512
+ )
1513
+ adata = sc.read_h5ad(output_file_path)
1514
+
1515
+ return adata
1516
+
1517
+
1518
+ def sciplex_gxe1() -> AnnData: # pragma: no cover
1519
+ """sci-Plex-GxE combined chemical and genetic profiling of A172 dCas9-KRAB cells
1520
+ genetically perturbed for HPRT1 or mismatch repair genes exposed to 6-thioguanine and temozolomide,
1521
+ respectively, and A172 dCas9-SunTag cells genetically perturbed for HPRT1 exposed to 6-thioguanine.
1522
+
1523
+ References:
1524
+ McFaline-Figueroa JL et al., Trapnell C. Multiplex single-cell chemical genomics reveals
1525
+ the kinase dependence of the response to targeted therapy. Cell Genomics. 2024 Volume 4, Issue 2.
1526
+ doi: 10.1016/j.xgen.2023.100487
1527
+
1528
+ Returns:
1529
+ :class:`~anndata.AnnData` object of the dataset.
1530
+ """
1531
+ output_file_name = "sciPlexGxE_1_GSM7056148.h5ad"
1532
+ output_file_path = settings.datasetdir / output_file_name
1533
+ if not Path(output_file_path).exists():
1534
+ _download(
1535
+ url="https://figshare.com/ndownloader/files/45372454",
1536
+ output_file_name=output_file_name,
1537
+ output_path=settings.datasetdir,
1538
+ is_zip=False,
1539
+ )
1540
+ adata = sc.read_h5ad(output_file_path)
1541
+
1542
+ return adata
1543
+
1544
+
1545
+ def zhang_2021() -> AnnData: # pragma: no cover
1546
+ """Single-cell RNA-seq of TNBC patients' immune cells exposed to paclitaxel alone or combined with the anti-PD-L1 atezolizumab.
1547
+
1548
+ This analysis, involving 22 patients, identifies immune subtypes predictive of therapeutic
1549
+ responses and underscores potential limitations of combining paclitaxel with atezolizumab in treatment protocols.
1550
+
1551
+ The script that generated this specific AnnData object:
1552
+ https://github.com/tessadgreen/ThesisCode/blob/main/Chapter3/drug_response/import_zhang_data.ipynb
1553
+
1554
+ This dataset does not contain the single-cell ATAC-seq data that was also measured for the paper.
1555
+
1556
+ References:
1557
+ Zhang Y et al., Liu Z. Single-cell analyses reveal key immune cell subsets associated with response to PD-L1 blockade in triple-negative breast cancer.
1558
+ Cancer Cell. 2021 Volume 39, Issue 12. doi: https://doi.org/10.1016/j.ccell.2021.09.010
1559
+
1560
+ Returns:
1561
+ :class:`~anndata.AnnData` object of the dataset.
1562
+ """
1563
+ output_file_name = "zhang_2021.h5ad"
1564
+ output_file_path = settings.datasetdir / output_file_name
1565
+ if not Path(output_file_path).exists():
1566
+ _download(
1567
+ url="https://figshare.com/ndownloader/files/46457872",
1568
+ output_file_name=output_file_name,
1569
+ output_path=settings.datasetdir,
1570
+ is_zip=False,
1571
+ )
1572
+ adata = sc.read_h5ad(output_file_path)
1573
+
1574
+ return adata
1575
+
1576
+
1577
+ def hagai_2018() -> AnnData: # pragma: no cover
1578
+ """Cross-species analysis of primary dermal fibroblasts and bone marrow-derived phagocytes, stimulated with dsRNA and IFNB.
1579
+
1580
+ The study explores immune response variations across humans, macaques, mice, and rats.
1581
+
1582
+ References:
1583
+ Hagai, T., Chen, X., Miragaia, R.J. et al. Gene expression variability across cells and species shapes innate immunity.
1584
+ Nature 563, 197–202 (2018). https://doi.org/10.1038/s41586-018-0657-2
1585
+
1586
+ Returns:
1587
+ :class:`~anndata.AnnData` object of the dataset.
1588
+ """
1589
+ output_file_name = "hagai_2018.h5ad"
1590
+ output_file_path = settings.datasetdir / output_file_name
1591
+ if not Path(output_file_path).exists():
1592
+ _download(
1593
+ url="https://figshare.com/ndownloader/files/46978846",
1594
+ output_file_name=output_file_name,
1595
+ output_path=settings.datasetdir,
1596
+ is_zip=False,
1597
+ )
1598
+ adata = sc.read_h5ad(output_file_path)
1599
+
1600
+ return adata