pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +6 -7
  15. pertpy/tools/__init__.py +67 -6
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +77 -54
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
  47. pertpy-0.9.1.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -3,11 +3,11 @@ from __future__ import annotations
3
3
  from collections import namedtuple
4
4
  from typing import TYPE_CHECKING, Literal
5
5
 
6
+ from lamin_utils import logger
7
+
6
8
  if TYPE_CHECKING:
7
9
  from collections.abc import Sequence
8
10
 
9
- from rich import print
10
-
11
11
  if TYPE_CHECKING:
12
12
  import pandas as pd
13
13
 
@@ -24,10 +24,9 @@ class LookUp:
24
24
  ):
25
25
  """
26
26
  Args:
27
- type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug. Defaults to cell_line.
27
+ type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug.
28
28
  transfer_metadata: DataFrames used to generate Lookup object.
29
29
  This is currently set to None for CompoundMetaData which does not require any dataframes for transfer.
30
- Defaults to 'cell_line'.
31
30
  """
32
31
  self.type = type
33
32
  if type == "cell_line":
@@ -285,12 +284,11 @@ class LookUp:
285
284
  """A brief summary of cell line metadata.
286
285
 
287
286
  Args:
288
- cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene. Defaults to "DepMap".
287
+ cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene.
289
288
  reference_id: The type of cell line identifier in the meta data, e.g. ModelID, CellLineName or StrippedCellLineName.
290
- If fetch cell line metadata from Cancerrxgene, it is recommended to choose
291
- "stripped_cell_line_name". Defaults to "ModelID".
289
+ If fetch cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
292
290
  query_id_list: Unique cell line identifiers to test the number of matched ids present in the
293
- metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
291
+ metadata. If set to None, the query of metadata identifiers will be disabled.
294
292
  """
295
293
  if self.type != "cell_line":
296
294
  raise ValueError("This is not a LookUp object specifically for CellLineMetaData!")
@@ -313,8 +311,8 @@ class LookUp:
313
311
  )
314
312
  not_matched_identifiers = list(set(query_id_list) - set(self.cl_cancer_project_meta[reference_id]))
315
313
 
316
- print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
317
- print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
314
+ logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
315
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
318
316
 
319
317
  def available_bulk_rna(
320
318
  self,
@@ -324,9 +322,9 @@ class LookUp:
324
322
  """A brief summary of bulk RNA expression data.
325
323
 
326
324
  Args:
327
- cell_line_source: the source of RNA-seq data, broad or sanger. Defaults to "sanger".
325
+ cell_line_source: the source of RNA-seq data, broad or sanger.
328
326
  query_id_list: Unique cell line identifiers to test the number of matched ids present in the
329
- metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
327
+ metadata. If set to None, the query of metadata identifiers will be disabled.
330
328
  """
331
329
  if self.type != "cell_line":
332
330
  raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -340,8 +338,8 @@ class LookUp:
340
338
  identifier_num_all = len(query_id_list)
341
339
  not_matched_identifiers = list(set(query_id_list) - set(bulk_rna.index))
342
340
 
343
- print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
344
- print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
341
+ logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
342
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
345
343
 
346
344
  def available_protein_expression(
347
345
  self,
@@ -352,9 +350,8 @@ class LookUp:
352
350
 
353
351
  Args:
354
352
  reference_id: The type of cell line identifier in the meta data, model_name or model_id.
355
- Defaults to "model_name".
356
353
  query_id_list: Unique cell line identifiers to test the number of matched ids present in the
357
- metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
354
+ metadata. If set to None, the query of metadata identifiers will be disabled.
358
355
  """
359
356
  if self.type != "cell_line":
360
357
  raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -367,8 +364,8 @@ class LookUp:
367
364
  f"The specified `reference_id` {reference_id} is not available in the proteomics data. "
368
365
  )
369
366
  not_matched_identifiers = list(set(query_id_list) - set(self.proteomics_data[reference_id]))
370
- print(f"[bold blue]{len(not_matched_identifiers)} cell lines are not found in the metadata.")
371
- print(f"[bold yellow]{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
367
+ logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
368
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
372
369
 
373
370
  def available_drug_response(
374
371
  self,
@@ -381,20 +378,16 @@ class LookUp:
381
378
  """A brief summary of drug response data.
382
379
 
383
380
  Args:
384
- gdsc_dataset: The GDSC dataset, 1 or 2. Defaults to 1.
381
+ gdsc_dataset: The GDSC dataset, 1 or 2.
385
382
  The GDSC1 dataset updates previous releases with additional drug screening data from the Wellcome Sanger Institute and Massachusetts General Hospital.
386
383
  It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
387
384
  GDSC2 is new and has 243,466 IC50 results from the latest screening at the Wellcome Sanger Institute using improved experimental procedures.
388
385
  reference_id: The type of cell line identifier in the meta data, cell_line_name, sanger_model_id or cosmic_id.
389
- Defaults to 'cell_line_name'.
390
386
  query_id_list: Unique cell line identifiers to test the number of matched ids present in the metadata.
391
387
  If set to None, the query of metadata identifiers will be disabled.
392
- Defaults to None.
393
388
  reference_perturbation: The perturbation information in the meta data, drug_name or drug_id.
394
- Defaults to 'drug_name'.
395
389
  query_perturbation_list: Unique perturbation types to test the number of matched ones present in the metadata.
396
390
  If set to None, the query of perturbation types will be disabled.
397
- Defaults to None.
398
391
  """
399
392
  if self.type != "cell_line":
400
393
  raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -410,8 +403,8 @@ class LookUp:
410
403
  )
411
404
  identifier_num_all = len(query_id_list)
412
405
  not_matched_identifiers = list(set(query_id_list) - set(gdsc_data[reference_id]))
413
- print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
414
- print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
406
+ logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
407
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
415
408
 
416
409
  if query_perturbation_list is not None:
417
410
  if reference_perturbation not in gdsc_data.columns:
@@ -420,8 +413,8 @@ class LookUp:
420
413
  )
421
414
  identifier_num_all = len(query_perturbation_list)
422
415
  not_matched_identifiers = list(set(query_perturbation_list) - set(gdsc_data[reference_perturbation]))
423
- print(f"{len(not_matched_identifiers)} perturbation types are not found in the metadata.")
424
- print(f"{identifier_num_all - len(not_matched_identifiers)} perturbation types are found! ")
416
+ logger.info(f"{len(not_matched_identifiers)} perturbation types are not found in the metadata.")
417
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} perturbation types are found! ")
425
418
 
426
419
  def available_genes_annotation(
427
420
  self,
@@ -432,22 +425,20 @@ class LookUp:
432
425
 
433
426
  Args:
434
427
  reference_id: The type of gene identifier in the meta data, gene_id, ensembl_gene_id, hgnc_id, hgnc_symbol.
435
- Defaults to "ensembl_gene_id".
436
428
  query_id_list: Unique gene identifiers to test the number of matched ids present in the metadata.
437
- Defaults to None.
438
429
  """
439
430
  if self.type != "cell_line":
440
431
  raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
441
432
 
442
- print("To summarize: in the DepMap_Sanger gene annotation file, you can find: ")
443
- print(f"{len(self.gene_annotation.index)} driver genes")
444
- print(
433
+ logger.info("To summarize: in the DepMap_Sanger gene annotation file, you can find: ")
434
+ logger.info(f"{len(self.gene_annotation.index)} driver genes")
435
+ logger.info(
445
436
  f"{len(self.gene_annotation.columns)} meta data including: ",
446
437
  *list(self.gene_annotation.columns.values),
447
438
  sep="\n- ",
448
439
  )
449
- print("Overview of gene annotation: ")
450
- print(self.gene_annotation.head().to_string())
440
+ logger.info("Overview of gene annotation: ")
441
+ logger.info(self.gene_annotation.head().to_string())
451
442
  """
452
443
  #not implemented yet
453
444
  print("Default parameters to annotate gene annotation: ")
@@ -472,26 +463,24 @@ class LookUp:
472
463
  Args:
473
464
  query_id_list: Unique perturbagens to test the number of matched ones present in the metadata.
474
465
  If set to None, the query of metadata perturbagens will be disabled.
475
- Defaults to None.
476
466
  target_list: Unique molecular targets to test the number of matched ones present in the metadata.
477
467
  If set to None, the comparison of molecular targets in the query of metadata perturbagens will be disabled.
478
- Defaults to None.
479
468
  """
480
- if self.type != "moa":
481
- raise ValueError("This is not a LookUp object specific for MoaMetaData!")
482
469
  if query_id_list is not None:
470
+ if self.type != "moa":
471
+ raise ValueError("This is not a LookUp object specific for MoaMetaData!")
483
472
  identifier_num_all = len(query_id_list)
484
473
  not_matched_identifiers = list(set(query_id_list) - set(self.moa_meta.pert_iname))
485
- print(f"{len(not_matched_identifiers)} perturbagens are not found in the metadata.")
486
- print(f"{identifier_num_all - len(not_matched_identifiers)} perturbagens are found! ")
474
+ logger.info(f"{len(not_matched_identifiers)} perturbagens are not found in the metadata.")
475
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} perturbagens are found! ")
487
476
 
488
477
  if target_list is not None:
489
478
  targets = self.moa_meta.target.astype(str).apply(lambda x: x.split("|"))
490
479
  all_targets = [t for tl in targets for t in tl]
491
480
  identifier_num_all = len(target_list)
492
481
  not_matched_identifiers = list(set(target_list) - set(all_targets))
493
- print(f"{len(not_matched_identifiers)} molecular targets are not found in the metadata.")
494
- print(f"{identifier_num_all - len(not_matched_identifiers)} molecular targets are found! ")
482
+ logger.info(f"{len(not_matched_identifiers)} molecular targets are not found in the metadata.")
483
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} molecular targets are found! ")
495
484
 
496
485
  def available_compounds(
497
486
  self,
@@ -503,8 +492,7 @@ class LookUp:
503
492
  Args:
504
493
  query_id_list: Unique compounds to test the number of matched ones present in the metadata.
505
494
  If set to None, query of compound identifiers will be disabled.
506
- Defaults to None.
507
- query_id_type: The type of compound identifiers, name or cid. Defaults to 'name'.
495
+ query_id_type: The type of compound identifiers, name or cid.
508
496
  """
509
497
  if self.type != "compound":
510
498
  raise ValueError("This is not a LookUp object specific for CompoundData!")
@@ -523,8 +511,8 @@ class LookUp:
523
511
  except pcp.BadRequestError:
524
512
  not_matched_identifiers.append(compound)
525
513
 
526
- print(f"{len(not_matched_identifiers)} compounds are not found in the metadata.")
527
- print(f"{identifier_num_all - len(not_matched_identifiers)} compounds are found! ")
514
+ logger.info(f"{len(not_matched_identifiers)} compounds are not found in the metadata.")
515
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} compounds are found! ")
528
516
 
529
517
  def available_drug_annotation(
530
518
  self,
@@ -535,11 +523,10 @@ class LookUp:
535
523
  """A brief summary of drug annotation.
536
524
 
537
525
  Args:
538
- drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb. Defaults to "chembl".
526
+ drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb.
539
527
  query_id_list: Unique target or compound names to test the number of matched ones present in the metadata.
540
528
  If set to None, query of compound identifiers will be disabled.
541
- Defaults to None.
542
- query_id_type: The type of identifiers, target, compound and disease(pharmgkb only). Defaults to 'target'.
529
+ query_id_type: The type of identifiers, target, compound and disease(pharmgkb only).
543
530
  """
544
531
  if self.type != "drug":
545
532
  raise ValueError("This is not a LookUp object specific for DrugMetaData!")
@@ -578,5 +565,5 @@ class LookUp:
578
565
  diseases = self.pharmgkb[self.pharmgkb["Type"] == "Disease"]
579
566
  not_matched_identifiers = list(set(query_id_list) - set(diseases["Compound|Disease"]))
580
567
 
581
- print(f"{len(not_matched_identifiers)} {query_id_type}s are not found in the metadata.")
582
- print(f"{identifier_num_all - len(not_matched_identifiers)} {query_id_type}s are found! ")
568
+ logger.info(f"{len(not_matched_identifiers)} {query_id_type}s are not found in the metadata.")
569
+ logger.info(f"{identifier_num_all - len(not_matched_identifiers)} {query_id_type}s are found! ")
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Literal
4
4
 
5
+ from lamin_utils import logger
6
+
5
7
  if TYPE_CHECKING:
6
8
  from collections.abc import Sequence
7
9
 
@@ -31,12 +33,10 @@ class MetaData:
31
33
  total_identifiers: The total number of identifiers in the `adata` object.
32
34
  unmatched_identifiers: Unmatched identifiers in the `adata` object.
33
35
  query_id: The column of `.obs` with cell line information.
34
- reference_id: The type of cell line identifier in the meta data.
36
+ reference_id: The type of cell line identifier in the metadata.
35
37
  metadata_type: The type of metadata where some identifiers are not matched during annotation such as
36
38
  cell line, protein expression, bulk RNA expression, drug response, moa or compound.
37
- Defaults to 'cell line'.
38
39
  verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
39
- Defaults to 5.
40
40
  """
41
41
  if isinstance(verbosity, str):
42
42
  if verbosity != "all":
@@ -60,14 +60,11 @@ class MetaData:
60
60
  if isinstance(verbosity, int) and verbosity >= 0:
61
61
  verbosity = min(verbosity, len(unmatched_identifiers))
62
62
  if verbosity > 0:
63
- print(
64
- f"[bold blue]There are {total_identifiers} identifiers in `adata.obs`."
63
+ logger.info(
64
+ f"There are {total_identifiers} identifiers in `adata.obs`."
65
65
  f"However, {len(unmatched_identifiers)} identifiers can't be found in the {metadata_type} annotation,"
66
- "leading to the presence of NA values for their respective metadata.\n",
67
- "Please check again: ",
68
- *unmatched_identifiers[:verbosity],
69
- "...",
70
- sep="\n- ",
66
+ "leading to the presence of NA values for their respective metadata.\n"
67
+ f"Please check again: *unmatched_identifiers[:verbosity]..."
71
68
  )
72
69
  else:
73
70
  raise ValueError("Only 'all' or a non-negative value is accepted.")
pertpy/metadata/_moa.py CHANGED
@@ -5,7 +5,6 @@ from typing import TYPE_CHECKING
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
- from rich import print
9
8
  from scanpy import settings
10
9
 
11
10
  from pertpy.data._dataloader import _download
@@ -26,7 +25,6 @@ class Moa(MetaData):
26
25
  def _download_clue(self) -> None:
27
26
  clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
28
27
  if not Path(clue_path).exists():
29
- print("[bold yellow]No metadata file was found for clue. Starting download now.")
30
28
  _download(
31
29
  url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
32
30
  output_file_name="repurposing_drugs_20200324.txt",
@@ -51,12 +49,10 @@ class Moa(MetaData):
51
49
 
52
50
  Args:
53
51
  adata: The data object to annotate.
54
- query_id: The column of `.obs` with the name of a perturbagen. Defaults to 'perturbation'.
52
+ query_id: The column of `.obs` with the name of a perturbagen.
55
53
  target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
56
- Defaults to None.
57
54
  verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
58
- Defaults to 5.
59
- copy: Determines whether a copy of the `adata` is returned. Defaults to False.
55
+ copy: Determines whether a copy of the `adata` is returned.
60
56
 
61
57
  Returns:
62
58
  Returns an AnnData object with MoA annotation.
pertpy/plot/__init__.py CHANGED
@@ -1,5 +0,0 @@
1
- from pertpy.plot._augur import AugurpyPlot as ag
2
- from pertpy.plot._coda import CodaPlot as coda
3
- from pertpy.plot._guide_rna import GuideRnaPlot as guide
4
- from pertpy.plot._milopy import MilopyPlot as milo
5
- from pertpy.plot._mixscape import MixscapePlot as ms
@@ -1 +1,3 @@
1
1
  from ._guide_rna import GuideAssignment
2
+
3
+ __all__ = ["GuideAssignment"]
@@ -34,9 +34,8 @@ class GuideAssignment:
34
34
  assignment_threshold: The count threshold that is required for an assignment to be viable.
35
35
  layer: Key to the layer containing raw count values of the gRNAs.
36
36
  adata.X is used if layer is None. Expects count data.
37
- output_layer: Assigned guide will be saved on adata.layers[output_key]. Defaults to `assigned_guides`.
37
+ output_layer: Assigned guide will be saved on adata.layers[output_key].
38
38
  only_return_results: If True, input AnnData is not modified and the result is returned as an np.ndarray.
39
- Defaults to False.
40
39
 
41
40
  Examples:
42
41
  Each cell is assigned to gRNA that occurs at least 5 times in the respective cell.
@@ -49,7 +48,7 @@ class GuideAssignment:
49
48
  """
50
49
  counts = adata.X if layer is None else adata.layers[layer]
51
50
  if scipy.sparse.issparse(counts):
52
- counts = counts.A
51
+ counts = counts.toarray()
53
52
 
54
53
  assigned_grnas = np.where(counts >= assignment_threshold, 1, 0)
55
54
  assigned_grnas = scipy.sparse.csr_matrix(assigned_grnas)
@@ -93,7 +92,7 @@ class GuideAssignment:
93
92
  """
94
93
  counts = adata.X if layer is None else adata.layers[layer]
95
94
  if scipy.sparse.issparse(counts):
96
- counts = counts.A
95
+ counts = counts.toarray()
97
96
 
98
97
  assigned_grna = np.where(
99
98
  counts.max(axis=1).squeeze() >= assignment_threshold,
@@ -127,7 +126,7 @@ class GuideAssignment:
127
126
  adata: Annotated data matrix containing gRNA values
128
127
  layer: Key to the layer containing log normalized count values of the gRNAs.
129
128
  adata.X is used if layer is None.
130
- order_by: The order of cells in y axis. Defaults to None.
129
+ order_by: The order of cells in y axis.
131
130
  If None, cells will be reordered to have a nice sparse representation.
132
131
  If a string is provided, adata.obs[order_by] will be used as the order.
133
132
  If a numpy array is provided, the array will be used for ordering.
@@ -153,9 +152,9 @@ class GuideAssignment:
153
152
 
154
153
  if order_by is None:
155
154
  if scipy.sparse.issparse(data):
156
- max_values = data.max(axis=1).A.squeeze()
155
+ max_values = data.max(axis=1).toarray().squeeze()
157
156
  data_argmax = data.argmax(axis=1).A.squeeze()
158
- max_guide_index = np.where(max_values != data.min(axis=1).A.squeeze(), data_argmax, -1)
157
+ max_guide_index = np.where(max_values != data.min(axis=1).toarray().squeeze(), data_argmax, -1)
159
158
  else:
160
159
  max_guide_index = np.where(
161
160
  data.max(axis=1).squeeze() != data.min(axis=1).squeeze(), data.argmax(axis=1).squeeze(), -1
pertpy/tools/__init__.py CHANGED
@@ -1,19 +1,80 @@
1
+ from importlib import import_module
2
+
3
+
4
+ def lazy_import(module_path, class_name, extras):
5
+ def _import():
6
+ try:
7
+ for extra in extras:
8
+ import_module(extra)
9
+ except ImportError as e:
10
+ raise ImportError(
11
+ f"Extra dependencies required: {', '.join(extras)}. "
12
+ f"Please install with: pip install {' '.join(extras)}"
13
+ ) from e
14
+ module = import_module(module_path)
15
+ return getattr(module, class_name)
16
+
17
+ return _import
18
+
19
+
1
20
  from pertpy.tools._augur import Augur
2
21
  from pertpy.tools._cinemaot import Cinemaot
3
- from pertpy.tools._coda._sccoda import Sccoda
4
- from pertpy.tools._coda._tasccoda import Tasccoda
5
22
  from pertpy.tools._dialogue import Dialogue
6
- from pertpy.tools._differential_gene_expression import DifferentialGeneExpression
7
23
  from pertpy.tools._distances._distance_tests import DistanceTest
8
24
  from pertpy.tools._distances._distances import Distance
9
25
  from pertpy.tools._enrichment import Enrichment
10
26
  from pertpy.tools._milo import Milo
11
27
  from pertpy.tools._mixscape import Mixscape
12
28
  from pertpy.tools._perturbation_space._clustering import ClusteringSpace
29
+ from pertpy.tools._perturbation_space._comparison import PerturbationComparison
13
30
  from pertpy.tools._perturbation_space._discriminator_classifiers import (
14
- DiscriminatorClassifierSpace,
15
31
  LRClassifierSpace,
16
32
  MLPClassifierSpace,
17
33
  )
18
- from pertpy.tools._perturbation_space._simple import CentroidSpace, DBSCANSpace, KMeansSpace, PseudobulkSpace
19
- from pertpy.tools._scgen import SCGEN
34
+ from pertpy.tools._perturbation_space._simple import (
35
+ CentroidSpace,
36
+ DBSCANSpace,
37
+ KMeansSpace,
38
+ PseudobulkSpace,
39
+ )
40
+ from pertpy.tools._scgen import Scgen
41
+
42
+ # from pertpy.tools._differential_gene_expression import DGEEVAL
43
+
44
+ CODA_EXTRAS = ["toytree", "arviz", "ete3"] # also pyqt5 technically
45
+ Sccoda = lazy_import("pertpy.tools._coda._sccoda", "Sccoda", CODA_EXTRAS)
46
+ Tasccoda = lazy_import("pertpy.tools._coda._tasccoda", "Tasccoda", CODA_EXTRAS)
47
+
48
+ DE_EXTRAS = ["formulaic", "pydeseq2"]
49
+ EdgeR = lazy_import("pertpy.tools._differential_gene_expression", "EdgeR", DE_EXTRAS + ["edger"])
50
+ PyDESeq2 = lazy_import("pertpy.tools._differential_gene_expression", "PyDESeq2", DE_EXTRAS)
51
+ Statsmodels = lazy_import("pertpy.tools._differential_gene_expression", "Statsmodels", DE_EXTRAS + ["statsmodels"])
52
+ TTest = lazy_import("pertpy.tools._differential_gene_expression", "TTest", DE_EXTRAS)
53
+ WilcoxonTest = lazy_import("pertpy.tools._differential_gene_expression", "WilcoxonTest", DE_EXTRAS)
54
+
55
+ __all__ = [
56
+ "Augur",
57
+ "Cinemaot",
58
+ "Sccoda",
59
+ "Tasccoda",
60
+ "Dialogue",
61
+ "EdgeR",
62
+ "PyDESeq2",
63
+ "WilcoxonTest",
64
+ "TTest",
65
+ "Statsmodels",
66
+ "DistanceTest",
67
+ "Distance",
68
+ "Enrichment",
69
+ "Milo",
70
+ "Mixscape",
71
+ "ClusteringSpace",
72
+ "LRClassifierSpace",
73
+ "MLPClassifierSpace",
74
+ "CentroidSpace",
75
+ "DBSCANSpace",
76
+ "KMeansSpace",
77
+ "PseudobulkSpace",
78
+ "Scgen",
79
+ "DGEEVAL",
80
+ ]
pertpy/tools/_augur.py CHANGED
@@ -14,6 +14,7 @@ import scanpy as sc
14
14
  import statsmodels.api as sm
15
15
  from anndata import AnnData
16
16
  from joblib import Parallel, delayed
17
+ from lamin_utils import logger
17
18
  from rich import print
18
19
  from rich.progress import track
19
20
  from scipy import sparse, stats
@@ -127,7 +128,7 @@ class Augur:
127
128
  _ = input[cell_type_col]
128
129
  _ = input[label_col]
129
130
  except KeyError:
130
- print("[bold red]No column names matching cell_type_col and label_col.")
131
+ logger.error("No column names matching cell_type_col and label_col.")
131
132
 
132
133
  label = input[label_col] if meta is None else meta[label_col]
133
134
  cell_type = input[cell_type_col] if meta is None else meta[cell_type_col]
@@ -140,7 +141,7 @@ class Augur:
140
141
  if adata.obs["label"].dtype.name == "category":
141
142
  # filter samples according to label
142
143
  if condition_label is not None and treatment_label is not None:
143
- print(f"Filtering samples with {condition_label} and {treatment_label} labels.")
144
+ logger.info(f"Filtering samples with {condition_label} and {treatment_label} labels.")
144
145
  adata = ad.concat(
145
146
  [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
146
147
  )
@@ -556,7 +557,7 @@ class Augur:
556
557
  try:
557
558
  sc.pp.highly_variable_genes(adata)
558
559
  except ValueError:
559
- print("[bold yellow]Data not normalized. Normalizing now using scanpy log1p normalize.")
560
+ logger.warn("Data not normalized. Normalizing now using scanpy log1p normalize.")
560
561
  sc.pp.log1p(adata)
561
562
  sc.pp.highly_variable_genes(adata)
562
563
 
@@ -608,7 +609,7 @@ class Augur:
608
609
  var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
609
610
  filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
610
611
  span: Smoothing factor, as a fraction of the number of points to take into account.
611
- Should be in the range (0, 1]. Defaults to 0.75
612
+ Should be in the range (0, 1].
612
613
 
613
614
  Return:
614
615
  AnnData object with additional select_variance column in var.
@@ -700,13 +701,11 @@ class Augur:
700
701
  feature_perc: proportion of genes that are randomly selected as features for input to the classifier in each
701
702
  subsample using the random gene filter
702
703
  var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
703
- Defaults to 0.5.
704
704
  span: Smoothing factor, as a fraction of the number of points to take into account. Should be in the range (0, 1].
705
- Defaults to 0.75.
706
705
  filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
707
706
  n_threads: number of threads to use for parallelization
708
707
  select_variance_features: Whether to select genes based on the original Augur implementation (True)
709
- or using scanpy's highly_variable_genes (False). Defaults to True.
708
+ or using scanpy's highly_variable_genes (False).
710
709
  key_added: Key to add results to in .uns
711
710
  augur_mode: One of 'default', 'velocity' or 'permute'. Setting augur_mode = "velocity" disables feature selection,
712
711
  assuming feature selection has been performed by the RNA velocity procedure to produce the input matrix,
@@ -751,8 +750,8 @@ class Augur:
751
750
  "full_results": defaultdict(list),
752
751
  }
753
752
  if select_variance_features:
754
- print("[bold yellow]Set smaller span value in the case of a `segmentation fault` error.")
755
- print("[bold yellow]Set larger span in case of svddc or other near singularities error.")
753
+ logger.warning("Set smaller span value in the case of a `segmentation fault` error.")
754
+ logger.warning("Set larger span in case of svddc or other near singularities error.")
756
755
  adata.obs["augur_score"] = nan
757
756
  for cell_type in track(adata.obs["cell_type"].unique(), description="Processing data..."):
758
757
  cell_type_subsample = adata[adata.obs["cell_type"] == cell_type].copy()
@@ -768,8 +767,8 @@ class Augur:
768
767
  )
769
768
  )
770
769
  if len(cell_type_subsample) < min_cells:
771
- print(
772
- f"[bold red]Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
770
+ logger.warning(
771
+ f"Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
773
772
  )
774
773
  elif (
775
774
  cell_type_subsample.obs.groupby(
@@ -778,8 +777,8 @@ class Augur:
778
777
  ).y_.count()
779
778
  < subsample_size
780
779
  ).any():
781
- print(
782
- f"[bold red]Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
780
+ logger.warning(
781
+ f"Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
783
782
  f"subsample size {subsample_size}."
784
783
  )
785
784
  else:
@@ -821,7 +820,7 @@ class Augur:
821
820
  results["full_results"]["cell_type"].extend([cell_type] * folds * n_subsamples)
822
821
  # make sure one cell type worked
823
822
  if len(results) <= 2:
824
- print("[bold red]No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
823
+ logger.warning("No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
825
824
 
826
825
  results["summary_metrics"] = pd.DataFrame(results["summary_metrics"])
827
826
  results["feature_importances"] = pd.DataFrame(results["feature_importances"])
@@ -850,7 +849,7 @@ class Augur:
850
849
  augur2: Augurpy results from condition 2, obtained from `predict()[1]`
851
850
  permuted1: permuted Augurpy results from condition 1, obtained from `predict()` with argument `augur_mode=permute`
852
851
  permuted2: permuted Augurpy results from condition 2, obtained from `predict()` with argument `augur_mode=permute`
853
- n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation; Defaults to 50.
852
+ n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation.
854
853
  n_permutations: the total number of mean augur scores to calculate from a background distribution
855
854
 
856
855
  Returns:
pertpy/tools/_cinemaot.py CHANGED
@@ -338,7 +338,7 @@ class Cinemaot:
338
338
  sc.tl.leiden(adata, resolution=cf_resolution)
339
339
  df["ct"] = adata.obs["leiden"].astype(str)
340
340
  df["ptb"] = "control"
341
- df["ptb"][adata.obs[pert_key] != control] = de.obs["leiden"].astype(str)
341
+ df.loc[adata.obs[pert_key] != control, "ptb"] = de.obs["leiden"].astype(str)
342
342
  label_list.append("ptb")
343
343
  df = df.groupby(label_list).sum()
344
344
  new_index = df.index.map(lambda x: "_".join(map(str, x)))
@@ -432,7 +432,7 @@ class Cinemaot:
432
432
  expr_label = "control"
433
433
 
434
434
  adata_.obs["ct"] = ref_label
435
- adata_.obs["ct"][adata_.obs[pert_key] == control] = expr_label
435
+ adata_.obs.loc[adata_.obs[pert_key] == control, "ct"] = expr_label
436
436
  pert_key = "ct"
437
437
  z = np.zeros(adata_.shape[0]) + 1
438
438