biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +4 -0
  3. biopipen/core/filters.py +1 -1
  4. biopipen/core/testing.py +2 -1
  5. biopipen/ns/cellranger.py +33 -3
  6. biopipen/ns/regulatory.py +4 -0
  7. biopipen/ns/scrna.py +548 -98
  8. biopipen/ns/scrna_metabolic_landscape.py +4 -0
  9. biopipen/ns/tcr.py +256 -16
  10. biopipen/ns/web.py +5 -0
  11. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
  12. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
  13. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
  14. biopipen/reports/tcr/ClonalStats.svelte +1 -0
  15. biopipen/scripts/cellranger/CellRangerCount.py +55 -11
  16. biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
  17. biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
  18. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
  19. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
  20. biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
  21. biopipen/scripts/regulatory/motifs-common.R +3 -2
  22. biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
  23. biopipen/scripts/scrna/CellCellCommunication.py +26 -14
  24. biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
  25. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  26. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
  27. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
  28. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
  29. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
  31. biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
  32. biopipen/scripts/scrna/MQuad.py +25 -0
  33. biopipen/scripts/scrna/MarkersFinder.R +128 -30
  34. biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
  35. biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
  36. biopipen/scripts/scrna/ScFGSEA.R +23 -26
  37. biopipen/scripts/scrna/ScVelo.py +20 -8
  38. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  39. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
  40. biopipen/scripts/scrna/SeuratClustering.R +5 -1
  41. biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
  42. biopipen/scripts/scrna/SeuratPreparing.R +19 -11
  43. biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
  44. biopipen/scripts/scrna/Slingshot.R +2 -4
  45. biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
  46. biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
  47. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  48. biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
  49. biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
  50. biopipen/scripts/tcr/ClonalStats.R +76 -35
  51. biopipen/utils/misc.py +104 -9
  52. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
  53. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
  54. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  55. biopipen/utils/common_docstrs.py +0 -103
  56. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.34.6"
1
+ __version__ = "0.34.26"
biopipen/core/config.toml CHANGED
@@ -10,6 +10,8 @@ bcftools = "bcftools"
10
10
  calculate_rmsd = "calculate_rmsd"
11
11
  # cellranger
12
12
  cellranger = "cellranger"
13
+ # cellsnp-lite
14
+ cellsnp_lite = "cellsnp-lite"
13
15
  # Control-FREEC to call cnvs
14
16
  freec = "freec"
15
17
  # liftover coordinates across genomes
@@ -33,6 +35,8 @@ convert = "convert"
33
35
  fimo = "fimo"
34
36
  # MAXIT: https://sw-tools.rcsb.org/apps/MAXIT/
35
37
  maxit = "maxit"
38
+ # MQuad: https://github.com/single-cell-genetics/MQuad
39
+ mquad = "mquad"
36
40
  # wget
37
41
  wget = "wget"
38
42
  # aria2c
biopipen/core/filters.py CHANGED
@@ -32,7 +32,7 @@ def dict_to_cli_args(
32
32
 
33
33
  Args:
34
34
  dic: The dict to convert
35
- exclude: The keys to exclude
35
+ exclude: The keys to exclude before conversion (e.g. dashify)
36
36
  prefix: The prefix of the keys after conversion
37
37
  Defaults to `None`, meaning `-` for short keys and `--` for long keys
38
38
  sep: The separator between key and value
biopipen/core/testing.py CHANGED
@@ -6,7 +6,8 @@ from pathlib import Path
6
6
  from pipen import Pipen
7
7
 
8
8
  TESTING_INDEX_INIT = 1
9
- TESTING_PARENT_DIR = Path(tempfile.gettempdir())
9
+ TESTING_PARENT_DIR = Path(__file__).parent.parent.parent.joinpath("tests", "running")
10
+ TESTING_PARENT_DIR.mkdir(parents=True, exist_ok=True)
10
11
  TESTING_DIR = str(TESTING_PARENT_DIR.joinpath("biopipen-tests-%(index)s"))
11
12
  RSCRIPT_DIR = TESTING_PARENT_DIR.joinpath("biopipen-tests-rscripts")
12
13
  RSCRIPT_DIR.mkdir(exist_ok=True)
biopipen/ns/cellranger.py CHANGED
@@ -30,13 +30,26 @@ class CellRangerCount(Proc):
30
30
  ref: Path of folder containing 10x-compatible transcriptome reference
31
31
  tmpdir: Path to temporary directory, used to save the soft-linked fastq files
32
32
  to pass to cellranger
33
+ outdir_is_mounted (flag): A flag indicating whether the output directory is
34
+ on a mounted filesystem. As of `cellranger` v9.0.1, `cellranger count` will
35
+ fail when trying to copy/operate files to a mounted filesystem.
36
+ See <https://github.com/10XGenomics/cellranger/issues/210> and
37
+ <https://github.com/10XGenomics/cellranger/issues/250> for similar issues.
38
+ If that is the case, set this flag to `True` to use `envs.tmpdir` as
39
+ the output directory for `cellranger count`, and then move the results
40
+ to the final output directory after `cellranger count` finishes.
41
+ In this case, make sure that `envs.tmpdir` has enough space and
42
+ is on a local filesystem.
43
+ copy_outs_only (flag): If `outdir_is_mounted` is `True`, set this flag to `True`
44
+ to only copy the `outs` folder from the temporary output directory
45
+ to the final output directory, instead of the whole output directory.
33
46
  include_introns (flag): Set to false to exclude intronic reads in count.
34
47
  create_bam (flag): Enable or disable BAM file generation.
35
48
  This is required by cellranger v8+. When using cellranger v8-, it will be
36
49
  transformed to `--no-bam`.
37
50
  <more>: Other environment variables required by `cellranger count`
38
51
  See `cellranger count --help` for more details or
39
- https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#count
52
+ <https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#count>
40
53
  """ # noqa: E501
41
54
  input = "fastqs:files, id"
42
55
  output = """outdir:dir:
@@ -59,6 +72,8 @@ class CellRangerCount(Proc):
59
72
  "cellranger": config.exe.cellranger,
60
73
  "ref": config.ref.ref_cellranger_gex,
61
74
  "tmpdir": config.path.tmpdir,
75
+ "outdir_is_mounted": False,
76
+ "copy_outs_only": True,
62
77
  "include_introns": True,
63
78
  "create_bam": False,
64
79
  }
@@ -91,10 +106,23 @@ class CellRangerVdj(Proc):
91
106
  cellranger: Path to cellranger
92
107
  ref: Path of folder containing 10x-compatible transcriptome reference
93
108
  tmpdir: Path to temporary directory, used to save the soft-linked fastq files
94
- to pass to cellranger
109
+ to pass to cellranger.
110
+ outdir_is_mounted (flag): A flag indicating whether the output directory is
111
+ on a mounted filesystem. As of `cellranger` v9.0.1, `cellranger vdj` will
112
+ fail when trying to copy the VDJ reference files to a mounted filesystem.
113
+ See <https://github.com/10XGenomics/cellranger/issues/210> and
114
+ <https://github.com/10XGenomics/cellranger/issues/250> for similar issues.
115
+ If that is the case, set this flag to `True` to use `envs.tmpdir` as
116
+ the output directory for `cellranger vdj`, and then move the results
117
+ to the final output directory after `cellranger vdj` finishes.
118
+ In this case, make sure that `envs.tmpdir` has enough space and
119
+ is on a local filesystem.
120
+ copy_outs_only (flag): If `outdir_is_mounted` is `True`, set this flag to `True`
121
+ to only copy the `outs` folder from the temporary output directory
122
+ to the final output directory, instead of the whole output directory.
95
123
  <more>: Other environment variables required by `cellranger vdj`
96
124
  See `cellranger vdj --help` for more details or
97
- https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#vdj
125
+ <https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#vdj>
98
126
  """ # noqa: E501
99
127
  input = "fastqs:files, id"
100
128
  output = """outdir:dir:
@@ -116,6 +144,8 @@ class CellRangerVdj(Proc):
116
144
  "ncores": config.misc.ncores,
117
145
  "cellranger": config.exe.cellranger,
118
146
  "ref": config.ref.ref_cellranger_vdj,
147
+ "outdir_is_mounted": False,
148
+ "copy_outs_only": True,
119
149
  "tmpdir": config.path.tmpdir,
120
150
  }
121
151
  script = "file://../scripts/cellranger/CellRangerVdj.py"
biopipen/ns/regulatory.py CHANGED
@@ -132,6 +132,9 @@ class MotifAffinityTest(Proc):
132
132
  If no `regulator_col` is provided, no regulator information is written in
133
133
  the output. Otherwise, the regulator information is written in the output in
134
134
  the `Regulator` column.
135
+ var_col: The column names in the `in.motiffile` containing the variant information.
136
+ It must match the names in the `in.varfile`. This is helpful when
137
+ we only need to test the pairs of variants and motifs in the `in.motiffile`.
135
138
  notfound (choice): What to do if a motif is not found in the database,
136
139
  or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
137
140
  file.
@@ -200,6 +203,7 @@ class MotifAffinityTest(Proc):
200
203
  "bcftools": config.exe.bcftools,
201
204
  "motif_col": None,
202
205
  "regulator_col": None,
206
+ "var_col": None,
203
207
  "notfound": "error",
204
208
  "motifdb": config.ref.tf_motifdb,
205
209
  "regmotifs": config.ref.tf_motifs,