biopipen-0.7.1-py3-none-any.whl → biopipen-0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic.

Files changed (60):
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.py +0 -5
  3. biopipen/core/config.toml +2 -4
  4. biopipen/core/defaults.py +3 -3
  5. biopipen/core/filters.py +1 -0
  6. biopipen/core/proc.py +1 -3
  7. biopipen/core/testing.py +1 -2
  8. biopipen/ns/bam.py +10 -14
  9. biopipen/ns/bcftools.py +4 -7
  10. biopipen/ns/bed.py +9 -16
  11. biopipen/ns/cnv.py +8 -11
  12. biopipen/ns/cnvkit.py +32 -59
  13. biopipen/ns/cnvkit_pipeline.py +266 -310
  14. biopipen/ns/csv.py +0 -2
  15. biopipen/ns/gene.py +0 -1
  16. biopipen/ns/gsea.py +4 -10
  17. biopipen/ns/misc.py +0 -5
  18. biopipen/ns/plot.py +2 -4
  19. biopipen/ns/rnaseq.py +0 -1
  20. biopipen/ns/scrna.py +76 -119
  21. biopipen/ns/scrna_metabolic_landscape.py +306 -348
  22. biopipen/ns/tcr.py +5 -15
  23. biopipen/ns/vcf.py +52 -34
  24. biopipen/ns/web.py +8 -19
  25. biopipen/reports/bam/CNAClinic.svelte +1 -1
  26. biopipen/reports/bam/CNVpytor.svelte +2 -2
  27. biopipen/reports/bam/ControlFREEC.svelte +1 -1
  28. biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
  29. biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
  30. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  31. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  32. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  33. biopipen/reports/gsea/FGSEA.svelte +1 -1
  34. biopipen/reports/gsea/GSEA.svelte +2 -2
  35. biopipen/reports/scrna/CellsDistribution.svelte +1 -1
  36. biopipen/reports/scrna/DimPlots.svelte +1 -1
  37. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
  38. biopipen/reports/scrna/MarkersFinder.svelte +2 -2
  39. biopipen/reports/scrna/ScFGSEA.svelte +1 -1
  40. biopipen/reports/scrna/SeuratClusterStats.svelte +2 -2
  41. biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +1 -1
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
  45. biopipen/reports/tcr/CloneResidency.svelte +2 -2
  46. biopipen/reports/tcr/Immunarch.svelte +2 -2
  47. biopipen/reports/tcr/SampleDiversity.svelte +2 -2
  48. biopipen/reports/tcr/TCRClusteringStats.svelte +2 -2
  49. biopipen/reports/tcr/VJUsage.svelte +1 -1
  50. biopipen/reports/utils/gsea.liq +1 -1
  51. biopipen/reports/utils/misc.liq +1 -1
  52. biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
  53. biopipen/reports/vcf/TruvariConsistency.svelte +2 -2
  54. biopipen/scripts/tcgamaf/maf2vcf.pl +10 -5
  55. biopipen/scripts/vcf/VcfAnno.py +26 -0
  56. biopipen/scripts/vcf/VcfFix_utils.py +3 -2
  57. {biopipen-0.7.1.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
  58. {biopipen-0.7.1.dist-info → biopipen-0.8.0.dist-info}/RECORD +60 -59
  59. {biopipen-0.7.1.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
  60. {biopipen-0.7.1.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +0 -0
biopipen/ns/csv.py CHANGED
@@ -1,6 +1,4 @@
1
1
  """Tools to deal with csv/tsv files"""
2
-
3
-
4
2
  from ..core.proc import Proc
5
3
 
6
4
 
biopipen/ns/gene.py CHANGED
@@ -38,7 +38,6 @@ class GeneNameConversion(Proc):
38
38
  Supported: human, mouse, rat, fruitfly, nematode, zebrafish,
39
39
  thale-cress, frog and pig
40
40
  """ # noqa: E501
41
-
42
41
  input = "infile:file"
43
42
  output = "outfile:file:{{in.infile | basename}}"
44
43
  lang = config.lang.python
biopipen/ns/gsea.py CHANGED
@@ -1,5 +1,4 @@
1
1
  """Gene set enrichment analysis"""
2
-
3
2
  from ..core.proc import Proc
4
3
  from ..core.config import config
5
4
 
@@ -35,9 +34,8 @@ class GSEA(Proc):
35
34
  Other configs passed to `GSEA()` directly
36
35
 
37
36
  Requires:
38
- - name: GSEA-MSigDB/GSEA_R
39
- check: |
40
- {{proc.lang}} <(echo "library(GSEA)")
37
+ GSEA-MSigDB/GSEA_R:
38
+ - check: {{proc.lang}} <(echo "library(GSEA)")
41
39
  """
42
40
 
43
41
  input = "infile:file, metafile:file, gmtfile:file, configfile:file"
@@ -86,7 +84,6 @@ class PreRank(Proc):
86
84
  the first one is pos and second is neg. Or you can have multiple
87
85
  pairs of labels (e.g. `[["CASE1", "CNTRL"], ["CASE2", "CNTRL"]]`)
88
86
  """
89
-
90
87
  input = "infile:file, metafile:file, configfile:file"
91
88
  output = "outfile:file:{{in.infile | stem}}.rank"
92
89
  lang = config.lang.rscript
@@ -139,11 +136,9 @@ class FGSEA(Proc):
139
136
  `<rest>`: Rest arguments for `fgsea()`
140
137
 
141
138
  Requires:
142
- - name: bioconductor-fgsea
143
- check: |
144
- {{proc.lang}} -e "library(fgsea)"
139
+ bioconductor-fgsea:
140
+ - check: {{proc.lang}} -e "library(fgsea)"
145
141
  """
146
-
147
142
  input = "infile:file, metafile:file, gmtfile:file, configfile:file"
148
143
  output = "outdir:dir:{{in.infile | stem}}.fgsea"
149
144
  lang = config.lang.rscript
@@ -183,7 +178,6 @@ class Enrichr(Proc):
183
178
  See https://maayanlab.cloud/Enrichr/#libraries for all available
184
179
  databases/libaries
185
180
  """
186
-
187
181
  input = "infile:file"
188
182
  output = "outdir:dir:{{in.infile | stem}}.enrichr"
189
183
  lang = config.lang.rscript
biopipen/ns/misc.py CHANGED
@@ -12,7 +12,6 @@ class File2Proc(Proc):
12
12
  Output:
13
13
  outfile: The output symbolic link to the input file
14
14
  """
15
-
16
15
  input = "infile:file"
17
16
  output = "outfile:file:{{in.infile | basename}}"
18
17
  script = """
@@ -25,7 +24,6 @@ class File2Proc(Proc):
25
24
  class Glob2Dir(Proc):
26
25
  """Create symbolic links in output directory for the files given
27
26
  by the glob pattern"""
28
-
29
27
  input = "pattern:var"
30
28
  output = "outdir:dir:from_glob"
31
29
  script = """
@@ -54,7 +52,6 @@ class Config2File(Proc):
54
52
  infmt: The input format. `json` or `toml`.
55
53
  outfmt: The output format. `json` or `toml`.
56
54
  """
57
-
58
55
  input = "config:var, name:var"
59
56
  output = "outfile:file:{{(in.name or 'config') | slugify}}.{{envs.outfmt}}"
60
57
  envs = {"infmt": "toml", "outfmt": "toml"}
@@ -76,7 +73,6 @@ class Str2File(Proc):
76
73
  Envs:
77
74
  name: The name of the output file
78
75
  """
79
-
80
76
  input = "str, name"
81
77
  output = "outfile:file:{{in.name}}"
82
78
  lang = config.lang.python
@@ -99,7 +95,6 @@ class Shell(Proc):
99
95
  outdir: Whether the `out.outfile` should be a directory.
100
96
  If so a directory will be created before running the command.
101
97
  """
102
-
103
98
  input = "infile:file"
104
99
  output = "outfile:file:{{in.infile | basename}}"
105
100
  envs = {"cmd": "", "outdir": False}
biopipen/ns/plot.py CHANGED
@@ -94,11 +94,9 @@ class Heatmap(Proc):
94
94
  globals: Some globals for the expression in `args` to be evaluated
95
95
 
96
96
  Requires:
97
- - name: bioconductor-complexheatmap
98
- check: |
99
- {{proc.lang}} <(echo "library(ComplexHeatmap)")
97
+ bioconductor-complexheatmap:
98
+ - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
100
99
  """
101
-
102
100
  input = "infile:file, annofiles:files"
103
101
  output = """
104
102
  {%- set outdir = in.infile | stem0 | append: ".heatmap" -%}
biopipen/ns/rnaseq.py CHANGED
@@ -6,7 +6,6 @@ from ..core.config import config
6
6
 
7
7
  class UnitConversion(Proc):
8
8
  """Convert expression value units back and forth"""
9
-
10
9
  input = "infile:file"
11
10
  output = "outfile:file:{{in.infile | basename}}"
12
11
  lang = config.lang.rscript
biopipen/ns/scrna.py CHANGED
@@ -71,17 +71,13 @@ class SeuratPreparing(Proc):
71
71
  `gene_qc` is applied after `cell_qc`.
72
72
 
73
73
  Requires:
74
- - name: r-seurat
75
- check: |
76
- {{proc.lang}} <(echo "library(Seurat)")
77
- - name: r-future
78
- check: |
79
- {{proc.lang}} <(echo "library(future)")
80
- - name: r-bracer
81
- check: |
82
- {{proc.lang}} <(echo "library(bracer)")
74
+ r-seurat:
75
+ - check: {{proc.lang}} <(echo "library(Seurat)")
76
+ r-future:
77
+ - check: {{proc.lang}} <(echo "library(future)")
78
+ r-bracer:
79
+ - check: {{proc.lang}} <(echo "library(bracer)")
83
80
  """
84
-
85
81
  input = "metafile:file"
86
82
  output = "rdsfile:file:{{in.metafile | stem}}.seurat.RDS"
87
83
  lang = config.lang.rscript
@@ -113,17 +109,13 @@ class SeuratClustering(Proc):
113
109
  Note that `dims = 30` will be expanded as `dims = 1:30`
114
110
 
115
111
  Requires:
116
- - name: r-seurat
117
- check: |
118
- {{proc.lang}} <(echo "library(Seurat)")
119
- - name: r-tidyr
120
- check: |
121
- {{proc.lang}} <(echo "library(tidyr)")
122
- - name: r-dplyr
123
- check: |
124
- {{proc.lang}} <(echo "library(dplyr)")
112
+ r-seurat:
113
+ - check: {{proc.lang}} <(echo "library(Seurat)")
114
+ r-tidyr:
115
+ - check: {{proc.lang}} <(echo "library(tidyr)")
116
+ r-dplyr:
117
+ - check: {{proc.lang}} <(echo "library(dplyr)")
125
118
  """
126
-
127
119
  input = "srtobj:file"
128
120
  output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
129
121
  lang = config.lang.rscript
@@ -196,9 +188,8 @@ class SeuratClusterStats(Proc):
196
188
  `Seurat::Dimplot()`, add `devpars`.
197
189
 
198
190
  Requires:
199
- - name: r-seurat
200
- check: |
201
- {{proc.lang}} -e "library(Seurat)"
191
+ r-seurat:
192
+ - check: {{proc.lang}} -e "library(Seurat)"
202
193
  """
203
194
 
204
195
  input = "srtobj:file"
@@ -235,7 +226,6 @@ class CellsDistribution(Proc):
235
226
  Input:
236
227
  srtobj: The seurat object generated by SeuratClustering
237
228
  casefile: The file with the cases
238
-
239
229
  >>> # The name of the job, used in report, optional
240
230
  >>> # If not given, will use `{{in.srtobj | stem}}`, ...
241
231
  >>> name = ""
@@ -284,17 +274,13 @@ class CellsDistribution(Proc):
284
274
  will be used.
285
275
 
286
276
  Requires:
287
- - name: r-seurat
288
- check: |
289
- {{proc.lang}} -e "library(Seurat)"
290
- - name: r-dplyr
291
- check: |
292
- {{proc.lang}} -e "library(dplyr)"
293
- - name: r-tidyr
294
- check: |
295
- {{proc.lang}} -e "library(tidyr)"
277
+ r-seurat:
278
+ - check: {{proc.lang}} -e "library(Seurat)"
279
+ r-dplyr:
280
+ - check: {{proc.lang}} -e "library(dplyr)"
281
+ r-tidyr:
282
+ - check: {{proc.lang}} -e "library(tidyr)"
296
283
  """
297
-
298
284
  input = "srtobj:file, casefile:file"
299
285
  output = "outdir:dir:{{in.srtobj | stem}}.cells_distribution"
300
286
  lang = config.lang.rscript
@@ -320,17 +306,13 @@ class SeuratMetadataMutater(Proc):
320
306
  rdsfile: The seurat object with the additional metadata
321
307
 
322
308
  Requires:
323
- - name: r-seurat
324
- check: |
325
- {{proc.lang}} <(echo "library(Seurat)")
326
- - name: r-tibble
327
- check: |
328
- {{proc.lang}} <(echo "library(tibble)")
329
- - name: r-dplyr
330
- check: |
331
- {{proc.lang}} <(echo "library(dplyr)")
309
+ r-seurat:
310
+ - check: {{proc.lang}} <(echo "library(Seurat)")
311
+ r-tibble:
312
+ - check: {{proc.lang}} <(echo "library(tibble)")
313
+ r-dplyr:
314
+ - check: {{proc.lang}} <(echo "library(dplyr)")
332
315
  """
333
-
334
316
  input = "srtobj:file, metafile:file, mutaters:var"
335
317
  output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
336
318
  lang = config.lang.rscript
@@ -372,7 +354,6 @@ class GeneExpressionInvestigation(Proc):
372
354
  - `res`, `height` and `width` the parameters for `png()`
373
355
  - other arguments for `ComplexHeatmap::Heatmap()`
374
356
  """
375
-
376
357
  input = "srtobj:file, genefile:file, configfile:file"
377
358
  output = "outdir:dir:{{in.configfile | stem0}}.gei"
378
359
  lang = config.lang.rscript
@@ -409,7 +390,6 @@ class DimPlots(Proc):
409
390
  Keys are the names and values are the arguments to
410
391
  `Seurat::Dimplots`
411
392
  """
412
-
413
393
  input = "srtobj:file, configfile:file, name:var"
414
394
  output = "outdir:dir:{{in.srtobj | stem}}.dimplots"
415
395
  lang = config.lang.rscript
@@ -475,7 +455,6 @@ class MarkersFinder(Proc):
475
455
  `p_val_adj`
476
456
  Example - `"p_val_adj < 0.05 & abs(avg_log2FC) > 1"`
477
457
  """
478
-
479
458
  input = "srtobj:file, casefile:file"
480
459
  output = "outdir:dir:{{(in.casefile or in.srtobj) | stem0}}.markers"
481
460
  lang = config.lang.rscript
@@ -520,39 +499,33 @@ class ExprImpute(Proc):
520
499
  python: The python path where magic-impute is installed.
521
500
 
522
501
  Requires:
523
- - name: r-scimpute
524
- if: {{proc.envs.tool == "scimpute"}}
525
- check: |
526
- {{proc.lang}} <(echo "library(scImpute)")
527
- - name: r-rmagic
528
- if: {{proc.envs.tool == "rmagic"}}
529
- check: |
530
- {{proc.lang}} <(\
531
- echo "\
532
- tryCatch(\
533
- { setwd(dirname(Sys.getenv('CONDA_PREFIX'))) }, \
534
- error = function(e) NULL \
535
- ); \
536
- library(Rmagic)\
537
- "\
538
- )
539
- - name: magic-impute
540
- if: {{proc.envs.tool == "rmagic"}}
541
- check: |
542
- {{proc.envs.rmagic_args.python}} -c "import magic")
543
- - name: r-dplyr
544
- if: {{proc.envs.tool == "scimpute"}}
545
- check: |
546
- {{proc.lang}} <(echo "library(dplyr)")
547
- - name: r-seurat
548
- check: |
549
- {{proc.lang}} <(echo "library(Seurat)")
550
- - name: r-seuratwrappers
551
- if: {{proc.envs.tool == "alra"}}
552
- check: |
553
- {{proc.lang}} <(echo "library(SeuratWrappers)")
502
+ r-scimpute:
503
+ - if: {{proc.envs.tool == "scimpute"}}
504
+ - check: {{proc.lang}} <(echo "library(scImpute)")
505
+ r-rmagic:
506
+ - if: {{proc.envs.tool == "rmagic"}}
507
+ - check: |
508
+ {{proc.lang}} <(\
509
+ echo "\
510
+ tryCatch(\
511
+ { setwd(dirname(Sys.getenv('CONDA_PREFIX'))) }, \
512
+ error = function(e) NULL \
513
+ ); \
514
+ library(Rmagic)\
515
+ "\
516
+ )
517
+ magic-impute:
518
+ - if: {{proc.envs.tool == "rmagic"}}
519
+ - check: {{proc.envs.rmagic_args.python}} -c "import magic")
520
+ r-dplyr:
521
+ - if: {{proc.envs.tool == "scimpute"}}
522
+ - check: {{proc.lang}} <(echo "library(dplyr)")
523
+ r-seurat:
524
+ - check: {{proc.lang}} <(echo "library(Seurat)")
525
+ r-seuratwrappers:
526
+ - if: {{proc.envs.tool == "alra"}}
527
+ - check: {{proc.lang}} <(echo "library(SeuratWrappers)")
554
528
  """
555
-
556
529
  input = "infile:file"
557
530
  output = "outfile:file:{{in.infile | stem}}.imputed.RDS"
558
531
  lang = config.lang.rscript
@@ -588,7 +561,6 @@ class SCImpute(Proc):
588
561
  infmt: The input format.
589
562
  Either `seurat` or `matrix`
590
563
  """
591
-
592
564
  input = "infile:file, groupfile:file"
593
565
  output = [
594
566
  "outfile:file:{{in.infile | stem | replace: '.seurat', ''}}."
@@ -625,14 +597,11 @@ class SeuratFilter(Proc):
625
597
  invert: Invert the selection?
626
598
 
627
599
  Requires:
628
- - name: r-seurat
629
- check: |
630
- {{proc.lang}} <(echo "library('Seurat')")
631
- - name: r-dplyr
632
- check: |
633
- {{proc.lang}} <(echo "library('dplyr')")
600
+ r-seurat:
601
+ - check: {{proc.lang}} <(echo "library('Seurat')")
602
+ r-dplyr:
603
+ - check: {{proc.lang}} <(echo "library('dplyr')")
634
604
  """
635
-
636
605
  input = "srtobj:file, filters:var"
637
606
  output = "outfile:file:{{in.srtobj | stem}}.filtered.RDS"
638
607
  lang = config.lang.rscript
@@ -662,14 +631,11 @@ class SeuratSubset(Proc):
662
631
  ignore_nas: Ignore NA values?
663
632
 
664
633
  Requires:
665
- - name: r-seurat
666
- check: |
667
- {{proc.lang}} <(echo "library('Seurat')")
668
- - name: r-dplyr
669
- check: |
670
- {{proc.lang}} <(echo "library('dplyr')")
634
+ r-seurat:
635
+ - check: {{proc.lang}} <(echo "library('Seurat')")
636
+ r-dplyr:
637
+ - check: {{proc.lang}} <(echo "library('dplyr')")
671
638
  """
672
-
673
639
  input = "srtobj:file, subsets:var"
674
640
  output = "outdir:dir:{{in.srtobj | stem}}.subsets"
675
641
  envs = {"ignore_nas": True}
@@ -723,7 +689,6 @@ class Subset10X(Proc):
723
689
  feats_to_keep: The features/genes to keep.
724
690
  The final features list will be `feats_to_keep` + `nfeats`
725
691
  """
726
-
727
692
  input = "indir:dir"
728
693
  output = "outdir:dir:{{in.indir | stem}}"
729
694
  envs = {
@@ -750,7 +715,6 @@ class Write10X(Proc):
750
715
  Envs:
751
716
  version: The version of 10X format
752
717
  """
753
-
754
718
  input = "srtobj:file"
755
719
  output = "outdir:dir:{{in.srtobj | stem}}"
756
720
  envs = {"version": "3"}
@@ -790,14 +754,11 @@ class ScFGSEA(Proc):
790
754
 
791
755
 
792
756
  Requires:
793
- - name: bioconductor-fgsea
794
- check: |
795
- {{proc.lang}} -e "library(fgsea)"
796
- - name: r-seurat
797
- check: |
798
- {{proc.lang}} -e "library(seurat)"
757
+ bioconductor-fgsea:
758
+ - check: {{proc.lang}} -e "library(fgsea)"
759
+ r-seurat:
760
+ - check: {{proc.lang}} -e "library(seurat)"
799
761
  """
800
-
801
762
  input = "srtobj:file, casefile:file"
802
763
  output = "outdir:dir:{{(in.casefile or in.srtobj) | stem0}}.fgsea"
803
764
  lang = config.lang.rscript
@@ -837,22 +798,18 @@ class CellTypeAnnotate(Proc):
837
798
  identity and values as the new cell type.
838
799
 
839
800
  Requires:
840
- - name: r-HGNChelper
841
- if: {{proc.envs.tool == 'sctype'}}
842
- check: |
843
- {{proc.lang}} -e "library(HGNChelper)"
844
- - name: r-seurat
845
- if: {{proc.envs.tool == 'sctype'}}
846
- check: |
847
- {{proc.lang}} -e "library(Seurat)"
848
- - name: r-dplyr
849
- if: {{proc.envs.tool == 'sctype'}}
850
- check: |
851
- {{proc.lang}} -e "library(dplyr)"
852
- - name: r-openxlsx
853
- if: {{proc.envs.tool == 'sctype'}}
854
- check: |
855
- {{proc.lang}} -e "library(openxlsx)"
801
+ r-HGNChelper:
802
+ - if: {{proc.envs.tool == 'sctype'}}
803
+ - check: {{proc.lang}} -e "library(HGNChelper)"
804
+ r-seurat:
805
+ - if: {{proc.envs.tool == 'sctype'}}
806
+ - check: {{proc.lang}} -e "library(Seurat)"
807
+ r-dplyr:
808
+ - if: {{proc.envs.tool == 'sctype'}}
809
+ - check: {{proc.lang}} -e "library(dplyr)"
810
+ r-openxlsx:
811
+ - if: {{proc.envs.tool == 'sctype'}}
812
+ - check: {{proc.lang}} -e "library(openxlsx)"
856
813
  """
857
814
  input = "sobjfile:file"
858
815
  output = "outfile:file:{{in.sobjfile | stem}}.annotated.RDS"