biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(plotthis)
|
|
3
|
+
library(biopipen.utils)
|
|
4
|
+
|
|
5
|
+
indir <- {{in.indir | r}}
|
|
6
|
+
outdir <- {{out.outdir | r}}
|
|
7
|
+
plink <- {{envs.plink | r}}
|
|
8
|
+
ncores <- {{envs.ncores | r}}
|
|
9
|
+
modifier <- {{envs.modifier | r}}
|
|
10
|
+
gz <- {{envs.gz | r}}
|
|
11
|
+
cutoffs <- {{envs.cutoff | r}}
|
|
12
|
+
filters <- {{envs.filter | r}}
|
|
13
|
+
doplot <- {{envs.plot | r}}
|
|
14
|
+
devpars <- {{envs.devpars | r}}
|
|
15
|
+
|
|
16
|
+
# Locate the PLINK binary fileset in the input directory. Only one fileset is
# processed; when several .bed files are present the first one is used.
bedfile <- Sys.glob(file.path(indir, '*.bed'))
if (length(bedfile) == 0) {
    stop("No bed files found in the input directory.")
}
if (length(bedfile) > 1) {
    log_warn("Multiple bed files found in the input directory. Using the first one.")
    bedfile <- bedfile[1]
}
# plink is given the fileset prefix (path without the .bed extension)
input <- tools::file_path_sans_ext(bedfile)
output <- file.path(outdir, basename(input))

modifier <- match.arg(modifier, c("none", "counts", "x"))

cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--out", output
)
# A bare (non-list) cutoff is attached to the default metric of the chosen mode.
if (modifier == "counts") {
    cmd <- c(cmd, "--freq", "counts")
    if (!is.list(cutoffs)) { cutoffs <- list(ALT1_CT = cutoffs) }
# } else if (modifier == "case-control") {
#     cmd <- c(cmd, "--freq", "case-control")
#     if (!is.list(cutoffs)) { cutoffs <- list(MAF_A = cutoffs) }
} else if (modifier == "x") {
    cmd <- c(cmd, "--geno-counts")
    if (!is.list(cutoffs)) { cutoffs <- list("HOM_ALT1_CT" = cutoffs) }
} else {
    cmd <- c(cmd, "--freq")
    if (!is.list(cutoffs)) { cutoffs <- list(MAF = cutoffs) }
}
# NOTE(review): the report is later read from paste0(output, suffix) without a
# compression extension -- confirm that the "gz" modifier still yields a file
# readable under that name for the plink version in use.
if (isTRUE(gz)) { cmd <- c(cmd, "gz") }

# Normalise `filters` to a list keyed by metric name.
if (is.null(filters)) {
    # No filter configured at all: previously this crashed when assigning
    # names to an empty list. Metrics without an entry are treated as "no"
    # (report only) downstream.
    filters <- list()
} else if (!is.list(filters)) {
    # A single filter direction applies to every metric that has a cutoff
    filters <- as.list(rep(filters, length(cutoffs)))
    names(filters) <- names(cutoffs)
} else {
    # Every metric that is filtered on must also have a cutoff
    for (name in names(filters)) {
        if (is.null(cutoffs[[name]])) {
            stop(paste0("Cutoff for filter ", name, " is not provided."))
        }
    }
}

run_command(cmd, fg = TRUE)
|
|
61
|
+
|
|
62
|
+
# Draw a histogram of `metric_col`, grouped by the `Flag` column of `freq`,
# and save it to `pngfile`. The PNG size is derived from the "height" and
# "width" attributes of the plot object.
.plot_metric_hist <- function(freq, metric_col, pngfile) {
    p <- Histogram(
        freq,
        x = metric_col,
        group_by = "Flag",
        alpha = 0.8,
        bins = 50,
        xlab = metric_col,
        ylab = "Count",
        palette = "Set1"
    )
    res <- 70
    height <- attr(p, "height") * res
    width <- attr(p, "width") * res
    png(pngfile, width = width, height = height, res = res)
    print(p)
    dev.off()
}

# Read the plink report `<output><suffix>`, then for every metric in `cutoffs`
# either report which variants are >= / < the cutoff (filter "no") or exclude
# the failing variants from the fileset with plink (filter gt/lt/ge/le).
#
# suffix:   extension of the plink report to read (appended to `output`)
# snp_col:  column holding the variant IDs written to the list files
# sep:      field separator of the report
# modifier: optional function(freq) -> freq that adds derived columns
#
# Uses the script-level `input`, `output`, `outdir`, `cutoffs`, `filters`,
# `plink`, `ncores` and `doplot`. Returns NULL invisibly.
post_process <- function(suffix, snp_col = "ID", sep = "\t", modifier = NULL) {
    freq <- read.table(
        paste0(output, suffix),
        header = TRUE,
        check.names = FALSE,
        row.names = NULL,
        sep = sep,
        comment.char = ""
    )
    # The header line starts with '#'; drop it from the first column name
    colnames(freq)[1] <- sub("#", "", colnames(freq)[1])
    if (!is.null(modifier)) { freq <- modifier(freq) }

    # Filters are chained: each plink run starts from the previous result
    iter_in <- input
    n <- 0
    for (metric_col in names(cutoffs)) {
        if (is.null(cutoffs[[metric_col]])) {
            stop(paste0(
                "Cutoff for metric ",
                metric_col,
                " is not provided in ",
                suffix, "(x) file."))
        }
        if (is.null(freq[[metric_col]])) {
            # Fail with a clear message instead of an obscure
            # "replacement has 0 rows" error from the assignment below
            stop(paste0(
                "Metric ",
                metric_col,
                " is not found in ",
                suffix, "(x) file."))
        }

        freq[[metric_col]] <- as.numeric(freq[[metric_col]])
        cutoff <- cutoffs[[metric_col]]
        filter <- filters[[metric_col]] %||% "no"
        pngfile <- paste0(output, suffix, ".", metric_col, ".png")

        if (filter == "no") {
            # Report only: list the variants on either side of the cutoff
            ge_flag <- paste0(metric_col, " >= ", cutoff)
            lt_flag <- paste0(metric_col, " < ", cutoff)
            freq$GE <- freq[[metric_col]] >= cutoff
            freq$Flag <- ifelse(freq$GE, ge_flag, lt_flag)
            freq$Flag <- factor(freq$Flag, levels = c(lt_flag, ge_flag))
            # which() drops NA comparisons so no literal "NA" IDs are written
            write.table(
                freq[[snp_col]][which(freq$GE)],
                file = paste0(output, suffix, ".", metric_col, ".ge"),
                col.names = FALSE,
                row.names = FALSE,
                quote = FALSE
            )
            write.table(
                freq[[snp_col]][which(!freq$GE)],
                file = paste0(output, suffix, ".", metric_col, ".lt"),
                col.names = FALSE,
                row.names = FALSE,
                quote = FALSE
            )

            if (doplot) { .plot_metric_hist(freq, metric_col, pngfile) }
        } else {
            iter_dir <- file.path(outdir, paste0(metric_col, "_filtered"))
            dir.create(iter_dir, showWarnings = FALSE)
            iter_out <- file.path(iter_dir, basename(output))

            # The filter names the direction of the values that FAIL
            filter <- match.arg(filter, c("gt", "lt", "ge", "le"))
            failed <- switch(filter,
                gt = freq[[metric_col]] > cutoff,
                lt = freq[[metric_col]] < cutoff,
                ge = freq[[metric_col]] >= cutoff,
                le = freq[[metric_col]] <= cutoff
            )
            freq$Flag <- ifelse(failed, "Fail", "Pass")
            freq$Flag <- factor(freq$Flag, levels = c("Fail", "Pass"))
            failfile <- paste0(output, suffix, ".", metric_col, ".fail")
            write.table(
                freq[[snp_col]][which(freq$Flag == "Fail")],
                file = failfile,
                col.names = FALSE,
                row.names = FALSE,
                quote = FALSE
            )

            if (doplot) { .plot_metric_hist(freq, metric_col, pngfile) }

            filter_cmd <- c(
                plink,
                "--threads", ncores,
                "--bfile", shQuote(iter_in),
                "--exclude", shQuote(failfile),
                "--make-bed",
                "--out", shQuote(iter_out)
            )
            run_command(filter_cmd, fg = TRUE)

            iter_in <- iter_out
            n <- n + 1
        }
    }

    # Publish the last filtered fileset under the main output prefix. This is
    # done once after the loop, so it also happens when only some of the
    # metrics are filtered (the previous in-loop check required every metric
    # to be filtered before any link was created).
    if (n > 0) {
        file.symlink(paste0(iter_in, '.bed'), paste0(output, '.bed'))
        file.symlink(paste0(iter_in, '.bim'), paste0(output, '.bim'))
        file.symlink(paste0(iter_in, '.fam'), paste0(output, '.fam'))
    }
    invisible(NULL)
}
|
|
196
|
+
|
|
197
|
+
# Split comma-separated values into their individual fields.
#
# x:   vector of values; coerced to character before splitting
# agg: optional function applied to the fields of each element
#
# Without `agg` the list of per-element field vectors is returned; with `agg`
# the per-element results are simplified by sapply().
splitup <- function(x, agg = NULL) {
    fields <- strsplit(as.character(x), ",")
    if (!is.null(agg)) {
        return(sapply(fields, agg))
    }
    fields
}
|
|
204
|
+
# Write the report including the derived columns next to the plink output,
# as a tab-separated table with a header.
.write_extended <- function(freq, ext) {
    write.table(
        freq,
        file = paste0(output, ext),
        col.names = TRUE,
        row.names = FALSE,
        quote = FALSE,
        sep = "\t"
    )
}

# Each mode defines a `mod` function that adds the first-ALT-allele columns the
# cutoffs refer to (only when they are not already present) and then runs the
# post-processing on the matching plink report. Columns are looked up with
# [[ ]] so that a lookup can never partially match a longer column name.
if (modifier == "none") {
    mod <- function(freq) {
        # Add ALT1, ALT1_FREQ, REF_FREQ and MAF columns
        writing <- FALSE
        if (is.null(freq[["ALT1"]])) {
            # should be the first allele of ALT
            freq$ALT1 <- splitup(freq$ALT, agg = function(s) s[1])
            writing <- TRUE
        }
        if (is.null(freq[["ALT1_FREQ"]])) {
            freq$ALT1_FREQ <- as.double(splitup(freq$ALT_FREQS, agg = function(s) s[1]))
            writing <- TRUE
        }
        if (is.null(freq[["REF_FREQ"]])) {
            # whatever is not an ALT allele is the reference allele
            freq$REF_FREQ <- 1 - splitup(freq$ALT_FREQS, agg = function(s) sum(as.double(s)))
            writing <- TRUE
        }
        if (is.null(freq[["MAF"]])) {
            # smallest frequency among the reference and all ALT alleles
            min_alt_freqs <- splitup(freq$ALT_FREQS, agg = function(s) min(as.double(s)))
            freq$MAF <- pmin(freq$REF_FREQ, min_alt_freqs)
            writing <- TRUE
        }
        if (writing) { .write_extended(freq, ".afreqx") }
        return(freq)
    }
    post_process(".afreq", modifier = mod)
} else if (modifier == "counts") {
    mod <- function(freq) {
        # Add ALT1, ALT1_CT, and REF_CT columns
        writing <- FALSE
        if (is.null(freq[["ALT1"]])) {
            # should be the first allele of ALT
            freq$ALT1 <- splitup(freq$ALT, agg = function(s) s[1])
            writing <- TRUE
        }
        if (is.null(freq[["ALT1_CT"]])) {
            freq$ALT1_CT <- as.integer(splitup(freq$ALT_CTS, agg = function(s) s[1]))
            writing <- TRUE
        }
        if (is.null(freq[["REF_CT"]])) {
            freq$REF_CT <- freq$OBS_CT - splitup(freq$ALT_CTS, agg = function(s) sum(as.integer(s)))
            writing <- TRUE
        }
        if (writing) { .write_extended(freq, ".acountx") }
        return(freq)
    }
    post_process(".acount", modifier = mod)
# } else if (modifier == "case-control") {
#     post_process(".frq.cc")
} else if (modifier == "x") {
    mod <- function(freq) {
        # Add ALT1, HET_REF_ALT1_CT, HOM_ALT1_CT
        writing <- FALSE
        if (is.null(freq[["ALT1"]])) {
            # should be the first allele of ALT
            freq$ALT1 <- splitup(freq$ALT, agg = function(s) s[1])
            writing <- TRUE
        }
        if (is.null(freq[["HET_REF_ALT1_CT"]])) {
            freq$HET_REF_ALT1_CT <- as.integer(splitup(freq$HET_REF_ALT_CTS, agg = function(s) s[1]))
            writing <- TRUE
        }
        if (is.null(freq[["HOM_ALT1_CT"]])) {
            freq$HOM_ALT1_CT <- as.integer(splitup(freq$TWO_ALT_GENO_CTS, agg = function(s) s[1]))
            writing <- TRUE
        }
        # This mode tracked `writing` but never wrote the extended table;
        # write it like the other two modes do.
        if (writing) { .write_extended(freq, ".gcountx") }
        return(freq)
    }
    post_process(".gcount", modifier = mod)
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from os import path, PathLike
|
|
4
|
+
from biopipen.core.filters import dict_to_cli_args
|
|
5
|
+
from biopipen.utils.reference import tabix_index
|
|
6
|
+
from biopipen.utils.misc import run_command
|
|
7
|
+
|
|
8
|
+
invcf: str | PathLike = {{in.invcf | quote}} # noqa: E999 # pyright: ignore
|
|
9
|
+
outprefix: str = {{in.invcf | stem0 | quote}} # pyright: ignore
|
|
10
|
+
outdir: str = {{out.outdir | quote}} # pyright: ignore
|
|
11
|
+
args: dict = {{envs | dict}} # pyright: ignore
|
|
12
|
+
|
|
13
|
+
plink = args.pop("plink")
|
|
14
|
+
tabix = args.pop("tabix")
|
|
15
|
+
ncores = args.pop("ncores")
|
|
16
|
+
|
|
17
|
+
# normalize vcf-filter
|
|
18
|
+
args.setdefault("vcf_filter", True)
|
|
19
|
+
if isinstance(args["vcf_filter"], str):
|
|
20
|
+
args["vcf_filter"] = args["vcf_filter"].split()
|
|
21
|
+
|
|
22
|
+
# normalize biallelic-only
|
|
23
|
+
args.setdefault("max_alleles", 2)
|
|
24
|
+
|
|
25
|
+
# This makes it possible to keep the allele order in the output
|
|
26
|
+
# no need for plink2
|
|
27
|
+
# args["keep_allele_order"] = True
|
|
28
|
+
args.setdefault("keep_allele_order", True)
|
|
29
|
+
|
|
30
|
+
# resolve plink 1.x --set-missing-var-ids doesn't distinguish $1, $2,...
|
|
31
|
+
# for ref and alts
|
|
32
|
+
# if (
|
|
33
|
+
# "set_missing_var_ids" in args
|
|
34
|
+
# and args["set_missing_var_ids"]
|
|
35
|
+
# and ("$" in args["set_missing_var_ids"] or "%" in args["set_missing_var_ids"])
|
|
36
|
+
# ):
|
|
37
|
+
# tmpfile = path.join(outdir, 'with_var_ids.vcf')
|
|
38
|
+
# set_missing_var_ids = args.pop("set_missing_var_ids")
|
|
39
|
+
# set_missing_var_ids = (
|
|
40
|
+
# set_missing_var_ids
|
|
41
|
+
# .replace("@", "%CHROM")
|
|
42
|
+
# .replace("#", "%POS")
|
|
43
|
+
# .replace("$1", "%REF")
|
|
44
|
+
# .replace("$2", "%ALT{0}")
|
|
45
|
+
# .replace("$3", "%ALT{1}")
|
|
46
|
+
# .replace("$4", "%ALT{2}")
|
|
47
|
+
# .replace("$5", "%ALT{3}")
|
|
48
|
+
# .replace("$6", "%ALT{4}")
|
|
49
|
+
# .replace("%CHROM_", "%CHROM\\_")
|
|
50
|
+
# .replace("%POS_", "%POS\\_")
|
|
51
|
+
# .replace("%REF_", "%REF\\_")
|
|
52
|
+
# )
|
|
53
|
+
# set_vid_cmd = [
|
|
54
|
+
# bcftools,
|
|
55
|
+
# "annotate",
|
|
56
|
+
# "--set-id",
|
|
57
|
+
# f"+{set_missing_var_ids}",
|
|
58
|
+
# "--output-type",
|
|
59
|
+
# "z",
|
|
60
|
+
# "--output",
|
|
61
|
+
# tmpfile,
|
|
62
|
+
# "--threads",
|
|
63
|
+
# ncores,
|
|
64
|
+
# invcf,
|
|
65
|
+
# ]
|
|
66
|
+
|
|
67
|
+
# run_command(set_vid_cmd, fg=True, env={"cwd": outdir})
|
|
68
|
+
# invcf = tmpfile
|
|
69
|
+
|
|
70
|
+
# Index the input VCF, then hand the indexed file, the output prefix and the
# thread count to plink together with the remaining options.
invcf = tabix_index(invcf, "vcf", tabix=tabix)
args.update(
    vcf=invcf,
    out=path.join(outdir, outprefix),
    threads=ncores,
)

cmd = [plink, "--make-bed"]
cmd.extend(dict_to_cli_args(args, dup_key=False, dashify=True))

run_command(cmd, fg=True, env={"cwd": outdir})
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
library(plotthis)
|
|
2
|
+
library(biopipen.utils)
|
|
3
|
+
|
|
4
|
+
indir <- {{in.indir | r}}
|
|
5
|
+
outdir <- {{out.outdir | r}}
|
|
6
|
+
plink <- {{envs.plink | r}}
|
|
7
|
+
ncores <- {{envs.ncores | r}}
|
|
8
|
+
cutoff <- {{envs.cutoff | r}}
|
|
9
|
+
doplot <- {{envs.plot | r}}
|
|
10
|
+
devpars <- {{envs.devpars | r}}
|
|
11
|
+
|
|
12
|
+
# Locate the PLINK binary fileset in the input directory. Only one fileset is
# processed; when several .bed files are present the first one is used.
bedfile <- Sys.glob(file.path(indir, '*.bed'))
if (length(bedfile) == 0) {
    stop("No bed files found in the input directory.")
}
if (length(bedfile) > 1) {
    log_warn("Multiple bed files found in the input directory. Using the first one.")
    bedfile <- bedfile[1]
}
# plink is given the fileset prefix (path without the .bed extension)
input <- tools::file_path_sans_ext(bedfile)
output <- file.path(outdir, basename(input))

# Step 1: compute the Hardy-Weinberg report
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--hardy",
    "--out", output
)
run_command(cmd, fg = TRUE)

# comment.char = "" keeps a header line that starts with '#'
hardy <- read.table(
    paste0(output, '.hardy'),
    header = TRUE,
    row.names = NULL,
    check.names = FALSE,
    comment.char = ""
)
# Step 2: list the IDs of the variants whose p-value is below the cutoff
hardy.fail <- hardy[which(hardy$P < cutoff), 'ID', drop = FALSE]
write.table(
    hardy.fail,
    paste0(output, '.hardy.fail'),
    col.names = FALSE,
    row.names = FALSE,
    sep = "\t",
    quote = FALSE
)

if (doplot) {
    hardy$Pval <- -log10(hardy$P)
    hardy$Status <- "Pass"
    # The fail table only carries the `ID` column. Matching on `SNP` (which
    # it does not have) never flagged anything, so every variant was plotted
    # as "Pass".
    hardy[which(hardy$ID %in% hardy.fail$ID), "Status"] <- "Fail"
    hardy$Status <- factor(hardy$Status, levels = c("Fail", "Pass"))

    p <- Histogram(
        hardy,
        x = "Pval",
        group_by = "Status",
        alpha = 0.8,
        bins = 50,
        xlab = "-log10(HWE p-value)",
        ylab = "Count",
        palette = "Set1"
    )
    # PNG size is derived from the plot's "height"/"width" attributes
    res <- 70
    height <- attr(p, "height") * res
    width <- attr(p, "width") * res
    png(
        filename = paste0(output, '.hardy.png'),
        width = width,
        height = height,
        res = res
    )
    print(p)
    dev.off()
}

# Step 3: write the fileset without the failing variants
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--exclude", paste0(output, '.hardy.fail'),
    "--make-bed",
    "--out", output
)
run_command(cmd, fg = TRUE)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
library(plotthis)
|
|
2
|
+
library(biopipen.utils)
|
|
3
|
+
|
|
4
|
+
indir <- {{in.indir | r}}
|
|
5
|
+
outdir <- {{out.outdir | r}}
|
|
6
|
+
plink <- {{envs.plink | r}}
|
|
7
|
+
ncores <- {{envs.ncores | r}}
|
|
8
|
+
cutoff <- {{envs.cutoff | r}}
|
|
9
|
+
doplot <- {{envs.plot | r}}
|
|
10
|
+
devpars <- {{envs.devpars | r}}
|
|
11
|
+
|
|
12
|
+
log <- get_logger()

# Locate the PLINK binary fileset in the input directory. Only one fileset is
# processed; when several .bed files are present the first one is used.
bedfile <- Sys.glob(file.path(indir, '*.bed'))
if (length(bedfile) == 0) {
    stop("No bed files found in the input directory.")
}
if (length(bedfile) > 1) {
    log$warn("Multiple bed files found in the input directory. Using the first one.")
    bedfile <- bedfile[1]
}
# plink is given the fileset prefix (path without the .bed extension)
input <- tools::file_path_sans_ext(bedfile)
output <- file.path(outdir, basename(input))

# need .afreq for --het for plink2
freq_cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--freq",
    "--out", output
)
run_command(freq_cmd, fg = TRUE)

cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--het",
    "--out", output,
    "--read-freq", paste0(output, '.afreq')
)
run_command(cmd, fg = TRUE)

# comment.char = "" keeps a header line that starts with '#'
phet <- read.table(
    paste0(output, '.het'),
    header = TRUE,
    row.names = NULL,
    check.names = FALSE,
    comment.char = ""
)
# Drop the leading '#' from the first column name; otherwise the first ID
# column (FID, or IID when there is no FID column) cannot be found by name.
colnames(phet)[1] <- sub("#", "", colnames(phet)[1])

# Observed heterozygosity rate per sample
het <- data.frame(Het = 1 - phet[, "O(HOM)"]/phet[, "OBS_CT"])
# Sample key: "FID<tab>IID" when a family ID column exists, else the IID alone
if (is.null(phet[["FID"]])) {
    rownames(het) <- as.character(phet[["IID"]])
} else {
    rownames(het) <- paste(phet[["FID"]], phet[["IID"]], sep = "\t")
}
het.mean <- mean(het$Het, na.rm = TRUE)
het.sd <- sd(het$Het, na.rm = TRUE)
# Samples further than `cutoff` standard deviations from the mean fail.
# which() drops NA comparisons (e.g. when the SD cannot be computed), so no
# "NA" entries end up in the list.
het.outlier <- !is.na(het$Het) &
    (het$Het < het.mean - cutoff * het.sd | het$Het > het.mean + cutoff * het.sd)
het.fail <- rownames(het)[which(het.outlier)]
# Pass the path itself so that no file connection is left open
writeLines(het.fail, paste0(output, '.het.fail'))

if (doplot) {
    het$Status <- "Pass"
    het[het.fail, "Status"] <- "Fail"
    het$Status <- factor(het$Status, levels = c("Fail", "Pass"))

    p <- Histogram(
        het,
        x = "Het",
        group_by = "Status",
        alpha = 0.8,
        bins = 50,
        xlab = "Sample Heterozygosity",
        ylab = "Count",
        palette = "Set1"
    )
    # PNG size is derived from the plot's "height"/"width" attributes
    res <- 70
    height <- attr(p, "height") * res
    width <- attr(p, "width") * res
    png(
        filename = paste0(output, '.het.png'),
        width = width,
        height = height,
        res = res
    )
    print(p)
    dev.off()
}

# Write the fileset without the failing samples
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--remove", paste0(output, '.het.fail'),
    "--make-bed",
    "--out", output
)
run_command(cmd, fg = TRUE)
|