biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +6 -0
- biopipen/core/filters.py +35 -23
- biopipen/core/testing.py +6 -1
- biopipen/ns/bam.py +39 -0
- biopipen/ns/cellranger.py +5 -0
- biopipen/ns/cellranger_pipeline.py +2 -2
- biopipen/ns/cnvkit_pipeline.py +4 -1
- biopipen/ns/delim.py +33 -27
- biopipen/ns/protein.py +99 -0
- biopipen/ns/scrna.py +428 -250
- biopipen/ns/snp.py +16 -3
- biopipen/ns/tcr.py +125 -1
- biopipen/ns/vcf.py +34 -0
- biopipen/ns/web.py +5 -1
- biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
- biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
- biopipen/reports/tcr/ClonalStats.svelte +15 -0
- biopipen/reports/utils/misc.liq +20 -7
- biopipen/scripts/bam/BamMerge.py +2 -2
- biopipen/scripts/bam/BamSampling.py +4 -4
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +3 -3
- biopipen/scripts/bam/CNVpytor.py +10 -10
- biopipen/scripts/bam/ControlFREEC.py +11 -11
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
- biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +20 -9
- biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
- biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
- biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/SampleInfo.R +94 -148
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +4 -4
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifScan.py +8 -8
- biopipen/scripts/scrna/CellCellCommunication.py +59 -22
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MarkersFinder.R +273 -654
- biopipen/scripts/scrna/RadarPlots.R +73 -53
- biopipen/scripts/scrna/SCP-plot.R +15202 -0
- biopipen/scripts/scrna/ScVelo.py +0 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
- biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
- biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
- biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
- biopipen/scripts/scrna/SeuratPreparing.R +138 -81
- biopipen/scripts/scrna/SlingShot.R +71 -0
- biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
- biopipen/scripts/snp/Plink2GTMat.py +26 -11
- biopipen/scripts/snp/PlinkFilter.py +7 -7
- biopipen/scripts/snp/PlinkFromVcf.py +8 -5
- biopipen/scripts/snp/PlinkSimulation.py +4 -4
- biopipen/scripts/snp/PlinkUpdateName.py +4 -4
- biopipen/scripts/stats/ChowTest.R +48 -22
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/ClonalStats.R +484 -0
- biopipen/scripts/tcr/ScRepLoading.R +127 -0
- biopipen/scripts/tcr/TCRDock.py +10 -6
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
- biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +4 -4
- biopipen/scripts/vcf/BcftoolsView.py +5 -5
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +12 -3
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +3 -3
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
- biopipen/scripts/web/gcloud_common.py +1 -1
- biopipen/utils/gsea.R +75 -35
- biopipen/utils/misc.R +205 -7
- biopipen/utils/misc.py +17 -8
- biopipen/utils/reference.py +11 -11
- biopipen/utils/repr.R +146 -0
- biopipen/utils/vcf.py +1 -1
- {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/METADATA +8 -8
- {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/RECORD +115 -105
- {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/WHEEL +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
- biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
- {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/entry_points.txt +0 -0
biopipen/ns/snp.py
CHANGED
@@ -183,7 +183,7 @@ class PlinkFromVcf(Proc):
         vcf_idspace_to: convert all spaces in sample IDs to this character.
         set_missing_var_ids: update variant IDs using a template string,
             with a '@' where the chromosome code should go, and a '#' where the
-            base-pair position belongs. You can also specify
+            base-pair position belongs. You can also specify `\\$r` and `\\$a` for
             the reference and alternate alleles, respectively.
             See <https://www.cog-genomics.org/plink/2.0/data#set_all_var_ids>
         max_alleles (type=int): Maximum number of alleles per variant.
@@ -191,7 +191,7 @@ class PlinkFromVcf(Proc):
         Note that `_` will be replaced by `-` in the argument names.
     """ # noqa: E501
     input = "invcf:file"
-    output = "outdir:dir:{{in.invcf | regex_replace: '\\.gz$', ''
+    output = "outdir:dir:{{in.invcf.stem | regex_replace: '\\.gz$', ''}}"
     lang = config.lang.python
     envs = {
         "plink": config.exe.plink2,
@@ -217,7 +217,14 @@ class Plink2GTMat(Proc):

     The allelic dosage is used as the values of genotype matrix.
     "--keep-allele-order" is used to keep the allele order consistent with the
-    reference allele first.
+    reference allele first. This way, the genotype of homozygous reference alleles
+    will be encoded as 2, heterozygous as 1, and homozygous alternate alleles as 0.
+    This is the PLINK dosage encoding. If you want to use this encoding, you can
+    set `envs.gtcoding` to `plink`. Otherwise, the default encoding is `vcf`, which
+    will encode the genotype as 0, 1, and 2 for homozygous reference, heterozygous,
+    and homozygous alternate alleles, respectively.
+
+    Note that `envs.gtcoding = "vcf"` only works for biallelic variants for now.

     Input:
         indir: Input directory containing the PLINK files.
@@ -241,6 +248,11 @@ class Plink2GTMat(Proc):
             respectively.
         trans_chr: A dictionary to translate chromosome numbers to chromosome names.
         missing_id: what to use as the rs if missing.
+        gtcoding (choice): The genotype coding to use.
+            - vcf: 0/1/2 for homozygous reference, heterozygous, and homozygous
+                alternate alleles, respectively.
+            - plink: 2/1/0 for homozygous reference, heterozygous, and homozygous
+                alternate alleles, respectively.
     """
     input = "indir:dir"
     output = "outfile:file:{{in.indir | stem}}-gtmat.txt"
@@ -253,6 +265,7 @@ class Plink2GTMat(Proc):
         "varid": "{chr}_{pos}_{varid}_{ref}_{alt}",
         "trans_chr": {"23": "X", "24": "Y", "25": "XY", "26": "M"},
         "missing_id": "NA",
+        "gtcoding": "vcf",
     }
     script = "file://../scripts/snp/Plink2GTMat.py"

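The `gtcoding` option added above is just a choice between two encodings of the same biallelic genotype. A minimal, self-contained sketch (not biopipen code; the function name is made up for illustration) of how the two options map ALT-allele counts to matrix values:

def encode_genotype(n_alt: int, coding: str = "vcf") -> int:
    """Encode a biallelic genotype from the number of ALT alleles (0, 1 or 2)."""
    if coding == "vcf":    # 0/1/2 = hom-ref / het / hom-alt
        return n_alt
    if coding == "plink":  # 2/1/0 = hom-ref / het / hom-alt (dosage of the REF allele)
        return 2 - n_alt
    raise ValueError(f"unknown gtcoding: {coding}")

assert [encode_genotype(n, "vcf") for n in (0, 1, 2)] == [0, 1, 2]
assert [encode_genotype(n, "plink") for n in (0, 1, 2)] == [2, 1, 0]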
biopipen/ns/tcr.py
CHANGED
@@ -39,7 +39,8 @@ class ImmunarchLoading(Proc):
             information.

     Output:
-        rdsfile: The RDS file with the data and metadata
+        rdsfile: The RDS file with the data and metadata, which can be processed by
+            other `immunarch` functions.
         metatxt: The meta data at cell level, which can be used to attach to the Seurat object

     Envs:
@@ -1675,3 +1676,126 @@ class TCRDock(Proc):
         "data_dir": None,
     }
     script = "file://../scripts/tcr/TCRDock.py"
+
+
+class ScRepLoading(Proc):
+    """Load the single cell TCR/BCR data into a `scRepertoire` compatible object
+
+    This process loads the single cell TCR/BCR data into a `scRepertoire`
+    compatible object. Later, `scRepertoire::combineExpression` can be used to
+    combine the expression data with the TCR/BCR data.
+
+    For the data path specified at `TCRData` in the input file, we will first find
+    `filtered_contig_annotations.csv` and `filtered_config_annotations.csv.gz` in the
+    path. If neighter of them exists, we will find `all_contig_annotations.csv` and
+    `all_contig_annotations.csv.gz` in the path and a warning will be raised
+    (You can find it at `./.pipen/<pipeline-name>/ImmunarchLoading/<job.index>/job.stderr`).
+
+    If none of the files exists, an error will be raised.
+
+    Input:
+        metafile: The meta data of the samples
+            A tab-delimited file
+            Two columns are required:
+            * `Sample` to specify the sample names.
+            * `TCRData` to assign the path of the data to the samples,
+                and this column will be excluded as metadata.
+                Immunarch is able to fetch the sample names from the names of
+                the target files. However, 10x data yields result like
+                `filtered_contig_annotations.csv`, which doesn't have any name
+                information.
+
+    Output:
+        outfile: The `scRepertoire` compatible object in RDS format
+
+    Envs:
+        combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR` function.
+            See also <https://www.borch.dev/uploads/screpertoire/reference/combinetcr>
+        exclude (auto): The columns to exclude from the metadata to add to the object.
+            A list of column names to exclude or a string with column names separated by `,`.
+            By default, `TCRData` and `RNAData` will be excluded.
+
+    """ # noqa: E501
+    input = "metafile:file"
+    output = "outfile:file:{{in.metafile | stem}}.scRep.RDS"
+    lang = config.lang.rscript
+    envs = {"combineTCR": {"samples": True}, "exclude": ["TCRData", "RNAData"]}
+    script = "file://../scripts/tcr/ScRepLoading.R"
+
+
+class ClonalStats(Proc):
+    """Visualize the clonal information.
+
+    Using [`scplotter`](https://github.com/pwwang/scplotter) to visualize the clonal
+    information.
+
+    Input:
+        screpfile: The `scRepertoire` object in RDS format
+
+    Output:
+        outdir: The output directory containing the plots
+
+    Envs:
+        mutaters (type=json;order=-9): The mutaters passed to `dplyr::mutate()` to add new variables.
+            When the object loaded form `in.screpfile` is a list, the mutaters will be applied to each element.
+            The keys are the names of the new variables, and the values are the expressions.
+            When it is a `Seurat` object, typically an output of `scRepertoire::combineExpression()`,
+            the mutaters will be applied to the `meta.data`.
+        viz_type (choice): The type of visualization to generate.
+            - volume: The volume of the clones using [`ClonalVolumePlot`](https://pwwang.github.io/scplotter/reference/ClonalVolumePlot.html)
+            - abundance: The abundance of the clones using [`ClonalAbundancePlot`](https://pwwang.github.io/scplotter/reference/ClonalAbundancePlot.html)
+            - length: The length of the CDR3 sequences using [`ClonalLengthPlot`](https://pwwang.github.io/scplotter/reference/ClonalLengthPlot.html)
+            - residency: The residency of the clones using [`ClonalResidencyPlot`](https://pwwang.github.io/scplotter/reference/ClonalResidencyPlot.html)
+            - dynamics: The dynamics of the clones using [`ClonalDynamicsPlot`](https://pwwang.github.io/scplotter/reference/ClonalDynamicsPlot.html)
+            - composition: The composition of the clones using [`ClonalCompositionPlot`](https://pwwang.github.io/scplotter/reference/ClonalCompositionPlot.html)
+            - overlap: The overlap of the clones using [`ClonalOverlapPlot`](https://pwwang.github.io/scplotter/reference/ClonalOverlapPlot.html)
+            - diversity: The diversity of the clones using [`ClonalDiversityPlot`](https://pwwang.github.io/scplotter/reference/ClonalDiversityPlot.html)
+            - geneusage: The gene usage of the clones using [`ClonalGeneUsagePlot`](https://pwwang.github.io/scplotter/reference/ClonalGeneUsagePlot.html)
+            - positional: The positional information of the clones using [`ClonalPositionalPlot`](https://pwwang.github.io/scplotter/reference/ClonalPositionalPlot.html)
+            - kmer: The kmer information of the clones using [`ClonalKmerPlot`](https://pwwang.github.io/scplotter/reference/ClonalKmerPlot.html)
+            - rarefaction: The rarefaction curve of the clones using [`ClonalRarefactionPlot`](https://pwwang.github.io/scplotter/reference/ClonalRarefactionPlot.html)
+        subset: An expression to subset the data before plotting.
+            Similar to `mutaters`, it will be applied to each element by `dplyr::filter()` if the object
+            loaded form `in.screpfile` is a list; otherwise, it will be applied to
+            `subset(sobj, subset = <expr>)` if the object is a `Seurat` object.
+        devpars (ns): The parameters for the plotting device.
+            - width (type=int): The width of the device
+            - height (type=int): The height of the device
+            - res (type=int): The resolution of the device
+        more_formats (list): The extra formats to save the plots in, other than PNG.
+        save_code (flag): Whether to save the code used to generate the plots
+            Note that the data directly used to generate the plots will also be saved in an `rda` file.
+            Be careful if the data is large as it may take a lot of disk space.
+        descr: The description of the plot, used to show in the report.
+        <more>: The arguments for the plot function
+            See the documentation of the corresponding plot function for the details
+        cases (type=json): The cases to generate the plots if we have multiple cases.
+            The keys are the names of the cases, and the values are the arguments for the plot function.
+            The arguments in `envs` will be used if not specified in `cases`, except for `mutaters`.
+            Sections can be specified as the prefix of the case name, separated by `::`.
+            For example, if you have a case named `Clonal Volume::Case1`, the plot will be put in the
+            section `Clonal Volume`. By default, when there are multiple cases for the same 'viz_type', the name of the 'viz_type' will be used
+            as the default section name (for example, when 'viz_type' is 'volume', the section name will be 'Clonal Volume').
+            When there is only a single case, the section name will default to 'DEFAULT', which will not be shown
+            in the report.
+    """ # noqa: E501
+    input = "screpfile:file"
+    output = "outdir:dir:{{in.screpfile | stem}}.clonalstats"
+    lang = config.lang.rscript
+    envs = {
+        "mutaters": {},
+        "subset": None,
+        "viz_type": None,
+        "devpars": {"width": None, "height": None, "res": 100},
+        "more_formats": [],
+        "save_code": False,
+        "descr": None,
+        "cases": {
+            "Clonal Volume": {"viz_type": "volume"},
+            "Clonal Abundance": {"viz_type": "abundance"},
+            "CDR3 Length": {"viz_type": "length"},
+            "Clonal Diversity": {"viz_type": "diversity"},
+        }
+    }
+    script = "file://../scripts/tcr/ClonalStats.R"
+    plugin_opts = {"report": "file://../reports/tcr/ClonalStats.svelte"}
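A minimal usage sketch for the two new processes, assuming a plain pipen pipeline that wires `ScRepLoading` into `ClonalStats`; the metafile path and case name are placeholders, and partially overriding `envs` in a subclass relies on pipen's usual envs inheritance:

from pipen import Pipen
from biopipen.ns.tcr import ScRepLoading, ClonalStats


class MyScRepLoading(ScRepLoading):
    # metafile with `Sample` and `TCRData` columns (placeholder path)
    input_data = ["samples.txt"]


class MyClonalStats(ClonalStats):
    requires = MyScRepLoading
    envs = {"cases": {"Clonal Volume::By sample": {"viz_type": "volume"}}}


if __name__ == "__main__":
    Pipen("clonal_stats_pipeline").set_starts(MyScRepLoading).run()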
biopipen/ns/vcf.py
CHANGED
@@ -595,6 +595,40 @@ class BcftoolsSort(Proc):
     script = "file://../scripts/vcf/BcftoolsSort.py"


+class BcftoolsMerge(Proc):
+    """Merge multiple VCF files using `bcftools merge`.
+
+    Input:
+        infiles: The input VCF files
+
+    Output:
+        outfile: The merged VCF file.
+
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile/outfile
+        ncores (type=int): Number of cores (`--threads`) to use
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        <more>: Other arguments for `bcftools merge`.
+            See also <https://samtools.github.io/bcftools/bcftools.html#merge>
+    """
+    input = "infiles:files"
+    output = (
+        "outfile:file:{{in.infiles | first | stem | append: '_etc_merged'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "ncores": config.misc.ncores,
+        "gz": True,
+        "index": True,
+    }
+    script = "file://../scripts/vcf/BcftoolsMerge.py"
+
+
 class BcftoolsView(Proc):
     """View, subset and filter VCF files by position and filtering expression.

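The `BcftoolsMerge.py` script itself is not shown in this diff; based on the `Envs` docstring above, the process boils down to something like the following (a sketch only, with placeholder file names and core count):

from biopipen.utils.misc import run_command

infiles = ["a.vcf.gz", "b.vcf.gz"]   # in.infiles (placeholders)
outfile = "a_etc_merged.vcf.gz"      # out.outfile (gz=True)

# bcftools merge with --threads from envs.ncores, bgzipped output
run_command(["bcftools", "merge", "--threads", "4", "-O", "z", "-o", outfile, *infiles])
# envs.index=True: create the .tbi index with tabix
run_command(["tabix", "-p", "vcf", outfile])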
biopipen/ns/web.py
CHANGED
@@ -32,7 +32,11 @@ class Download(Proc):
     input = "url"
     output = (
         "outfile:file:"
-        "{{in.url
+        """{{in.url
+        | basename
+        | url_decode
+        | slugify: separator='.', lowercase=False, regex_pattern='[^-a-zA-Z0-9_]+'
+        }}"""
     )
     lang = config.lang.python
     envs = {
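The new output template for `Download` runs the URL basename through `url_decode` and `slugify` so the output file name is filesystem-safe. A rough sketch of the transformation, assuming the `slugify` filter is backed by the python-slugify package with the options shown above:

from urllib.parse import unquote
from slugify import slugify  # python-slugify

url = "https://example.com/files/My%20Data%20(v2).tar.gz"
name = unquote(url.rsplit("/", 1)[-1])  # basename + url_decode -> "My Data (v2).tar.gz"
print(slugify(name, separator=".", lowercase=False, regex_pattern="[^-a-zA-Z0-9_]+"))
# -> something like "My.Data.v2.tar.gz": case kept, disallowed characters collapsed to "."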
biopipen/reports/scrna/SeuratClusterStats.svelte
CHANGED
@@ -1,7 +1,7 @@
 {% from "utils/misc.liq" import report_jobs, table_of_images -%}
 {% from_ os import path %}
 <script>
-  import { DataTable, Image, Descr } from "$libs";
+  import { DataTable, Image, Descr, Plotly } from "$libs";
   import { Tabs, Tab, TabContent } from "$ccs";
 </script>

biopipen/reports/scrna/SeuratMap2Ref.svelte
CHANGED
@@ -6,8 +6,21 @@
 {%- macro report_job(job, h=1) -%}

 <h{{h}}>UMAPs</h{{h}}>
-{% set imgs =
-  {
+{% set imgs = [] %}
+{% set caps = [] %}
+{% for png in job.outdir | glob: "UMAPs-*.png" %}
+  {% set pdf = png | regex_replace: "\\.png$", ".pdf" %}
+  {% set stm = png | stem %}
+  {% set _ = imgs.append({"src": png, "download": pdf}) %}
+  {% set _ = caps.append(stm | replace: "UMAPs-", "") %}
+{% endfor %}
+{{ table_of_images(imgs, caps) }}
+
+<h{{h}}>Mapping Score</h{{h}}>
+<Image
+  src="{{job.outdir | joinpath: 'mapping_score.png'}}"
+  download="{{job.outdir | joinpath: 'mapping_score.pdf'}}"
+/>

 <h{{h}}>Stats</h{{h}}>
 {% for stfile in job.outdir | glob: "stats-*.txt" %}
biopipen/reports/tcr/ClonalStats.svelte
ADDED
@@ -0,0 +1,15 @@
+{% from "utils/misc.liq" import report_jobs, table_of_images -%}
+
+<script>
+  import { Image, DataTable, Descr } from "$libs";
+</script>
+
+{%- macro report_job(job, h=1) -%}
+{{ job | render_job: h=h }}
+{%- endmacro -%}
+
+{%- macro head_job(job) -%}
+<h1>{{job.in.screpfile | stem | escape }}</h1>
+{%- endmacro -%}
+
+{{ report_jobs(jobs, head_job, report_job) }}
biopipen/reports/utils/misc.liq
CHANGED
@@ -25,7 +25,14 @@ import { Image } from "$libs";
 {% for batch_srcs in srcs | batch: col, "" %}
 {% set outer_loop = loop %}
 <tr>
-  {% for
+  {% for srcinfo in batch_srcs %}
+  {% if srcinfo | isinstance: str %}
+    {% set src = srcinfo %}
+    {% set download = None %}
+  {% else %}
+    {% set src = srcinfo['src'] %}
+    {% set download = srcinfo.get('download', None) %}
+  {% endif %}
   {% set i = col * outer_loop.index0 + loop.index0 %}
   {% if i >= len(srcs) %}
   <td style="width: {{table_width / col}}%"></td>
@@ -33,21 +40,27 @@ import { Image } from "$libs";
   <td style="width: {{table_width / col}}%; vertical-align:top;">
   {% if caps is none %}
   <div
-    style="padding-left: 28px; font-weight: bold; padding-top:
+    style="padding-left: 28px; font-weight: bold; padding-top: 16px;">
     {{ src | stem }}
   </div>
   {% elif caps is false %}
   {% else %}
   <div
-    style="padding-left: 28px; font-weight: bold; padding-top:
+    style="padding-left: 28px; font-weight: bold; padding-top: 16px;">
     {{ caps[i] }}
   </div>
   {% endif %}
-  {% if
-  <Image
-
+  {% if download %}
+    <Image
+      style="max-width: 90%"
+      src={{src | quote}}
+      download={ {{download | json}} }
+    />
   {% else %}
-  <Image
+    <Image
+      style="max-width: 90%"
+      src={{src | quote}}
+    />
   {% endif %}
   </td>
   {% endif %}
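After this change, `table_of_images` accepts either bare image paths or dicts carrying a `download` target, which is what the `SeuratMap2Ref.svelte` macro above now builds. A sketch of the two accepted item shapes (placeholder paths):

srcs = [
    "UMAPs-clusters.png",                                          # plain string: image only
    {"src": "UMAPs-samples.png", "download": "UMAPs-samples.pdf"}, # dict: adds a download link
]
caps = ["clusters", "samples"]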
biopipen/scripts/bam/BamMerge.py
CHANGED
@@ -1,8 +1,8 @@
 from pathlib import Path
 from biopipen.utils.misc import run_command, logger

-bamfiles = {{in.bamfiles |
-outfile = Path({{out.outfile |
+bamfiles = {{in.bamfiles | default: [] | each: str}} # pyright: ignore # noqa
+outfile = Path({{out.outfile | quote}}) # pyright: ignore
 ncores = {{envs.ncores | int}} # pyright: ignore
 tool = {{envs.tool | quote}} # pyright: ignore
 samtools = {{envs.samtools | quote}} # pyright: ignore
biopipen/scripts/bam/BamSampling.py
CHANGED
@@ -4,12 +4,12 @@ from biopipen.utils.misc import run_command, logger
 # using:
 # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam

-bamfile = {{ in.bamfile |
-outfile = Path({{ out.outfile |
+bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
+outfile = Path({{ out.outfile | quote }}) # pyright: ignore
 ncores = {{ envs.ncores | int }} # pyright: ignore
 samtools = {{ envs.samtools | repr }} # pyright: ignore
 tool = {{ envs.tool | repr }} # pyright: ignore
-fraction = {{ envs.fraction | repr }} # pyright: ignore
+fraction: float = {{ envs.fraction | repr }} # pyright: ignore
 seed = {{ envs.seed | int }} # pyright: ignore
 should_index = {{ envs.index | repr }} # pyright: ignore
 should_sort = {{ envs.sort | repr }} # pyright: ignore
@@ -38,7 +38,7 @@ if fraction > 1:
         "-c",
         bamfile
     ]
-    nreads = run_command(cmd, stdout="return").strip()
+    nreads = run_command(cmd, stdout="return").strip() # type: ignore
    fraction = fraction / float(int(nreads))

 ofile = (
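For context, the `if fraction > 1` branch above treats `envs.fraction` as a target read count rather than a proportion and converts it using the total read count from `samtools view -c`:

total_reads = 10_000_000  # from: samtools view -c in.bam
fraction = 500_000.0      # envs.fraction > 1 -> interpreted as a read count
if fraction > 1:
    fraction = fraction / float(int(total_reads))
print(fraction)           # 0.05, passed on to: samtools view --subsample 0.05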
biopipen/scripts/bam/BamSort.py
ADDED
@@ -0,0 +1,141 @@
+from hashlib import md5
+from pathlib import Path
+from biopipen.utils.misc import run_command, dict_to_cli_args
+
+infile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
+outfile = Path({{ out.outfile | quote }}) # pyright: ignore
+args: dict = {{ envs | dict | repr }} # pyright: ignore
+ncores = args.pop("ncores")
+tool = args.pop("tool")
+samtools = args.pop("samtools")
+sambamba = args.pop("sambamba")
+tmpdir = args.pop("tmpdir")
+byname = args.pop("byname")
+should_index = args.pop("index")
+sig = md5(infile.encode()).hexdigest()
+tmpdir = Path(tmpdir).joinpath(
+    f"biopipen_BamSort_{{job.index}}_{sig}_{Path(infile).name}"
+)
+tmpdir.mkdir(parents=True, exist_ok=True)
+tmpdir = str(tmpdir)
+
+
+def use_samtools():
+    """Use samtools to sort/index bam file.
+
+    Usage: samtools sort [options...] [in.bam]
+    Options:
+      -l INT     Set compression level, from 0 (uncompressed) to 9 (best)
+      -u         Output uncompressed data (equivalent to -l 0)
+      -m INT     Set maximum memory per thread; suffix K/M/G recognized [768M]
+      -M         Use minimiser for clustering unaligned/unplaced reads
+      -K INT     Kmer size to use for minimiser [20]
+      -n         Sort by read name (not compatible with samtools index command)
+      -t TAG     Sort by value of TAG. Uses position as secondary index (or read name if -n is set)
+      -o FILE    Write final output to FILE rather than standard output
+      -T PREFIX  Write temporary files to PREFIX.nnnn.bam
+      --no-PG
+                 Do not add a PG line
+      --template-coordinate
+                 Sort by template-coordinate
+      --input-fmt-option OPT[=VAL]
+                 Specify a single input file format option in the form
+                 of OPTION or OPTION=VALUE
+      -O, --output-fmt FORMAT[,OPT[=VAL]]...
+                 Specify output format (SAM, BAM, CRAM)
+      --output-fmt-option OPT[=VAL]
+                 Specify a single output file format option in the form
+                 of OPTION or OPTION=VALUE
+      --reference FILE
+                 Reference sequence FASTA FILE [null]
+      -@, --threads INT
+                 Number of additional threads to use [0]
+      --write-index
+                 Automatically index the output files [off]
+      --verbosity INT
+                 Set level of verbosity
+    """ # noqa
+    sargs = args.copy()
+    sargs["n"] = byname
+    sargs["T"] = f"{tmpdir}/tmp"
+    sargs["threads"] = ncores
+
+    if should_index:
+        sargs["write-index"] = True
+        # https://github.com/samtools/samtools/issues/1196
+        sargs["o"] = f"{outfile}##idx##{outfile}.bai"
+    else:
+        sargs["o"] = outfile
+
+    n_outfmt = sum(["O" in sargs, "output-fmt" in sargs])
+    if n_outfmt > 1:
+        raise ValueError(
+            "envs.args cannot contain both 'O' and 'output-fmt'"
+        )
+    if n_outfmt == 0:
+        sargs["O"] = "BAM"
+
+    cmd = [
+        samtools,
+        "sort",
+        *dict_to_cli_args(sargs),
+        infile,
+    ]
+    run_command(cmd)
+
+
+def use_sambamba():
+    """Use sambamba to sort/index bam file.
+
+    sambamba 0.8.2
+     by Artem Tarasov and Pjotr Prins (C) 2012-2021
+        LDC 1.28.1 / DMD v2.098.1 / LLVM12.0.0 / bootstrap LDC - the LLVM D compiler (1.28.1)
+
+    Usage: sambamba-sort [options] <input.bam>
+
+    Options: -m, --memory-limit=LIMIT
+               approximate total memory limit for all threads (by default 2GB)
+             --tmpdir=TMPDIR
+               directory for storing intermediate files; default is system directory for temporary files
+             -o, --out=OUTPUTFILE
+               output file name; if not provided, the result is written to a file with .sorted.bam extension
+             -n, --sort-by-name
+               sort by read name instead of coordinate (lexicographical order)
+             --sort-picard
+               sort by query name like in picard
+             -N, --natural-sort
+               sort by read name instead of coordinate (so-called 'natural' sort as in samtools)
+             -M, --match-mates
+               pull mates of the same alignment together when sorting by read name
+             -l, --compression-level=COMPRESSION_LEVEL
+               level of compression for sorted BAM, from 0 to 9
+             -u, --uncompressed-chunks
+               write sorted chunks as uncompressed BAM (default is writing with compression level 1), that might be faster in some cases but uses more disk space
+             -p, --show-progress
+               show progressbar in STDERR
+             -t, --nthreads=NTHREADS
+               use specified number of threads
+             -F, --filter=FILTER
+               keep only reads that satisfy FILTER
+    """ # noqa
+    sargs = args.copy()
+    sargs["nthreads"] = ncores
+    sargs["n"] = byname
+    sargs["tmpdir"] = tmpdir
+    sargs["o"] = outfile
+    cmd = [
+        sambamba,
+        "sort",
+        *dict_to_cli_args(sargs, sep="="),
+        infile,
+    ]
+    run_command(cmd)
+
+
+if __name__ == "__main__":
+    if tool == "samtools":
+        use_samtools()
+    elif tool == "sambamba":
+        use_sambamba()
+    else:
+        raise ValueError(f"Unknown tool: {tool}")
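For a coordinate sort with `index=True`, `use_samtools()` above assembles roughly the following command (an illustration only; it assumes `dict_to_cli_args` renders one-letter keys as `-X` and long keys as `--key value`, and the tmpdir and core count are placeholders):

outfile = "sample.sorted.bam"
cmd = [
    "samtools", "sort",
    "-T", "/tmp/biopipen_BamSort_xxx/tmp",   # temporary file prefix
    "--threads", "8",                        # from envs.ncores
    "--write-index",
    "-o", f"{outfile}##idx##{outfile}.bai",  # write the BAM and its .bai in one pass
    "-O", "BAM",
    "in.bam",
]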
biopipen/scripts/bam/BamSplitChroms.py
CHANGED
@@ -2,12 +2,12 @@ from pathlib import Path
 from biopipen.utils.misc import run_command
 from biopipen.utils.reference import bam_index

-bamfile = {{in.bamfile | quote}} # pyright: ignore
-outdir = {{out.outdir | quote}} # pyright: ignore
-tool = {{envs.tool | quote}} # pyright: ignore
-samtools = {{envs.samtools | quote}} # pyright: ignore
-sambamba = {{envs.sambamba | quote}} # pyright: ignore
-ncores = {{envs.ncores | repr}} # pyright: ignore
+bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
+outdir: str = {{out.outdir | quote}} # pyright: ignore
+tool: str = {{envs.tool | quote}} # pyright: ignore
+samtools: str = {{envs.samtools | quote}} # pyright: ignore
+sambamba: str = {{envs.sambamba | quote}} # pyright: ignore
+ncores: int = {{envs.ncores | repr}} # pyright: ignore
 keep_other_sq = {{envs.keep_other_sq | repr}} # pyright: ignore
 chroms_to_keep = {{envs.chroms | repr}} # pyright: ignore
 should_index = {{envs.index | bool}} # pyright: ignore
@@ -17,13 +17,13 @@ def _remove_other_sq(infile, chrom, outfile):
     exe = samtools if tool == "samtools" else sambamba
     print("\nRemoving other chromosomes in @SQ in header")
     header_cmd = [exe, "view", "-H", infile]
-    header_p = run_command(
+    header_p = run_command( # type: ignore
         header_cmd,
         stdout=True,
         wait=False,
         print_command=True,
     )
-    header = header_p.stdout.read().decode().strip().splitlines()
+    header = header_p.stdout.read().decode().strip().splitlines() # type: ignore
     new_header = []
     for line in header:
         if line.startswith("@SQ"):
@@ -63,7 +63,7 @@ def use_samtools():
             "| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
         )
         p = run_command(cmd, stdout=True, wait=False)
-        chroms = p.stdout.read().decode().strip().splitlines()
+        chroms = p.stdout.read().decode().strip().splitlines() # type: ignore
     else:
         print("\nUsing provided chromosomes")
         chroms = chroms_to_keep
@@ -121,7 +121,7 @@ def use_sambamba():
             "| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
         )
         p = run_command(cmd, stdout=True, wait=False)
-        chroms = p.stdout.read().decode().splitlines()
+        chroms = p.stdout.read().decode().splitlines() # type: ignore
     else:
         print("\nUsing provided chromosomes")
         chroms = chroms_to_keep
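The `grep '^@SQ' | cut -f 2 | cut -d ':' -f 2` pipeline in the hunks above just pulls chromosome names out of the BAM header; in plain Python it is equivalent to:

header = [
    "@HD\tVN:1.6\tSO:coordinate",
    "@SQ\tSN:chr1\tLN:248956422",
    "@SQ\tSN:chr2\tLN:242193529",
]
chroms = [line.split("\t")[1].split(":", 1)[1] for line in header if line.startswith("@SQ")]
print(chroms)  # ['chr1', 'chr2']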
biopipen/scripts/bam/BamSubsetByBed.py
CHANGED
@@ -4,9 +4,9 @@ from biopipen.utils.misc import run_command, logger
 # using:
 # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam

-bamfile = {{ in.bamfile |
-bedfile = {{ in.bedfile |
-outfile = Path({{ out.outfile |
+bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
+bedfile = {{ in.bedfile | quote }} # pyright: ignore # noqa
+outfile = Path({{ out.outfile | quote }}) # pyright: ignore
 ncores = {{ envs.ncores | int }} # pyright: ignore
 samtools = {{ envs.samtools | repr }} # pyright: ignore
 tool = {{ envs.tool | repr }} # pyright: ignore
biopipen/scripts/bam/CNVpytor.py
CHANGED
@@ -6,17 +6,17 @@ from datetime import datetime
 from biopipen.utils.reference import bam_index
 from biopipen.utils.misc import run_command, dict_to_cli_args, logger

-bamfile = {{in.bamfile | quote}} # pyright: ignore # noqa
-snpfile = {{in.snpfile |
+bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
+snpfile: str = {{in.snpfile | quote}} # pyright: ignore
 outdir = Path({{out.outdir | quote}}) # pyright: ignore
-cnvpytor = {{envs.cnvpytor | quote}} # pyright: ignore
-samtools = {{envs.samtools | quote}} # pyright: ignore
-ncores = {{envs.ncores | int}} # pyright: ignore
+cnvpytor: str = {{envs.cnvpytor | quote}} # pyright: ignore
+samtools: str = {{envs.samtools | quote}} # pyright: ignore
+ncores: int = {{envs.ncores | int}} # pyright: ignore
 refdir = {{envs.refdir | quote}} # pyright: ignore
 genome = {{envs.genome | quote}} # pyright: ignore
-chrsize = {{envs.chrsize | quote}} # pyright: ignore
-filters = {{envs.filters | repr}} # pyright: ignore
-args = {{envs |
+chrsize: str = {{envs.chrsize | quote}} # pyright: ignore
+filters: dict = {{envs.filters | repr}} # pyright: ignore
+args: dict = {{envs | dict}} # pyright: ignore

 del args['cnvpytor']
 del args['ncores']
@@ -27,7 +27,7 @@ del args['chrsize']
 del args['filters']


-bamfile = bam_index(bamfile, outdir, samtools, ncores)
+bamfile: Path = bam_index(bamfile, str(outdir), samtools, ncores=ncores)

 NOSNP_COLS = [
     "CNVtype",
@@ -293,7 +293,7 @@ def cnvpytor2vcf(infile, snp):
         fout.write('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n')
         fout.write('##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">\n')
         fout.write('##FORMAT=<ID=PE,Number=1,Type=String,Description="Number of paired-ends that support the event">\n')
-        fout.write(f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{
+        fout.write(f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{bamfile.stem}\n")
         prev_chrom, chrom_seq, count = "", "", 0
         for line in fin:
             # type, coor, length, rd, p1, p2, p3, p4, q0, pe = line.strip("\n").split()
|