PyPI - biopipen - Versions diffs - 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +307 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +14 -2
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte CHANGED Viewed

@@ -1,8 +1,7 @@
 {% from "utils/misc.liq" import report_jobs -%}
 <script>
-    import { Image, Descr } from "$libs";
-    import { ListItem, UnorderedList } from "$ccs";
+    import { Image, DataTable, Descr, Math } from "$libs";
+    import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
 </script>
 <h1>Introduction</h1>
@@ -12,7 +11,7 @@
 </Descr>
 <h2>Workflow of the original analysis</h2>
-<Image src="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape/raw/master/pipeline.png" />
+<Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
 <h2>Reference</h2>
 <UnorderedList>
@@ -29,61 +28,66 @@
 <h2>Analyses with this pipeline</h2>
 <Descr>
-The cells are grouped at 2 dimensions: `grouping`, usually the cell types, and `subsetting`, usually
-the groups that bring biological meaning (i.e. different timepoints or sample types (tumor/normal)).
+The cells are grouped at 2 dimensions: `subset_by`, usually the clinical groups that bring biological meaning
+(i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
 </Descr>
 <UnorderedList>
 <ListItem>
     MetabolicPathwayActivity (this page)
-    <p>Investigating the metabolic pathways of the cells in different groups and subsets.</p>
-    <p>The cells are first grouped by subsets and then the metabolic activities are examined for each groups in different subsets.</p>
+    <Tile>
+        <p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
+        <p>The cells are first divided into subsets and then the metabolic activities are examined for each group in the different subsets.</p>
+        <p> </p>
+        <p>A pathway activity score is defined as the relative gene expression value averaged over all genes in the pathway and all cells of the group.</p>
+        <p> </p>
+        <p>For the i-th metabolic gene, we first calculated its mean expression level across cells of the j-th cell group:</p>
+        <Math displayMode>E_{i,j} = \frac{ {\mathop {\sum }\nolimits_{k = 1}^{n_j} g_{i,k}}}{ {n_j}},\,i \in 1 \ldots M,j \in 1 \ldots N</Math>
+        <p>
+            In which n<sub>j</sub> is the number of cells in the j-th cell group, g<sub>i,k</sub> is the expression level of the i-th gene in the k-th cell in this cell group,
+            M is the number of metabolic genes, and N is the number of cell groups. The relative expression level of the i-th gene in the j-th cell group was then
+            defined as the ratio of E<sub>i,j</sub> to its average over all cell groups:
+        </p>
+        <Math displayMode>r_{i,j} = \frac{ {E_{i,j}}}{ {\frac{1}{N}\mathop {\sum }\nolimits_j^N E_{i,j}}}</Math>
+        <p>
+            Here r<sub>i,j</sub> quantifies the relative expression level of gene i in cell group j compared to the average expression level of this gene in all cell groups.
+            A r<sub>i,j</sub> value &gt;1 means that expression level of gene i is higher in cell group j compared to its average expression level over all cell groups.
+            The pathway activity score for the t-th pathway and the j-th cell group was then defined as the weighted average of r<sub>i,j</sub> over all genes included
+            in this pathway:
+        </p>
+        <Math displayMode>p_{t,j} = \frac{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i \times r_{i,j}}}{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i}}</Math>
+        <p>Where p<sub>t,j</sub> represents the activity of the t-th pathway in the j-th cell group, m<sub>t</sub> is the number of genes in the pathway t, w<sub>i</sub>
+        is the weighting factor equal to the reciprocal of number of pathways that include the i-th gene.
+        To avoid the possibility that pathway activity scores were affected by genes with low expression level or high drop-out rates,
+        we excluded the outliers in each pathway defined by genes with relative expression levels greater than three times 75th percentile or below 1/3 times 25th percentile.
+        Statistical significance of higher or lower pathway activity in a specific cell group was then evaluated by a random permutation test,
+        in which the cell group labels were randomly shuffled 5000 times (for the scRNA datasets) to simulate a null distribution of the pathway activity scores
+        and compare to the pathway activity scores in the original, non-shuffled dataset.
+        For the pathway activity score p<sub>t,j</sub>, we then calculated a p-value defined as the fraction of random pathway activity scores larger than p<sub>t,j</sub>
+        (if p<sub>t,j</sub> is &gt;1) or smaller than p<sub>t,j</sub> (if p<sub>t,j</sub> is &lt;1) to assess if activity of this pathway is significantly
+        higher or lower in this cell group than average.</p>
+    </Tile>
 </ListItem>
 <ListItem>
     <a href="../MetabolicPathwayHeterogeneity/index.html">MetabolicPathwayHeterogeneity</a>
-    <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
+    <Tile>
+        <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
+    </Tile>
 </ListItem>
 <ListItem>
     <a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
-    <p>Gene set enrichment analysis against the metabolic pathways for groups in different subsets.</p>
-</ListItem>
-<ListItem>
-    <a href="../MetabolicFeaturesIntraSubsets/index.html">MetabolicFeaturesIntraSubsets</a>
-    <p>Gene set enrichment analysis against the metabolic pathways for subsets in different groups.</p>
+    <Tile>
+        <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
+    </Tile>
 </ListItem>
 </UnorderedList>
-{%- macro report_job(job, h=2) -%}
-  {%- for ssdir in job.out.outdir | glob: "*" -%}
-  {%- if not isdir(ssdir) -%}
-    {%- continue -%}
-  {%- endif -%}
-  <h{{h}}>{{ ssdir | stem }}</h{{h}}>
-  <h{{ h+1 }}>Metabolic pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
-  <Image src="{{ssdir | joinpaths: 'KEGGpathway_activity_heatmap.png'}}" />
-  <h{{ h+1 }}>Distributions of pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
-  <Image src="{{ssdir | joinpaths: 'pathway_activity_violinplot.png'}}" />
-  {%- endfor -%}
-  {% if job.out.outdir | glob: "*.group-*.png" -%}
-  <h{{h}}>Merged heatmaps</h{{h}}>
-  {% for group_hm in job.out.outdir | glob: "*.group-*.png" -%}
-    {%- if group_hm.endswith(".group-unclustered.png") -%}
-        <h{{h+1}}>{{group_hm | stem | replace: ".group-unclustered", " (Group Unclustered)"}}</h{{h+1}}>
-        <Image src="{{group_hm}}" />
-    {%- else -%}
-        <h{{h+1}}>{{group_hm | stem | replace: ".group-clustered", " (Group Clustered)"}}</h{{h+1}}>
-        <Image src="{{group_hm}}" />
-    {%- endif -%}
-  {%- endfor -%}
-  {%- endif -%}
+{%- macro report_job(job, h=1) -%}
+    {{ job | render_job: h=h }}
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-  <h1>{{job.in.sobjfile | stem | escape}}</h1>
+    <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte CHANGED Viewed

@@ -1,15 +1,72 @@
-{% from "utils/misc.liq" import report_jobs, table_of_images -%}
+{% from "utils/misc.liq" import report_jobs -%}
 <script>
-    import { Image, Descr } from "$libs";
+    import { Image, DataTable, Descr } from "$libs";
+    import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
 </script>
-{%- macro report_job(job, h=2) -%}
-  {{ job | render_job: h=h }}
+<h1>Introduction</h1>
+<Descr>
+    Metabolic landscape of single cells in the tumor microenvironment.
+</Descr>
+<h2>Workflow of the original analysis</h2>
+<Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
+<h2>Reference</h2>
+<UnorderedList>
+    <ListItem><a href="https://www.nature.com/articles/s41467-019-11738-0" target="_blank">
+        Xiao, Zhengtao, Ziwei Dai, and Jason W. Locasale.
+        "Metabolic landscape of the tumor microenvironment at single cell resolution."
+        Nature communications 10.1 (2019): 1-12.
+    </a></ListItem>
+    <ListItem><a href="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape" target="_blank">
+        Original pipeline
+    </a></ListItem>
+</UnorderedList>
+<h2>Analyses with this pipeline</h2>
+<Descr>
+The cells are grouped at 2 dimensions: `subset_by`, usually the clinical groups that bring biological meaning
+(i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
+</Descr>
+<UnorderedList>
+<ListItem>
+    <a href="../MetabolicPathwayActivity/index.html">MetabolicPathwayActivity</a>
+    <Tile>
+    <p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
+    </Tile>
+</ListItem>
+<ListItem>
+    MetabolicPathwayHeterogeneity (this page)
+    <Tile>
+    <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
+    <p>
+        The PCA analysis was applied on normalized expression values.
+        The function prcomp in R was used to perform the PCA analysis.
+        For each metabolic gene, we computed its PCA score defined as the sum of absolute values of the loadings of this gene in the top PCs
+        that in total account for certain variance to measure variability of gene expression across cells.
+        We then sorted the PCA scores of the genes in descending order and applied GSEA analysis to the ranked list of genes to identify metabolic pathways
+        enriched in genes with highest variability.
+    </p>
+    </Tile>
+</ListItem>
+<ListItem>
+    <a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
+    <Tile>
+    <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
+    </Tile>
+</ListItem>
+</UnorderedList>
+{%- macro report_job(job, h=1) -%}
+    {{ job | render_job: h=h }}
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-  <h1>{{job.in.sobjfile | stem | escape}}</h1>
+    <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/snp/PlinkCallRate.svelte CHANGED Viewed

@@ -5,13 +5,13 @@
 {%- macro report_job(job, h=1) -%}
     <h{{h+1}}>Sample Call Rate</h{{h+1}}>
-    {%- for pngfile in job.out.outdir | joinpaths: '*.samplecr.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.samplecr.png' -%}
     <Descr>Cutoff: {{envs.samplecr}}</Descr>
     <Image src="{{pngfile}}" />
     {%- endfor -%}
     <h{{h+1}}>Variant Call Rate</h{{h+1}}>
-    {%- for pngfile in job.out.outdir | joinpaths: '*.varcr.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.varcr.png' -%}
     <Descr>Cutoff: {{envs.varcr}}</Descr>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/snp/PlinkFreq.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
         {% set metric_col = pngfile | stem | ext0 %}
         <h{{h+1}}>{{metric_col}} distribution</h{{h+1}}>
         <Image src="{{pngfile}}" />

biopipen/reports/snp/PlinkHWE.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>Distribution</h{{h+1}}>
     <Descr>Cutoff: {{envs.cutoff}}</Descr>
     <Image src="{{pngfile}}" />

biopipen/reports/snp/PlinkHet.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>Distribution</h{{h+1}}>
     <Descr>Cutoff: [mean - {{envs.cutoff}} x sd, mean + {{envs.cutoff}} x sd]</Descr>
     <Image src="{{pngfile}}" />

biopipen/reports/snp/PlinkIBD.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>Heatmap</h{{h+1}}>
     <Descr>PI_HAT threshold = {{envs.pihat}}</Descr>
     <Image src="{{pngfile}}" />

biopipen/reports/tcr/CDR3AAPhyschem.svelte CHANGED Viewed

@@ -26,7 +26,7 @@
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-    <h1>{{job.out.outdir | stem | replace: ".immunarch", ""}}</h1>
+    <h1>{{job.out.outdir | stem | replace: ".scRep", ""}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen/scripts/bam/CNAClinic.R CHANGED Viewed

@@ -1,14 +1,40 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(parallel)
 library(dplyr)
+library(biopipen.utils)
 library(CNAclinic)
+# https://github.com/sdchandra/CNAclinic/issues/4
+.reorderByChrom.patched <- function(x){
+    chromosome <- as.character(x$chromosome)
+    chromosome[which(chromosome == "X")] <- "23"
+    chromosome[which(chromosome == "Y")] <- "24"
+    chromosome[which(chromosome == "MT")] <- "25"
+    x$chromosome <- as.numeric(chromosome)
+    # Error in xtfrm.data.frame(x) : cannot xtfrm data frames
+    # x <- x[order(x["chromosome"], x["start"]), ]
+    x <- x[order(x[, "chromosome"], x[, "start"]), ]
+    x$chromosome <- as.character(x$chromosome)
+    # Replace 23 by X:
+    x$chromosome[which(x$chromosome == "23")] <- "X"
+    # Replace 24 by Y
+    x$chromosome[which(x$chromosome == "24")] <- "Y"
+    # Replace 25 by MT
+    x$chromosome[which(x$chromosome == "25")] <- "MT"
+    return(x)
+}
+monkey_patch("CNAclinic", ".reorderByChrom", .reorderByChrom.patched)
 metafile = {{in.metafile | r}}
 outdir = {{out.outdir | r}}
 ncores = {{envs.ncores | int}}
 binsizer = {{envs.binsizer | r}}
-binsize = {{envs.binsize | int}}
+binsize = {{envs.binsize | r}}
 seed = {{envs.seed | int}}
 genome = {{envs.genome | r}}
 run_args = {{envs.run_args | r}}
@@ -30,7 +56,11 @@ if (("Group" %in% metacols) && !("Patient" %in% metacols)) {
 }
 if (!("Binsizer" %in% metacols) && is.null(binsizer) && is.null(binsize)) {
-    stop("The metadata file must have a column named 'Binsizer' or the `envs.binsizer` must be specified")
+    stop(
+        "The metadata file must have a column named 'Binsizer' or ",
+        "the `envs.binsizer` must be specified when no `envs.binsize` is provided. ",
+        "The Binsizer column should indicate which samples are to be used for binsize selection."
+    )
 }
 # add missing columns
@@ -109,7 +139,7 @@ do_one_sample = function(i) {
         bamfile,
         sample,
         refSamples=refSamples,
-        binSize=binsize
+        binSize=binsize / 1000
     )
     run_args_i = run_args
@@ -119,7 +149,12 @@ do_one_sample = function(i) {
     plot_args_i = plot_args
     plot_args_i$object = CNAData
-    genomewide_plot = do_call(plotSampleData, plot_args_i)
+    genomewide_plot <- tryCatch({
+        do_call(plotSampleData, plot_args_i)
+    }, error = function(e) {
+        message("Error in plotting genomewide data for sample ", sample, ": ", e$message)
+        return(ggplot2::ggplot() + ggplot2::labs(title = paste("Error in plotting genomewide data for sample", sample)))
+    })
     odir = file.path(outdir, sample)
     dir.create(odir, recursive = TRUE, showWarnings = FALSE)

biopipen/scripts/bam/CNVpytor.py CHANGED Viewed

@@ -3,6 +3,7 @@ from pathlib import Path
 import warnings
 import pandas
 from datetime import datetime
+from diot import Diot  # pyright: ignore
 from biopipen.utils.reference import bam_index
 from biopipen.utils.misc import run_command, dict_to_cli_args, logger
@@ -16,7 +17,7 @@ refdir = {{envs.refdir | quote}}  # pyright: ignore
 genome = {{envs.genome | quote}}  # pyright: ignore
 chrsize: str = {{envs.chrsize | quote}}  # pyright: ignore
 filters: dict = {{envs.filters | repr}}  # pyright: ignore
-args: dict = {{envs | dict}}  # pyright: ignore
+args: Diot = {{envs | repr}}  # pyright: ignore
 del args['cnvpytor']
 del args['ncores']

biopipen/scripts/bam/ControlFREEC.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
 import glob
-import rtoml
 import shutil
-from diot import Diot
+from diot import Diot  # type: ignore
 from biopipen.utils.misc import dict_to_cli_args, run_command
 bamfile = {{ in.bamfile | quote }}  # pyright: ignore # noqa
@@ -79,7 +78,7 @@ config.BAF |= Diot(
 os.makedirs(f"{outdir}/FREEC-output", exist_ok=True)
-config_ini = rtoml.dumps(config).replace('"', "")
+config_ini = config.to_toml().replace('"', "")   # type: ignore
 with open(configfile, "w") as fconf:
     fconf.write(config_ini)

biopipen/scripts/bam/SamtoolsView.py ADDED Viewed

@@ -0,0 +1,33 @@
+from pathlib import PosixPath  # type: ignore # noqa
+from biopipen.utils.misc import run_command, dict_to_cli_args
+from biopipen.utils.reference import bam_index
+bamfile: str = {{ in.bamfile | quote }}  # pyright: ignore # noqa
+outfile: str = {{ out.outfile | quote }}  # pyright: ignore # noqa
+envs: dict = {{envs | attr: "to_dict" | call}}  # pyright: ignore  # noqa
+ncores = envs.pop("ncores")
+samtools = envs.pop("samtools")
+should_index = envs.pop("index")
+def run_samtools(infile):
+    cmd = [
+        samtools,
+        "view",
+        "-b",
+        "--threads",
+        str(ncores),
+        "-o",
+        outfile,
+    ] + dict_to_cli_args(envs, dashify=True) + [infile]
+    run_command(cmd, fg=True)
+    if should_index:
+        bam_index(outfile, tool="samtools", samtools=samtools, ncores=ncores)
+    return outfile
+if __name__ == "__main__":
+    infile = bam_index(bamfile, tool="samtools", samtools=samtools, ncores=ncores)
+    run_samtools(infile)

biopipen/scripts/cnv/AneuploidyScore.R CHANGED Viewed

@@ -1,11 +1,9 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(AneuploidyScore)
 library(dplyr)
 library(tidyr)
 library(tibble)
-library(ggplot2)
-library(ggprism)
+library(plotthis)
+library(biopipen.utils)
 segfile = {{in.segfile | r}}
 outdir = {{out.outdir | r}}
@@ -59,7 +57,15 @@ getCAA <- function(segf, cytoarm, tcn_col,
     }
     ## Create a GRanges object with all unique intervals between segc and cytoc
-    starts <- sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
+    starts <- tryCatch({
+      sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
+    }, error=function(e) {
+      warning("Error to detect start on chromosome: ", chr_id, immediate. = TRUE)
+      NULL
+    })
+    if (is.null(starts)) {
+      return(NULL)
+    }
     ends <- sort(c(GenomicRanges::end(segc), GenomicRanges::end(cytoc)))
     combc <- GRanges(seqnames=chr_id,
                      IRanges(start=unique(sort(c(starts, ends[-length(ends)]+1))),
@@ -123,7 +129,7 @@ getCAA <- function(segf, cytoarm, tcn_col,
     return(combc_arms)
   })
   names(seg_cyto_chr) <- names(seg_chr)
+  seg_cyto_chr <- seg_cyto_chr[!sapply(seg_cyto_chr, is.null)]
   return(as(seg_cyto_chr, "GRangesList"))
 }
@@ -250,11 +256,17 @@ sig_min = min(-1, plotdata$Signal, na.rm=TRUE)
 sig_max = max(1, plotdata$Signal, na.rm=TRUE)
 png(file.path(outdir, "AneuploidyScore.png"), width=1000, height=600, res=100)
-ggplot(plotdata) +
-    geom_bar(aes(x=Arms, y=Signal, fill=Type), stat="identity") +
-    geom_hline(yintercept=0, color="black", size=0.1) +
-    ylim(c(sig_min, sig_max)) +
-    theme_prism() +
-    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
-    facet_wrap(~SignalType, scales="free_y", nrow=2)
+p <- BarPlot(
+    plotdata,
+    x = "Arms",
+    y = "Signal",
+    fill = "Type",
+    facet_by = "SignalType",
+    facet_nrow = 2,
+    y_min = sig_min,
+    y_max = sig_max,
+    x_text_angle = 90,
+    aspect.ratio = 0.2
+)
+print(p)
 dev.off()

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl