PyPI - biopipen - Versions diffs - 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +307 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +14 -2
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/ns/tcr.py CHANGED Viewed

@@ -1,10 +1,11 @@
 """Tools to analyze single-cell TCR sequencing data"""
+from pipen.utils import mark
 from ..core.defaults import SCRIPT_DIR
 from ..core.proc import Proc
 from ..core.config import config
+@mark(deprecated="{proc.name} is deprecated, use ScRepLoading instead.")
 class ImmunarchLoading(Proc):
     """Immuarch - Loading data
@@ -94,6 +95,7 @@ class ImmunarchLoading(Proc):
     script = "file://../scripts/tcr/ImmunarchLoading.R"
+@mark(deprecated=True)
 class ImmunarchFilter(Proc):
     """Immunarch - Filter data
@@ -172,6 +174,7 @@ class ImmunarchFilter(Proc):
     script = "file://../scripts/tcr/ImmunarchFilter.R"
+@mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
 class Immunarch(Proc):
     """Exploration of Single-cell and Bulk T-cell/Antibody Immune Repertoires
@@ -857,6 +860,7 @@ class Immunarch(Proc):
     }
+@mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
 class SampleDiversity(Proc):
     """Sample diversity and rarefaction analysis
@@ -905,6 +909,7 @@ class SampleDiversity(Proc):
     }
+@mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
 class CloneResidency(Proc):
     """Identification of clone residency
@@ -1018,6 +1023,7 @@ class CloneResidency(Proc):
     plugin_opts = {"report": "file://../reports/tcr/CloneResidency.svelte"}
+@mark(deprecated=True)
 class Immunarch2VDJtools(Proc):
     """Convert immuarch format into VDJtools input formats.
@@ -1054,6 +1060,7 @@ class Immunarch2VDJtools(Proc):
     script = "file://../scripts/tcr/Immunarch2VDJtools.R"
+@mark(deprecated=True)
 class ImmunarchSplitIdents(Proc):
     """Split the data into multiple immunarch datasets by Idents from Seurat
@@ -1087,6 +1094,7 @@ class ImmunarchSplitIdents(Proc):
     script = "file://../scripts/tcr/ImmunarchSplitIdents.R"
+@mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
 class VJUsage(Proc):
     """Circos-style V-J usage plot displaying the frequency of
     various V-J junctions using vdjtools.
@@ -1129,6 +1137,7 @@ class VJUsage(Proc):
     plugin_opts = {"report": "file://../reports/tcr/VJUsage.svelte"}
+@mark(deprecated=True)
 class Attach2Seurat(Proc):
     """Attach the clonal information to a Seurat object as metadata
@@ -1191,15 +1200,12 @@ class TCRClustering(Proc):
     CDR3 sequence may be shared by multiple cells.
     Input:
-        immfile: The immunarch object in RDS
+        screpfile: The TCR data object loaded by `scRepertoire::CombineTCR()` or
+            `scRepertoire::CombineExpression()`
     Output:
-        immfile: The immnuarch object in RDS with TCR cluster information
-        clusterfile: The cluster file.
-            Columns are CDR3.aa, TCR_Cluster, TCR_Cluster_Size and
-            TCR_Cluster_Size1.
-            TCR_Cluster_Size is the number of cells in the cluster.
-            TCR_Cluster_Size1 is the unique CDR3 sequences in the cluster.
+        outfile: The `scRepertoire` object in qs with TCR cluster information.
+            Column `TCR_Cluster` will be added to the metadata.
     Envs:
         tool (choice): The tool used to do the clustering, either
@@ -1208,41 +1214,40 @@ class TCRClustering(Proc):
             For GIANA, using TRBV mutations is not supported
             - GIANA: by Li lab at UT Southwestern Medical Center
             - ClusTCR: by Sebastiaan Valkiers, etc
-        prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
-            The prefixed barcodes will be used to match the barcodes in `in.metafile`.
-            Not used if `in.metafile` is not specified.
-            If `None` (default), `immdata$prefix` will be used.
         python: The path of python with `GIANA`'s dependencies installed
             or with `clusTCR` installed. Depending on the `tool` you choose.
+        within_sample (flag): Whether to cluster the TCR clones within each sample.
+            When `in.screpfile` is a `Seurat` object, the samples are marked by
+            the `Sample` column in the metadata.
         args (type=json): The arguments for the clustering tool
             For GIANA, they will be passed to `python GIAna.py`
             See <https://github.com/s175573/GIANA#usage>.
             For ClusTCR, they will be passed to `clustcr.Clustering(...)`
             See <https://svalkiers.github.io/clusTCR/docs/clustering/how-to-use.html#clustering>.
-        on_multi (flag;hidden): Whether to run clustering on
-            multi-chain seq or the seq read and processed by immunarch
+        chain (choice): The TCR chain to use for clustering.
+            - alpha: TCR alpha chain (the first sequence in CTaa, separated by `_`)
+            - beta: TCR beta chain (the second sequence in CTaa, separated by `_`)
+            - both: Both TCR alpha and beta chains
     Requires:
         clusTCR:
             - if: {{ proc.envs.tool == 'ClusTCR' }}
             - check: {{ proc.envs.python }} -c "import clustcr"
     """  # noqa: E501
-    input = "immfile:file"
-    output = [
-        "immfile:file:{{in.immfile | basename}}",
-        "clusterfile:file:{{in.immfile | stem}}.clusters.txt",
-    ]
+    input = "screpfile:file"
+    output = "outfile:file:{{in.screpfile | stem}}.tcr_clustered.qs"
     lang = config.lang.rscript
     envs = {
         "tool": "GIANA",  # or ClusTCR
-        "prefix": None,
-        "on_multi": False,
         "python": config.lang.python,
+        "within_sample": True,  # whether to cluster the TCR clones within each sample
         "args": {},
+        "chain": "both",  # alpha, beta, both
     }
     script = "file://../scripts/tcr/TCRClustering.R"
+@mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
 class TCRClusterStats(Proc):
     """Statistics of TCR clusters, generated by `TCRClustering`.
@@ -1398,6 +1403,7 @@ class TCRClusterStats(Proc):
     }
+@mark(deprecated=True)
 class CloneSizeQQPlot(Proc):
     """QQ plot of the clone sizes
@@ -1457,15 +1463,9 @@ class CDR3AAPhyschem(Proc):
     - [Zamyatnin, A. A. Protein volume in solution. Prog. Biophys. Mol. Biol. 24, 107-123 (1972).](https://www.sciencedirect.com/science/article/pii/0079610772900053)
     Input:
-        immdata: The data loaded by `immunarch::repLoad()`, saved in RDS format
-        srtobj: The `Seurat` object, saved in RDS format, used to get the
-            metadata for each cell (e.g. cell type)
-            It could also be a tab delimited file with `meta.data` of the
-            `Seurat` object.
-            It has to have a `Sample` column, which is used to match the
-            `immdata` object.
-            It is optional, if not provided, the metadata from the `immdata`
-            object will be used.
+        scrfile: The data loaded by `ScRepCombiningExpression`, saved in RDS or qs/qs2 format.
+            The data is actually generated by `scRepertoire::combineExpression()`.
+            The data must have both TRA and TRB chains.
     Output:
         outdir: The output directory
@@ -1474,41 +1474,32 @@ class CDR3AAPhyschem(Proc):
         group: The key of group in metadata to define the groups to
             compare. For example, `CellType`, which has cell types annotated
             for each cell in the combined object (immdata + Seurat metadata)
-        comparison (type=json): A dict of two groups, with keys as the
+        comparison (type=auto): A dict of two groups, with keys as the
             group names and values as the group labels. For example,
             ```toml
             Treg = ["CD4 CTL", "CD4 Naive", "CD4 TCM", "CD4 TEM"]
             Tconv = "Tconv"
             ```
-        prefix: The prefix of the cell names (rownames) in the metadata.
-            The prefix is usually not needed in immdata, as the data is stored
-            in the `immdata` object separately for each sample. However, the
-            `Seurat` object has a combined `meta.data` for all the samples,
-            so the prefix is needed. Usually, the prefix is the sample name.
-            For example, `Sample1-AACGTTGAGGCTACGT-1`.
-            We need this prefix to add the sample name to the cell names in
-            immdata, so that we can match the cells in `immdata` and
-            `Seurat` object. Set it to `None` or an empty string if the
-            `Seurat` object has the same cell names as `immdata`. You can use
-            placeholders to specify the prefix, e.g., `{Sample}_`. In such a
-            case, the `Sample` column must exist in the `Seurat` object.
+            Or simply a list of two groups, for example, `["Treg", "Tconv"]` when
+            they are both in the `group` column.
         target: Which group to use as the target group. The target
             group will be labeled as 1, and the other group will be labeled as
             0 in the regression.
-        subset: A column, or a list of columns separated by comma,
-            in the merged object to subset the cells to perform the regression,
-            for each group in the columns.
+            If not specified, the first group in `comparison` will be used as
+            the target group.
+        each (auto): A column, or a list of columns or a string of columns separated by comma.
+            The columns will be used to split the data into multiple groups and the regression will be
+            applied to each group separately.
             If not provided, all the cells will be used.
     """  # noqa: E501
-    input = "immdata:file,srtobj:file"
+    input = "scrfile:file"
     output = "outdir:dir:{{in.immdata | stem}}.cdr3aaphyschem"
     lang = config.lang.rscript
     envs = {
         "group": None,
         "comparison": None,
-        "prefix": "{Sample}_",
         "target": None,
-        "subset": None,
+        "each": None,
     }
     script = "file://../scripts/tcr/CDR3AAPhyschem.R"
     plugin_opts = {"report": "file://../reports/tcr/CDR3AAPhyschem.svelte"}
@@ -1548,29 +1539,17 @@ class TESSA(Proc):
             [link](https://www.nature.com/articles/s42256-021-00383-2)
     Input:
-        immdata: The immunarch object in RDS file or text file of TCR data loaded by
-            [`ImmunarchLoading`](!!#biopipennstcrimmunarchloading)
-        srtobj: The `Seurat` object, saved in RDS format, with dimension
-            reduction performed if you want to use them to represent the
-            transcriptome of T cells.
-            This could also be a tab delimited file (can be gzipped) with
-            expression matrix or dimension reduction results.
+        screpdata: The data loaded by `ScRepCombiningExpression`, saved in RDS or
+            qs/qs2 format.
+            The data is actually generated by `scRepertoire::combineExpression()`.
+            The data must have both TRA and TRB chains.
     Output:
-        outfile: The tab-delimited file with three columns
-            (`barcode`, `TESSA_Cluster` and `TESSA_Cluster_Size`) or
-            an RDS file if  `in.srtobj` is an RDS file of a Seurat object, with
+        outfile: a qs file of a Seurat object, with
             `TESSA_Cluster` and `TESSA_Cluster_Size` added to the `meta.data`
     Envs:
         python: The path of python with `TESSA`'s dependencies installed
-        prefix: The prefix of the cell barcodes in the `Seurat` object.
-            Once could use a fixed prefix, or a placeholder with the column
-            name in meta data. For example, `"{Sample}_"` will replace the
-            placeholder with the value of the column `Sample` in meta data.
-            If `in.immdata` is text file, the prefix will be ignored and the
-            barcode should be already prefixed.
-            If `None` and `in.immdata` is RDS file, `immdata$prefix` will be used.
         within_sample (flag): Whether the TCR networks are constructed only
             within TCRs from the same sample/patient (True) or with all the
             TCRs in the meta data matrix (False).
@@ -1582,21 +1561,13 @@ class TESSA(Proc):
             If True, the tessa will not update b in the MCMC iterations.
         max_iter (type=int): The maximum number of iterations for MCMC.
         save_tessa (flag): Save tessa detailed results to seurat object?
-            Only works if `in.srtobj` is an RDS file of a Seurat object.
             It will be saved to `sobj@misc$tessa`.
     """
-    input = "immdata:file,srtobj:file"
-    output = """outfile:file:
-        {%- if in.srtobj.lower().endswith(".rds") -%}
-        {{in.srtobj | stem}}.tessa.RDS
-        {%- else -%}
-        {{in.immdata | stem}}.tessa.txt
-        {%- endif -%}
-    """
+    input = "screpdata:file"
+    output = "outfile:file:{{in.screpdata | stem}}.tessa.qs"
     lang = config.lang.rscript
     envs = {
         "python": config.lang.python,
-        "prefix": None,
         "assay": None,
         "within_sample": False,
         "predefined_b": False,
@@ -1682,47 +1653,144 @@ class ScRepLoading(Proc):
     """Load the single cell TCR/BCR data into a `scRepertoire` compatible object
     This process loads the single cell TCR/BCR data into a `scRepertoire`
-    compatible object. Later, `scRepertoire::combineExpression` can be used to
-    combine the expression data with the TCR/BCR data.
+    (>= v2.0.8, < v2.3.2) compatible object. Later, `scRepertoire::combineExpression`
+    can be used to combine the expression data with the TCR/BCR data.
-    For the data path specified at `TCRData` in the input file, we will first find
-    `filtered_contig_annotations.csv` and `filtered_config_annotations.csv.gz` in the
-    path. If neighter of them exists, we will find `all_contig_annotations.csv` and
-    `all_contig_annotations.csv.gz` in the path and a warning will be raised
-    (You can find it at `./.pipen/<pipeline-name>/ImmunarchLoading/<job.index>/job.stderr`).
+    For the data path specified at `TCRData`/`BCRData` in the input file
+    (`in.metafile`), will be used to find the TCR/BCR data files and
+    `scRepertoire::loadContigs()` will be used to load the data.
-    If none of the files exists, an error will be raised.
+    A directory can be specified in `TCRData`/`BCRData`, then
+    `scRepertoire::loadContigs()` will be used directly to load the data from the
+    directory. Otherwise if a file is specified, it will be symbolically linked to
+    a directory for `scRepertoire::loadContigs()` to load.
+    Note that when the file name can not be recognized by `scRepertoire::loadContigs()`,
+    `envs.format` must be set for the correct format of the data.
     Input:
         metafile: The meta data of the samples
             A tab-delimited file
             Two columns are required:
             * `Sample` to specify the sample names.
-            * `TCRData` to assign the path of the data to the samples,
+            * `TCRData`/`BCRData` to assign the path of the data to the samples,
             and this column will be excluded as metadata.
-            Immunarch is able to fetch the sample names from the names of
-            the target files. However, 10x data yields result like
-            `filtered_contig_annotations.csv`, which doesn't have any name
-            information.
     Output:
-        outfile: The `scRepertoire` compatible object in RDS format
+        outfile: The `scRepertoire` compatible object in qs/qs2 format
     Envs:
-        combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR` function.
+        type (choice): The type of the data to load.
+            - TCR: T cell receptor data
+            - BCR: B cell receptor data
+        combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR`
+            function.
             See also <https://www.borch.dev/uploads/screpertoire/reference/combinetcr>
+        combineBCR (type=json): The extra arguments for `scRepertoire::combineBCR`
+            function.
+            See also <https://www.borch.dev/uploads/screpertoire/reference/combinebcr>
         exclude (auto): The columns to exclude from the metadata to add to the object.
-            A list of column names to exclude or a string with column names separated by `,`.
-            By default, `TCRData` and `RNAData` will be excluded.
+            A list of column names to exclude or a string with column names separated
+            by `,`. By default, `BCRData`, `TCRData` and `RNAData` will be excluded.
+        tmpdir: The temporary directory to store the symbolic links to the
+            TCR/BCR data files.
+        format (choice): The format of the TCR/BCR data files.
+            - 10X: 10X Genomics data, which is usually in a directory with
+                `filtered_contig_annotations.csv` file.
+            - AIRR: AIRR format, which is usually in a file with
+                `airr_rearrangement.tsv` file.
+            - BD: Becton Dickinson data, which is usually in a file with
+                `Contigs_AIRR.tsv` file.
+            - Dandelion: Dandelion data, which is usually in a file with
+                `all_contig_dandelion.tsv` file.
+            - Immcantation: Immcantation data, which is usually in a file with
+                `data.tsv` file.
+            - JSON: JSON format, which is usually in a file with `.json` extension.
+            - ParseBio: ParseBio data, which is usually in a file with
+                `barcode_report.tsv` file.
+            - MiXCR: MiXCR data, which is usually in a file with `clones.tsv` file.
+            - Omniscope: Omniscope data, which is usually in a file with `.csv`
+                extension.
+            - TRUST4: TRUST4 data, which is usually in a file with
+                `barcode_report.tsv` file.
+            - WAT3R: WAT3R data, which is usually in a file with
+                `barcode_results.csv` file.
+            See also: <https://rdrr.io/github/ncborcherding/scRepertoire/man/loadContigs.html>
+            If not provided, the format will be guessed from the file name by `scRepertoire::loadContigs()`.
     """  # noqa: E501
     input = "metafile:file"
-    output = "outfile:file:{{in.metafile | stem}}.scRep.RDS"
+    output = "outfile:file:{{in.metafile | stem}}.scRep.qs"
     lang = config.lang.rscript
-    envs = {"combineTCR": {"samples": True}, "exclude": ["TCRData", "RNAData"]}
+    envs = {
+        "type": "TCR",  # or BCR
+        "combineTCR": {"samples": True},
+        "combineBCR": {"samples": True},
+        "exclude": ["BCRData", "TCRData", "RNAData"],
+        "format": None,
+        "tmpdir": config.path.tmpdir,
+    }
     script = "file://../scripts/tcr/ScRepLoading.R"
+class ScRepCombiningExpression(Proc):
+    """Combine the scTCR/BCR data with the expression data
+    This process combines the scTCR/BCR data with the expression data using
+    `scRepertoire::combineExpression` function. The expression data should be
+    in `Seurat` format. The `scRepertoire` object should be a combined contig
+    object, usually generated by `scRepertoire::combineTCR` or
+    `scRepertoire::combineBCR`.
+    See also: <https://www.borch.dev/uploads/screpertoire/reference/combineexpression>.
+    Input:
+        screpfile: The `scRepertoire` object in RDS/qs format
+        srtobj: The `Seurat` object, saved in RDS/qs format
+    Output:
+        outfile: The `Seurat` object with the TCR/BCR data combined
+    Envs:
+        cloneCall: How to call the clone - VDJC gene (gene), CDR3 nucleotide (nt),
+            CDR3 amino acid (aa), VDJC gene + CDR3 nucleotide (strict) or
+            a custom variable in the data.
+        chain: indicate if both or a specific chain should be used
+            e.g. "both", "TRA", "TRG", "IGH", "IGL".
+        group-by: The column label in the combined clones in which clone frequency will
+            be calculated. NULL or "none" will keep the format of input.data.
+        proportion (flag): Whether to proportion (TRUE) or total frequency (FALSE) of
+            the clone based on the group.by variable.
+        filterNA (flag): Method to subset Seurat/SCE object of barcodes without clone
+            information
+        cloneSize (type=json): The bins for the grouping based on proportion or
+            frequency.
+            If proportion is FALSE and the cloneSizes are not set high enough based on
+            frequency, the upper limit of cloneSizes will be automatically updated.
+        addLabel (flag): This will add a label to the frequency header, allowing the
+            user to try multiple group.by variables or recalculate frequencies after
+            subsetting the data.
+    """
+    input = "screpfile:file,srtobj:file"
+    output = "outfile:file:{{in.screpfile | stem}}.qs"
+    lang = config.lang.rscript
+    envs = {
+        "cloneCall": "aa",
+        "chain": "both",
+        "group-by": "Sample",
+        "proportion": True,
+        "filterNA": False,
+        "cloneSize": {
+            "Rare": 1e-04,
+            "Small": 0.001,
+            "Medium": 0.01,
+            "Large": 0.1,
+            "Hyperexpanded": 1,
+        },
+        "addLabel": False,
+    }
+    script = "file://../scripts/tcr/ScRepCombiningExpression.R"
 class ClonalStats(Proc):
     """Visualize the clonal information.
@@ -1730,7 +1798,7 @@ class ClonalStats(Proc):
     information.
     Input:
-        screpfile: The `scRepertoire` object in RDS format
+        screpfile: The `scRepertoire` object in RDS/qs format
     Output:
         outdir: The output directory containing the plots

biopipen/reports/bam/CNVpytor.svelte CHANGED Viewed

@@ -4,19 +4,16 @@
     import { Tabs, Tab, TabContent } from "$ccs";
 </script>
-{% for case in envs.cases %}
-<h1>{{case}}</h1>
-{%  for binsize in envs.cases[case].binsizes %}
-<h2>Binsize: {{binsize}}</h2>
+{% for binsize in envs.binsizes %}
+<h1>Binsize: {{binsize}}</h1>
 {% from_ os.path import join, basename %}
 {% assign manplots = [] %}
 {% assign circplots = [] %}
 {% assign samples = [] %}
 {% for job in jobs %}
-{%  set manplot = job.out.outdir | joinpaths: case, "manhattan."+str(binsize)+".*.png" | glob %}
-{%  set circplot = job.out.outdir | joinpaths: case, "circular."+str(binsize)+".*.png" | glob %}
+{%  set manplot = job.out.outdir | glob: "manhattan."+str(binsize)+".*.png" %}
+{%  set circplot = job.out.outdir | glob: "circular."+str(binsize)+".*.png" %}
 {%  set _ = manplots.append(manplot[0]) %}
 {%  if len(circplot) > 0 %}
 {%      set _ = circplots.append(circplot[0]) %}
@@ -45,6 +42,4 @@
     </div>
 </Tabs>
-{%  endfor %}
 {% endfor %}

biopipen/reports/cnvkit/CNVkitDiagram.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/cnvkit/CNVkitHeatmap.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/cnvkit/CNVkitScatter.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/{delim/SampleInfo.svelte → common.svelte} RENAMED Viewed

@@ -1,7 +1,7 @@
 {% from "utils/misc.liq" import report_jobs, table_of_images -%}
 <script>
     import { Image, DataTable, Descr } from "$libs";
+    import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification } from "$ccs";
 </script>
 {%- macro report_job(job, h=1) -%}
@@ -9,8 +9,7 @@
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-    <h1>{{job.in.infile | stem | escape }}</h1>
+    <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/scrna/DimPlots.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-{% set images = job.out.outdir | joinpaths: "*.png" | glob %}
+{% set images = job.out.outdir | glob: "*.png" %}
 {{ table_of_images(images) }}
 {%- endmacro -%}

biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte CHANGED Viewed

@@ -1,32 +1,61 @@
-{% from "utils/misc.liq" import report_jobs, table_of_images -%}
-{% from "utils/gsea.liq" import fgsea_report_script, fgsea_report, gsea_report -%}
+{% from "utils/misc.liq" import report_jobs -%}
 <script>
-  import { Image, DataTable, Descr } from "$libs";
-  import { Tabs, Tab, TabContent, Accordion, AccordionItem, InlineNotification } from "$ccs";
+    import { Image, DataTable, Descr } from "$libs";
+    import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
 </script>
-{%- macro report_job(job, h=2) -%}
-  {% if envs.fgsea %}
+<h1>Introduction</h1>
+<Descr>
+    Metabolic landscape of single cells in the tumor microenvironment.
+</Descr>
+<h2>Workflow of the original analysis</h2>
+<Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
+<h2>Reference</h2>
+<UnorderedList>
+    <ListItem><a href="https://www.nature.com/articles/s41467-019-11738-0" target="_blank">
+        Zhengtao, Ziwei Dai, and Jason W. Locasale.
+        "Metabolic landscape of the tumor microenvironment at single cell resolution."
+        Nature communications 10.1 (2019): 1-12.
+    </a></ListItem>
+    <ListItem><a href="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape" target="_blank">
+        Orginal pipeline
+    </a></ListItem>
+</UnorderedList>
+<h2>Analyses with this pipeline</h2>
+<Descr>
+The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups that bring biological meaning
+(i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
+</Descr>
+<UnorderedList>
+<ListItem>
+    <a href="../MetabolicPathwayActivity/index.html">MetabolicPathwayActivity</a>
+    <Tile><p>Investigating the metabolic pathways of the cells in different subsets and groups.</p></Tile>
+</ListItem>
+<ListItem>
+    <a href="../MetabolicPathwayHeterogeneity/index.html">MetabolicPathwayHeterogeneity</a>
+    <Tile><p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p></Tile>
+</ListItem>
+<ListItem>
+    MetabolicFeatures (this page)
+    <Tile>
+    <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
+    <p>The metabolic features are actual gene set enrichment analysis (GSEA) results for the metabolic pathways with given comparisons.</p>
+    </Tile>
+</ListItem>
+</UnorderedList>
+{%- macro report_job(job, h=1) -%}
     {{ job | render_job: h=h }}
-  {% else %}
-    {%- for ssdir in job.out.outdir | glob: "*" -%}
-      {%- if basename(ssdir) == "ALL" -%}
-        {%- set h = 1 -%}
-      {%- else -%}
-        <h{{h}}>{{ ssdir | stem }}</h{{h}}>
-      {%- endif -%}
-      {% for cldir in ssdir | glob: '*' %}
-        <h{{h+1}}>{{ cldir | basename }}</h{{h+1}}>
-        {{ gsea_report(cldir, h+2, envs, envs.top) }}
-      {% endfor %}
-    {%- endfor -%}
-  {% endif %}
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-  <h1>{{job.in.sobjfile | stem | escape}}</h1>
+    <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl