PyPI - biopipen - Versions diffs - 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl - Mend

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (85) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +8 -0
biopipen/ns/bam.py +0 -2
biopipen/ns/bed.py +35 -0
biopipen/ns/cellranger_pipeline.py +5 -5
biopipen/ns/cnv.py +18 -2
biopipen/ns/cnvkit_pipeline.py +16 -11
biopipen/ns/gene.py +68 -23
biopipen/ns/misc.py +2 -15
biopipen/ns/plot.py +204 -0
biopipen/ns/regulatory.py +214 -0
biopipen/ns/scrna.py +31 -5
biopipen/ns/snp.py +516 -8
biopipen/ns/stats.py +167 -3
biopipen/ns/vcf.py +196 -0
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/scripts/bam/CNVpytor.py +144 -46
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMerge.py +1 -1
biopipen/scripts/cnv/AneuploidyScore.R +30 -7
biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
biopipen/scripts/cnv/TMADScore.R +21 -5
biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
biopipen/scripts/delim/SampleInfo.R +10 -5
biopipen/scripts/gene/GeneNameConversion.R +65 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/plot/Manhattan.R +146 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/atSNP.R +33 -0
biopipen/scripts/regulatory/motifBreakR.R +1594 -0
biopipen/scripts/scrna/MarkersFinder.R +69 -67
biopipen/scripts/scrna/SeuratClustering.R +71 -29
biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
biopipen/scripts/scrna/SeuratPreparing.R +252 -122
biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
biopipen/scripts/snp/MatrixEQTL.R +85 -44
biopipen/scripts/snp/Plink2GTMat.py +133 -0
biopipen/scripts/snp/PlinkCallRate.R +190 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +298 -0
biopipen/scripts/snp/PlinkFromVcf.py +78 -0
biopipen/scripts/snp/PlinkHWE.R +80 -0
biopipen/scripts/snp/PlinkHet.R +92 -0
biopipen/scripts/snp/PlinkIBD.R +200 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/Mediation.R +94 -0
biopipen/scripts/stats/MetaPvalue.R +2 -1
biopipen/scripts/stats/MetaPvalue1.R +70 -0
biopipen/scripts/tcr/TCRClusterStats.R +12 -7
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/VcfFix_utils.py +1 -1
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/utils/gene.R +83 -37
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.R +56 -0
biopipen/utils/misc.py +5 -2
biopipen/utils/reference.py +54 -10
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
biopipen/ns/bcftools.py +0 -111
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.28.1"
1	+ __version__ = "0.29.1"

biopipen/core/config.toml CHANGED Viewed

@@ -23,12 +23,16 @@ cnvpytor = "cnvpytor"
 cnvnator2vcf = "cnvnator2VCF.pl"
 # convert
 convert = "convert"
+# fimo from meme
+fimo = "fimo"
 # wget
 wget = "wget"
 # aria2c
 aria2c = "aria2c"
 # plink
 plink = "plink"
+# plink2
+plink2 = "plink2"
 # tabix
 tabix = "tabix"
 # sambamba
@@ -86,6 +90,10 @@ genome = ""
 # Database file for scType
 # https://github.com/IanevskiAleksandr/sc-type/
 sctype_db = ""
+# TF Motif database
+tf_motifdb = ""
+# TF motif pairs
+tf_motifs = ""
 [misc]
 # Number of cores used for each job

biopipen/ns/bam.py CHANGED Viewed

@@ -17,7 +17,6 @@ class CNVpytor(Proc):
     Envs:
         cnvpytor: Path to cnvpytor
-        cnvnator2vcf: Path to CNVnator2VCF.pl to convert the result to VCF file
         samtools: Path to samtools, used to index bam file in case it's not
         ncores: Number of cores to use (`-j` for cnvpytor)
         refdir: The directory containing the fasta file for each chromosome
@@ -41,7 +40,6 @@ class CNVpytor(Proc):
     lang = config.lang.python
     envs = {
         "cnvpytor": config.exe.cnvpytor,
-        "cnvnator2vcf": config.exe.cnvnator2vcf,
         "samtools": config.exe.samtools,
         "ncores": config.misc.ncores,
         "refdir": config.ref.refdir,

biopipen/ns/bed.py CHANGED Viewed

@@ -163,3 +163,38 @@ class BedtoolsMerge(Proc):
         "bedtools": config.exe.bedtools,
     }
     script = "file://../scripts/bed/BedtoolsMerge.py"
+class BedtoolsIntersect(Proc):
+    """Find the intersection of two BED files, using `bedtools intersect`
+    See <https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html>
+    Input:
+        afile: The first BED file
+        bfile: The second BED file
+    Output:
+        outfile: The output BED file
+    Envs:
+        bedtools: The path to bedtools
+        sort: Sort `afile` and `bfile` before intersecting.
+            By default, `-sorted` is used, assuming the input files are sorted.
+            If error occurs, try to set `sort` to `True`.
+        chrsize: Alias for `g` in `bedtools intersect`.
+        postcmd: The command to be executed for the output file after intersecting.
+            You can use `$infile`, `$outfile`, and `$outdir` to refer to the input,
+            output, and output directory, respectively.
+        <more>: Other options to be passed to `bedtools intersect`
+    """  # noqa: E501
+    input = "afile:file", "bfile:file"
+    output = "outfile:file:{{in.afile | stem0}}_{{in.bfile | stem0}}-intersect.bt"
+    lang = config.lang.python
+    envs = {
+        "bedtools": config.exe.bedtools,
+        "sort": False,
+        "chrsize": config.ref.chrsize,
+        "postcmd": None,
+    }
+    script = "file://../scripts/bed/BedtoolsIntersect.py"

biopipen/ns/cellranger_pipeline.py CHANGED Viewed

@@ -7,7 +7,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 from diot import Diot
-from pipen.utils import mark, is_loading_pipeline
+from pipen.utils import is_loading_pipeline
 from pipen_args.procgroup import ProcGroup
 if TYPE_CHECKING:
@@ -20,9 +20,9 @@ class CellRangerCountPipeline(ProcGroup):
     Run cellranger count for multiple samples and summarize the metrics.
     Args:
-        input (type=list): The list of lists of fastq files.
+        input (list): The list of lists of fastq files.
             or the list of comma-separated string of fastq files.
-        ids (type=list): The list of ids for the samples.
+        ids (list): The list of ids for the samples.
     """
     DEFAULTS = Diot(input=None, ids=None)
@@ -76,9 +76,9 @@ class CellRangerVdjPipeline(ProcGroup):
     Run cellranger vdj for multiple samples and summarize the metrics.
     Args:
-        input (type=list): The list of lists of fastq files.
+        input (list): The list of lists of fastq files.
             or the list of comma-separated string of fastq files.
-        ids (type=list): The list of ids for the samples.
+        ids (list): The list of ids for the samples.
     """
     DEFAULTS = Diot(input=None, ids=None)

biopipen/ns/cnv.py CHANGED Viewed

@@ -12,7 +12,15 @@ class AneuploidyScore(Proc):
     Input:
         segfile: The seg file, generally including chrom, start, end and
-            seg.mean (the log2 ratio)
+            seg.mean (the log2 ratio).
+            It is typically a tab-delimited file or a BED file.
+            If so, envs.chrom_col, envs.start_col, envs.end_col and envs.seg_col
+            are the 1st, 2nd, 3rd and 5th columns, respectively.
+            It can also be a VCF file. If so, envs.chrom_col and envs.start_col
+            are not required.
+            `end_col` and `envs.seg_col` will be a field in the INFO column.
+            [`VariantAnnotation`](https://rdrr.io/bioc/VariantAnnotation/)
+            is required to extract the INFO field.
     Output:
         outdir: The output directory containing the CAAs, AS and a histogram
@@ -122,7 +130,15 @@ class TMADScore(Proc):
     Input:
         segfile: The seg file, two columns are required:
             * chrom: The chromosome name, used for filtering
-            * seg.mean: The log2 ratio
+            * seg.mean: The log2 ratio.
+            It is typically a tab-delimited file or a BED file.
+            If so, envs.chrom_col and envs.seg_col
+            are the 1st and 5th columns, respectively.
+            It can also be a VCF file. If so, envs.chrom_col and envs.start_col
+            are not required.
+            `end_col` and `envs.seg_col` will be a field in the INFO column.
+            [`VariantAnnotation`](https://rdrr.io/bioc/VariantAnnotation/)
+            is required to extract the INFO field.
     Output:
         outfile: The output file containing the TMAD score

biopipen/ns/cnvkit_pipeline.py CHANGED Viewed

@@ -487,7 +487,8 @@ class CNVkitPipeline(ProcGroup):
                 target_file = None
                 antitarget_file = None
                 if self.col.sex in metadf:
-                    sample_sex = ",".join(metadf[self.col.sex][control_masks])
+                    all_sex = metadf[self.col.sex][control_masks].unique()
+                    sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
                 else:
                     sample_sex = [None]
             else:
@@ -774,13 +775,15 @@ class CNVkitPipeline(ProcGroup):
             else:
                 tumor_masks = metadf[self.col.group] == self.opts.case
+            if self.col.sex in metadf:
+                all_sex = metadf[self.col.sex][tumor_masks].unique()
+                sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
+            else:
+                sample_sex = [None]
             return tibble(
                 segfiles=[ch2.outfile.tolist()],
-                sample_sex=(
-                    ",".join(metadf[self.col.sex][tumor_masks])
-                    if self.col.sex in metadf
-                    else [None]
-                ),
+                sample_sex=sample_sex,
             )
         @annotate.format_doc(indent=3)
@@ -823,13 +826,15 @@ class CNVkitPipeline(ProcGroup):
             else:
                 tumor_masks = metadf[self.col.group] == self.opts.case
+            if self.col.sex in metadf:
+                all_sex = metadf[self.col.sex][tumor_masks].unique()
+                sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
+            else:
+                sample_sex = [None]
             return tibble(
                 segfiles=[ch2.outfile.tolist()],
-                sample_sex=(
-                    ",".join(metadf[self.col.sex][tumor_masks])
-                    if self.col.sex in metadf
-                    else [None]
-                ),
+                sample_sex=sample_sex,
             )
         @annotate.format_doc(indent=3)

biopipen/ns/gene.py CHANGED Viewed

@@ -9,46 +9,91 @@ class GeneNameConversion(Proc):
     Input:
         infile: The input file with original gene names
+            It should be a tab-separated file with header
     Output:
         outfile: The output file with converted gene names
     Envs:
-        inopts: Options to read `in.infile` for `pandas.read_csv()`
-            See https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
-        outopts: Options to write `out.outfile` for `pandas.to_csv()`
-            See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
-        notfound: What to do if a conversion cannot be done.
-            use-query: Ignore the conversion and use the original name
-            skip: Ignore the conversion and skip the entire row in input file
-            error: Report error
-        genecol: The index (0-based) or name of the column where
-            genes are present
-        output: How to output
-            keep: Keep the original name column and add new converted columns
-            drop: Drop the original name column, and add the converted names
-            replace: Drop the original name column, and insert
-                the converted names at the original position
-            only: Only keep the query and the converted name columns
+        notfound (choice): What to do if a conversion cannot be done.
+            - use-query: Ignore the conversion and use the original name
+            - skip: Ignore the conversion and skip the entire row in input file
+            - ignore: Same as skip
+            - error: Report error
+            - na: Use NA
+        dup (choice): What to do if a conversion results in multiple names.
+            - first: Use the first name, sorted by matching score descendingly (default)
+            - last: Use the last name, sorted by matching score descendingly
+            - combine: Combine all names using `;` as separator
+        genecol: The index (1-based) or name of the column where genes are present
+        output (choice): How to output.
+            - append: Add the converted names as new columns at the end using `envs.outfmt`
+                as the column name.
+            - replace: Drop the original name column, and insert
+                the converted names at the original position.
+            - converted: Only keep the converted names.
+            - with-query: Output 2 columns with original and converted names.
         infmt: What's the original gene name format
             Available fields
             https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
-        outfmt: What's the target gene name format
+        outfmt: What's the target gene name format. Currently only a single format
+            is supported.
         species: Limit gene query to certain species.
             Supported: human, mouse, rat, fruitfly, nematode, zebrafish,
             thale-cress, frog and pig
     """  # noqa: E501
     input = "infile:file"
     output = "outfile:file:{{in.infile | basename}}"
-    lang = config.lang.python
+    lang = config.lang.rscript
     envs = {
-        "inopts": {"sep": "\t", "index_col": False},
-        "outopts": {"sep": "\t", "index": False},
         "notfound": "error",
-        "genecol": 0,
-        "output": "keep",
+        "genecol": 1,
+        "dup": "first",
+        "output": "append",
         "infmt": ["symbol", "alias"],
         "outfmt": "symbol",
         "species": "human",
     }
-    script = "file://../scripts/gene/GeneNameConversion.py"
+    script = "file://../scripts/gene/GeneNameConversion.R"
+class GenePromoters(Proc):
+    """Get gene promoter regions by specifying the flanking regions of TSS
+    Input:
+        infile: The input file with gene ids/names
+    Output:
+        outfile: The output file with promoter regions in BED format
+    Envs:
+        up (type=int): The upstream distance from TSS
+        down (type=int): The downstream distance from TSS
+            If not specified, the default is `envs.up`
+        notfound (choice): What to do if a gene is not found.
+            - skip: Skip the gene
+            - error: Report error
+        refgene: The reference gene annotation file in GTF format
+        header (flag): Whether the input file has a header
+        genecol (type=int): The index (1-based) of the gene column
+        match_id (flag): Should we match the genes in `in.infile` by `gene_id`
+            instead of `gene_name` in `envs.refgene`
+        sort (flag): Sort the output by chromosome and start position
+        chrsize: The chromosome size file, from which the chromosome order is
+            used to sort the output
+    """
+    input = "infile:file"
+    output = "outfile:file:{{in.infile | stem}}-promoters.bed"
+    lang = config.lang.rscript
+    envs = {
+        "up": 2000,
+        "down": None,
+        "notfound": "error",
+        "refgene": config.ref.refgene,
+        "header": True,
+        "genecol": 1,
+        "match_id": False,
+        "sort": False,
+        "chrsize": config.ref.chrsize,
+    }
+    script = "file://../scripts/gene/GenePromoters.R"

biopipen/ns/misc.py CHANGED Viewed

@@ -80,7 +80,7 @@ class Str2File(Proc):
         name: The name of the output file
     """
     input = "str, name"
-    output = "outfile:file:{{in.name}}"
+    output = "outfile:file:{{in.name | default: 'unnamed.txt'}}"
     lang = config.lang.python
     envs = {"name": None}
     script = "file://../scripts/misc/Str2File.py"
@@ -105,17 +105,4 @@ class Shell(Proc):
     output = "outfile:file:{{in.infile | basename}}"
     envs = {"cmd": "", "outdir": False}
     lang = config.lang.bash
-    script = """
-        infile={{in.infile | quote}}
-        outfile={{out.outfile | quote}}
-        is_outdir={{envs.outdir | int}}
-        cmd={{envs.cmd | quote}}
-        if [[ -z "$cmd" ]]; then
-            echo "No command given." 1>&2
-            exit 1
-        fi
-        if [[ $is_outdir -eq 1 ]]; then
-            mkdir -p "$outfile"
-        fi
-        eval "$cmd"
-    """
+    script = "file://../scripts/misc/Shell.sh"

biopipen/ns/plot.py CHANGED Viewed

@@ -150,3 +150,207 @@ class ROC(Proc):
         "show_auc": True,
     }
     script = "file://../scripts/plot/ROC.R"
+class Manhattan(Proc):
+    """Plot Manhattan plot.
+    Using the [`ggmanh`](https://bioconductor.org/packages/devel/bioc/vignettes/ggmanh/inst/doc/ggmanh.html) package.
+    Requires `ggmanh` v1.9.6 or later.
+    Input:
+        infile: The input file for data
+            It should contain at least three columns, the chromosome, the position
+            and the p-value of the SNPs.
+            Header is required.
+    Output:
+        outfile: The output figure file
+    Envs:
+        chrom_col: The column for chromosome
+            An integer (1-based) or a string indicating the column name.
+        pos_col: The column for position
+            An integer (1-based) or a string indicating the column name.
+        pval_col: The column for p-value
+            An integer (1-based) or a string indicating the column name.
+        label_col: The column for label.
+            Once specified, the significant SNPs will be labeled on the plot.
+        devpars (ns): The parameters for `png()`
+            - res (type=int): The resolution
+            - width (type=int): The width
+            - height (type=int): The height
+        title: The title of the plot
+        ylabel: The y-axis label
+        rescale (flag): Whether to rescale the p-values
+        rescale_ratio_threshold (type=float): Threshold that triggers the rescale
+        signif (auto): A single value or a list of values to indicate the significance levels
+            Multiple values should be also separated by comma (`,`).
+            The minimum value will be used as the cutoff to determine if the SNPs are significant.
+        hicolors (auto): The colors for significant and non-significant SNPs
+            If a single color is given, the non-significant SNPs will be in grey.
+            Set it to None to disable the highlighting.
+        thin_n (type=int): Number of max points per horizontal partitions of the plot.
+            `0` or `None` to disable thinning.
+        thin_bins (type=int): Number of bins to partition the data.
+        zoom (auto): Chromosomes to zoom in
+            Each chromosome should be separated by comma (`,`) or in a list. Single chromosome is also accepted.
+            Ranges are also accepted, see `envs.chroms`.
+            Each chromosome will be saved in a separate file.
+        zoom_devpars (ns): The parameters for the zoomed plot
+            - width (type=int): The width
+            - height (type=int): The height, inherited from `devpars` by default
+            - res (type=int): The resolution, inherited from `devpars` by default
+        chroms (auto): The chromosomes and order to plot
+            A hyphen (`-`) can be used to indicate a range.
+            For example `chr1-22,chrX,chrY,chrM` will plot all autosomes, X, Y and M.
+            if `auto`, only the chromosomes in the data will be plotted in the order
+            they appear in the data.
+        args (ns): Additional arguments for `manhattan_plot()`.
+            See <https://rdrr.io/github/leejs-abv/ggmanh/man/manhattan_plot.html>.
+            Note that `-` will be replaced by `.` in the argument names.
+            - <more>: Additional arguments for `manhattan_plot()`
+    """  # noqa: E501
+    input = "infile:file"
+    output = "outfile:file:{{in.infile | stem0}}.manhattan.png"
+    lang = config.lang.rscript
+    envs = {
+        "chrom_col": 1,
+        "pos_col": 2,
+        "pval_col": 3,
+        "label_col": None,
+        "devpars": {"res": 100, "width": 1000, "height": 500},
+        "zoom_devpars": {"width": 500, "height": None, "res": None},
+        "title": None,
+        "ylabel": "-log10(p-value)",
+        "rescale": True,
+        "rescale_ratio_threshold": 5,
+        "signif": [5e-8, 1e-5],
+        "hicolors": None,
+        "thin_n": None,
+        "thin_bins": 200,
+        "zoom": None,
+        "chroms": "auto",
+        "args": {},
+    }
+    script = "file://../scripts/plot/Manhattan.R"
+class QQPlot(Proc):
+    """Generate QQ-plot or PP-plot using qqplotr.
+    See <https://cran.r-project.org/web/packages/qqplotr/vignettes/introduction.html>.
+    Input:
+        infile: The input file for data
+            It should contain at least one column of p-values or the values to be
+            plotted. Header is required.
+        theorfile: The file for theoretical values (optional)
+            This file should contain at least one column of theoretical values.
+            The values will be passed to `envs.theor_qfunc` to calculate the theoretical
+            quantiles.
+            Header is required.
+    Output:
+        outfile: The output figure file
+    Envs:
+        val_col: The column for values to be plotted
+            An integer (1-based) or a string indicating the column name.
+        devpars (ns): The parameters for `png()`
+            - res (type=int): The resolution
+            - width (type=int): The width
+            - height (type=int): The height
+        xlabel: The x-axis label
+        ylabel: The y-axis label
+        title: The title of the plot
+        trans: The transformation of the values
+            You can use `-log10` to transform the values to `-log10(values)`.
+            Otherwise you can use a direct R function or a custom R function.
+            For example `function(x) -log10(x)`.
+        kind (choice): The kind of the plot, `qq` or `pp`
+            - qq: QQ-plot
+            - pp: PP-plot
+        theor_col: The column for theoretical values in `in.theorfile` if provided,
+            otherwise in `in.infile`.
+            An integer (1-based) or a string indicating the column name.
+            If `distribution` of `band`, `line`, or `point` is `custom`, this column
+            must be provided.
+        theor_trans: The transformation of the theoretical values.
+            The `theor_funs` have default functions to take the theoretical values.
+            This transformation will be applied to the theoretical values before
+            passing to the `theor_funs`.
+        theor_funs (ns): The R functions to generate density, quantile and deviates
+            of the theoretical distribution based on the theoretical values
+            if `distribution` of `band`, `line`, or `point` is `custom`.
+            - dcustom: The density function, used by band
+            - qcustom: The quantile function, used by point
+            - rcustom: The deviates function, used by line
+        args (ns): The common arguments for `envs.band`, `envs.line` and `envs.point`.
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Other shared arguments between `stat_*_band`, `stat_*_line`
+                and `stat_*_point`.
+        band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`.
+            See <https://rdrr.io/cran/qqplotr/man/stat_qq_band.html> and
+            <https://rdrr.io/cran/qqplotr/man/stat_pp_band.html>.
+            Set to `None` or `band.disabled` to True to disable the band.
+            - disabled (flag): Disable the band
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Additional arguments for `stat_qq_band()` or `stat_pp_band()`
+        line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`.
+            See <https://rdrr.io/cran/qqplotr/man/stat_qq_line.html> and
+            <https://rdrr.io/cran/qqplotr/man/stat_pp_line.html>.
+            Set to `None` or `line.disabled` to True to disable the line.
+            - disabled (flag): Disable the line
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Additional arguments for `stat_qq_line()` or `stat_pp_line()`
+        point (ns): The arguments for `geom_qq_point()` or `geom_pp_point()`.
+            See <https://rdrr.io/cran/qqplotr/man/stat_qq_point.html> and
+            <https://rdrr.io/cran/qqplotr/man/stat_pp_point.html>.
+            Set to `None` or `point.disabled` to True to disable the point.
+            - disabled (flag): Disable the point
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Additional arguments for `geom_qq_point()` or `geom_pp_point()`
+        ggs (list): Additional ggplot expression to adjust the plot.
+    """
+    input = "infile:file, theorfile:file"
+    output = "outfile:file:{{in.infile | stem}}.{{envs.kind}}.png"
+    lang = config.lang.rscript
+    envs = {
+        "val_col": 1,
+        "theor_col": None,
+        "theor_trans": None,
+        "theor_funs": {
+            "dcustom": """
+              function(x, values, ...) {
+                density(values, from = min(values), to = max(values), n = length(x))$y
+              }
+            """,
+            "qcustom": "function(p, values, ...) {quantile(values, probs = p)}",
+            "rcustom": "function(n, values, ...) { sample(values, n, replace = TRUE) }",
+        },
+        "args": {"distribution": "norm", "dparams": {}},
+        "devpars": {"res": 100, "width": 1000, "height": 1000},
+        "xlabel": "Theoretical Quantiles",
+        "ylabel": "Observed Quantiles",
+        "title": "QQ-plot",
+        "trans": None,
+        "kind": "qq",
+        "band": {"disabled": False, "distribution": None, "dparams": None},
+        "line": {"disabled": False, "distribution": None, "dparams": None},
+        "point": {"disabled": False, "distribution": None, "dparams": None},
+        "ggs": None,
+    }
+    script = "file://../scripts/plot/QQPlot.R"

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl