episodic 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. episodic/__about__.py +4 -0
  2. episodic/__init__.py +3 -0
  3. episodic/__main__.py +9 -0
  4. episodic/cli/__init__.py +8 -0
  5. episodic/snk.yaml +125 -0
  6. episodic/workflow/Snakefile +107 -0
  7. episodic/workflow/envs/arviz.yml +12 -0
  8. episodic/workflow/envs/beast.yml +6 -0
  9. episodic/workflow/envs/ggtree.yml +11 -0
  10. episodic/workflow/envs/phylo.yml +12 -0
  11. episodic/workflow/envs/plot_traces.yml +11 -0
  12. episodic/workflow/envs/python.yml +12 -0
  13. episodic/workflow/profiles/slurm/config.yaml +36 -0
  14. episodic/workflow/profiles/slurm/slurm-cancle +3 -0
  15. episodic/workflow/rules/beast.smk +76 -0
  16. episodic/workflow/rules/config.smk +10 -0
  17. episodic/workflow/rules/report.smk +95 -0
  18. episodic/workflow/rules/tree.smk +68 -0
  19. episodic/workflow/scripts/arviz_output.py +166 -0
  20. episodic/workflow/scripts/calculate_odds.py +72 -0
  21. episodic/workflow/scripts/densitree.R +50 -0
  22. episodic/workflow/scripts/extract_mle.py +91 -0
  23. episodic/workflow/scripts/phylo_rate_quantile_analysis.py +103 -0
  24. episodic/workflow/scripts/plotMCCtree.R +48 -0
  25. episodic/workflow/scripts/plot_traces.py +184 -0
  26. episodic/workflow/scripts/populate_beast_template.py +168 -0
  27. episodic/workflow/scripts/tree_converter.py +49 -0
  28. episodic/workflow/templates/beast_xml_template.jinja +514 -0
  29. episodic/workflow/utils.py +12 -0
  30. episodic-0.0.1.dist-info/METADATA +45 -0
  31. episodic-0.0.1.dist-info/RECORD +34 -0
  32. episodic-0.0.1.dist-info/WHEEL +4 -0
  33. episodic-0.0.1.dist-info/entry_points.txt +2 -0
  34. episodic-0.0.1.dist-info/licenses/LICENSE.txt +9 -0
episodic/__about__.py ADDED
@@ -0,0 +1,4 @@
1
+ # SPDX-FileCopyrightText: 2023-present Wytamma Wirth <wytamma.wirth@me.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
# Version string of the episodic package.
__version__ = "0.0.1"
episodic/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2023-present Wytamma Wirth <wytamma.wirth@me.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
episodic/__main__.py ADDED
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: 2023-present Wytamma Wirth <wytamma.wirth@me.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
import sys

# Start the command-line interface only when this file is executed directly
# (for example via ``python -m episodic``); a plain import does nothing.
if __name__ == "__main__":
    from episodic.cli import episodic as cli_app

    exit_code = cli_app()
    sys.exit(exit_code)
episodic/cli/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # SPDX-FileCopyrightText: 2023-present Wytamma Wirth <wytamma.wirth@me.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ from pathlib import Path
5
+
6
+ from snk.cli import CLI
7
+
# The pipeline directory handed to snk is the directory two levels above
# this file, i.e. the parent of the ``cli`` package directory.
_cli_dir = Path(__file__).parent
episodic = CLI(pipeline_dir_path=_cli_dir.parent)
episodic/snk.yaml ADDED
@@ -0,0 +1,125 @@
# snk configuration for the Episodic command-line interface.
# `annotations` describes one entry per configuration value (type, help text,
# whether it is required, default). The workflow reads the same keys through
# `config[...]`; nested entries (mcc_tree, marginal_likelihood, beast) are read
# as nested dictionaries.
logo: Episodic
tagline: A pipeline for fitting and testing Fixed Local Clock (FLC) molecular clock models for episodic evolution.
annotations:
  alignment:
    type: Path
    help: "Path to a fasta file containing the alignment."
    required: true
  group:
    type: List[str]
    help: "Group sequences containing this value (in the header) to define a FLC. Can specify multiple."
    required: true
  date_delimiter:
    type: str
    help: "Delimiter to use to split the date from the rest of the header."
    required: false
    # The default is the three-character string "|" *including* the double quotes.
    # NOTE(review): presumably so the pipe reaches shell commands already quoted - confirm.
    default: '"|"'
  date_index:
    type: int
    help: "Index of the date in the header. 0-based."
    required: false
    default: -1
  newick:
    type: Path
    help: "Path to a newick tree file. If provided topology will be fixed."
    required: false
  clock:
    type: List[str]
    help: "Clock models to test. Options are 'strict', 'relaxed', and 'flc-stem'. Can specify multiple."
    required: false
    default: ['strict', 'relaxed', 'flc-stem']
  chain_length:
    type: int
    help: "Length of the chain."
    required: false
    default: 10000000
  samples:
    type: int
    help: "Number of samples to draw from the chain."
    required: false
    default: 10000
  rate_gamma_prior_shape:
    type: float
    help: "Shape parameter for the gamma prior on the rate."
    required: false
    default: 0.5
  rate_gamma_prior_scale:
    type: float
    help: "Scale parameter for the gamma prior on the rate."
    required: false
    default: 0.1
  outdir:
    type: Path
    help: "Path to the output directory."
    required: false
    default: results
  dated:
    type: bool
    help: Create a timestamped output subdirectory
    required: false
    default: false
  duplicates:
    type: int
    help: "Number of duplicate runs. To test for convergence."
    required: false
    default: 2
  trees:
    type: bool
    help: "If false, the trees will not be saved."
    required: false
    default: true
  # Options for the maximum clade credibility tree summaries.
  mcc_tree:
    heights:
      type: List[str]
      help: "Height to use for the MCC tree. Can specify multiple. 'mean' (default), 'median', 'keep' or 'ca'"
      required: false
      default: ['mean']
  # Options for the marginal likelihood estimation (MLE) runs.
  marginal_likelihood:
    estimate:
      type: bool
      help: "If true, the PS/SS will be used to estimate the marginal likelihood."
      required: false
      default: True
    paths:
      type: int
      help: "Number of paths to use for the marginal likelihood estimation."
      required: false
      default: 100
    chain_length:
      type: int
      help: "Length of the chain for the marginal likelihood estimation."
      required: false
      default: 1000000
    duplicates:
      type: int
      help: "Number of duplicate MLE runs."
      required: false
      default: 3
  fit_clocks:
    type: bool
    help: "If true, BEAST will be used to fit the clocks. Use --no-fit-clocks to only run the MLE analysis."
    required: false
    default: true
  # Execution settings applied to the BEAST rules (threads, scheduler
  # resources and environment modules).
  beast:
    threads:
      type: int
      help: "Number of threads to use for BEAST."
      required: false
      default: 4
    resources:
      runtime:
        help: "Runtime in minutes."
        default: 10080 # 7 days
      mem_mb:
        help: "Memory to request."
        # NOTE(review): the key is named mem_mb but the value carries an "M"
        # suffix; the slurm profile passes it verbatim to `sbatch --mem=`.
        default: 400M
      partition:
        help: "Partition to submit to."
        default: gpu-a100
      gres:
        help: "GPU resource to request."
        default: gpu:1
    envmodules:
      default:
        - "GCC/11.3.0"
        - "beagle-lib/3.1.2-CUDA-11.7.0"
episodic/workflow/Snakefile ADDED
@@ -0,0 +1,107 @@
from utils import decimal_year_to_date
from scripts.populate_beast_template import taxa_from_fasta


# Directories bundled with the workflow, resolved relative to this Snakefile.
SNAKE_DIR = Path(workflow.basedir)
TEMPLATE_DIR = SNAKE_DIR / "templates"
SCRIPT_DIR = SNAKE_DIR / "scripts"

OUT_DIR = Path(config["outdir"])
if config["dated"]:
    # create timestamped output directory
    from datetime import datetime
    OUT_DIR = OUT_DIR / datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
CLOCK_DIR = OUT_DIR / "clocks"

MLE: bool = config["marginal_likelihood"].get("estimate")
fit_clocks: bool = config["fit_clocks"]
rate_gamma_prior_scale = config["rate_gamma_prior_scale"]
rate_gamma_prior_shape = config["rate_gamma_prior_shape"]
# One run label per requested clock model, suffixed with the gamma prior
# parameters (e.g. "strict_0.5_0.1" with the default priors).
clocks = expand("{clock}_{rate_gamma_prior_shape}_{rate_gamma_prior_scale}", clock=config["clock"], rate_gamma_prior_shape=rate_gamma_prior_shape, rate_gamma_prior_scale=rate_gamma_prior_scale)
flc_clocks = [c for c in clocks if "flc" in c]
relaxed_clocks = [c for c in clocks if "relaxed" in c]
alignment_path = config["alignment"]
date_delimiter = config["date_delimiter"]
date_index = config["date_index"]

# Replicate numbers start at 1; the ranges are empty when the corresponding
# analysis is switched off, so no per-replicate targets are generated for it.
duplicates = range(1, config["duplicates"] + 1) if fit_clocks else []
mle_duplicates = range(1, config["marginal_likelihood"].get("duplicates") + 1) if MLE else []

ALL_LOG_FILES = expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.log", clock=clocks, duplicate=duplicates)
# Input function: all replicate log files belonging to the clock in `wildcards`.
PER_CLOCK_LOG_FILES = lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates]

TAXA = taxa_from_fasta(
    alignment_path,
    date_delimiter=date_delimiter,
    date_index=date_index,
)

# Dates are decimal years (they are fed to decimal_year_to_date), so the most
# recent sample is the one with the LARGEST date. The previous min() selected
# the oldest sample instead, which then reached the tree plots as --mrsd.
most_recent_sampling_date = decimal_year_to_date(max(TAXA, key=lambda taxa: taxa.date).date)

print(f"Running Episodic with {len(TAXA)} taxa")
print(f"Most recent sampling date: {most_recent_sampling_date}")

include: "rules/beast.smk"
include: "rules/config.smk"
include: "rules/report.smk"
include: "rules/tree.smk"




# Targets produced by the clock-fitting part of the workflow.
CLOCK_FILES = []

CLOCK_FILES.extend(
    [
        expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}_trace_plots", clock=clocks, duplicate=duplicates),
        expand(CLOCK_DIR / "{clock}" / "{clock}-summary.csv", clock=clocks),
    ]
)
CLOCK_FILES.extend(
    [CLOCK_DIR / f"{clock}" / f"{clock}-violin.svg" for clock in flc_clocks],
)
CLOCK_FILES.extend(
    [CLOCK_DIR / f"{clock}" / f"{clock}-odds.csv" for clock in flc_clocks],
)
CLOCK_FILES.extend(
    [CLOCK_DIR / "clocks-violin.svg",
    CLOCK_DIR / "clocks-trace.svg",]
)
if config.get("trees"):
    # Tree-derived outputs are only requested when trees are being saved.
    CLOCK_FILES.extend(
        expand(
            CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.mcc.{heights}.{ext}",
            clock=clocks,
            duplicate=duplicates,
            heights=config["mcc_tree"].get("heights", "mean"),
            ext=["nwk", "svg"]
        ),
    )
    CLOCK_FILES.extend(
        expand(
            CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.rate_quantiles.{ext}",
            clock=clocks,
            duplicate=duplicates,
            ext=["csv", "svg"]
        )
    )



# Final targets of the workflow; the config dump is always written.
OUTPUT_FILES = [
    OUT_DIR / "config.yaml",
]

if fit_clocks:
    OUTPUT_FILES.extend(
        CLOCK_FILES
    )

if MLE:
    OUTPUT_FILES.append(
        OUT_DIR / "mle" / "mle.svg"
    )

rule all:
    input:
        *OUTPUT_FILES
episodic/workflow/envs/arviz.yml ADDED
@@ -0,0 +1,12 @@
# Conda environment for the rules that run arviz_output.py
# (summary, plot_flc_rates and the derived plot_rates rule in rules/report.smk).
name: arviz
channels:
  - conda-forge
dependencies:
  - python=3.9
  - pip
  # Python packages below are installed with pip inside the environment.
  - pip:
    - arviz
    - pandas
    - typer
    - bokeh
    - holoviews
episodic/workflow/envs/beast.yml ADDED
@@ -0,0 +1,6 @@
# Conda environment for the rules that call `beast` and `treeannotator`
# (run_beast in rules/beast.smk, max_clade_credibility_tree in rules/tree.smk).
channels:
  - bioconda
  - conda-forge
dependencies:
  - beast==1.10.4
  - beagle-lib<4
episodic/workflow/envs/ggtree.yml ADDED
@@ -0,0 +1,11 @@
# Conda environment for the R tree rendering rule
# (max_clade_credibility_tree_render in rules/tree.smk, which runs plotMCCtree.R).
name: ggtree
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - r-base
  - bioconductor-treeio
  - bioconductor-ggtree
  - r-optparse
  - r-svglite
episodic/workflow/envs/phylo.yml ADDED
@@ -0,0 +1,12 @@
# Conda environment for the Python tree-processing rules in rules/tree.smk
# (max_clade_credibility_tree_newick and rate_quantile_analysis).
name: phylo
channels:
  - conda-forge
dependencies:
  - python=3.9
  - pip
  # Python packages below are installed with pip inside the environment.
  - pip:
    - dendropy
    - numpy
    - typer
    - scipy
    - matplotlib
episodic/workflow/envs/plot_traces.yml ADDED
@@ -0,0 +1,11 @@
# Conda environment for the plot_traces rule in rules/report.smk
# (runs plot_traces.py).
channels:
  - conda-forge
dependencies:
  - python=3.9
  - numpy
  - typer
  - pandas
  - plotly
  - pip
  # Installed with pip inside the environment.
  - pip:
    - kaleido
episodic/workflow/envs/python.yml ADDED
@@ -0,0 +1,12 @@
# Conda environment for the general-purpose Python rules in rules/report.smk
# (extract_mle and calculate_odds).
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - python==3.9
  - pip
  # Python packages below are installed with pip inside the environment.
  - pip:
    - pandas
    - seaborn
    - matplotlib
    - typer
episodic/workflow/profiles/slurm/config.yaml ADDED
@@ -0,0 +1,36 @@
# Snakemake profile for submitting jobs to SLURM with sbatch.
# Note that we use `threads` here as SLURM `--cpus-per-task`.
# Optional flags (account, partition, qos, gres) are only added to the sbatch
# call when the corresponding resource is a non-empty string.
cluster: >-
  mkdir -p logs && sbatch
  $(if [[ '{resources.account}' ]]; then echo '-A {resources.account}'; fi)
  $(if [[ '{resources.partition}' ]]; then echo '-p {resources.partition}'; fi)
  --parsable
  --time={resources.runtime}
  --mem={resources.mem_mb}
  -N {resources.nodes}
  -n {resources.tasks_per_node}
  -c {threads}
  -o logs/{rule}-{wildcards}.out -e logs/{rule}-{wildcards}.err
  $(if [[ '{resources.qos}' ]]; then echo '-q {resources.qos}'; fi)
  $(if [[ '{resources.gres}' ]]; then echo '--gres={resources.gres}'; fi)
  {resources.extra}
# Fallback values for rules that do not set a resource themselves.
# NOTE(review): qos='covid19' looks site-specific, and unlike the other optional
# flags it is non-empty by default, so `-q covid19` is always passed - confirm.
default-resources:
  - account=''
  - partition=''
  - runtime=15
  - mem_mb=4000
  - nodes=1
  - tasks_per_node=1
  - qos='covid19'
  - gres=''
  - extra=''
# NOTE(review): verify against the Snakemake documentation whether the
# "{jobid}" placeholder is substituted here or whether job ids are appended
# to the command as arguments.
cluster-cancel: "scancel {jobid}"
jobs: 50
use-conda: true
use-envmodules: true
printshellcmds: true
rerun-incomplete: true
keep-going: true
local-cores: 1
max-jobs-per-second: 10
max-status-checks-per-second: 1
latency-wait: 30
episodic/workflow/profiles/slurm/slurm-cancle ADDED
@@ -0,0 +1,3 @@
#!/bin/sh

# Pull every run of digits out of the arguments and hand them to scancel
# as the job ids to cancel.
job_ids=$(echo $* | grep -Eo '[0-9]+')
scancel $job_ids
episodic/workflow/rules/beast.smk ADDED
@@ -0,0 +1,76 @@
1
+
# Renders the BEAST XML for one run by calling populate_beast_template.py
# with the Jinja template and the analysis settings from the config.
# Runs whose {name} contains "mle" are rendered as marginal likelihood runs
# (--mle) without trace or tree logging.
rule create_beast_xml:
    input:
        alignment = config["alignment"],
    output:
        beast_XML_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.xml",
    params:
        template = TEMPLATE_DIR / "beast_xml_template.jinja",
        # A bare pipe would be interpreted by the shell, so it is passed
        # backslash-escaped. Raw string: "\|" is an invalid escape sequence
        # in a normal string literal and triggers a warning on newer Pythons.
        date_delimiter = r"\|" if config.get("date_delimiter") == "|" else config.get("date_delimiter"),
        date_index = config.get("date_index", -1),
        groups = " ".join(config.get("group")),
        # {clock} is "<model>_<shape>_<scale>"; the model name is the first part.
        clock = lambda wildcards: wildcards.clock.split("_")[0],
        rate_gamma_prior_shape = config.get("rate_gamma_prior_shape"),
        rate_gamma_prior_scale = config.get("rate_gamma_prior_scale"),
        chain_length = config.get("chain_length"),
        samples = config.get("samples"),
        mle = lambda wildcards: "--mle" if "mle" in wildcards.name else "",
        mle_chain_length = f"--mle-chain-length {config['marginal_likelihood'].get('chain_length')}",
        mle_path_steps = f"--mle-path-steps {config['marginal_likelihood'].get('paths')}",
        no_trace = lambda wildcards: "--no-trace" if "mle" in wildcards.name else "",
        no_trees = lambda wildcards: "--no-trees" if not config.get("trees") or "mle" in wildcards.name else "",
        # The CLI exposes the fixed-topology tree as `newick` (see snk.yaml);
        # previously only `fixed_tree` was read, so a tree given on the command
        # line was silently ignored. `fixed_tree` still takes precedence.
        fixed_tree = f'--fixed-tree {config.get("fixed_tree") or config.get("newick")}' if (config.get("fixed_tree") or config.get("newick")) else "",
    shell:
        """
        python {SCRIPT_DIR}/populate_beast_template.py \
            {params.template} \
            --output {output.beast_XML_file} \
            --alignment {input.alignment} \
            --date-delimiter {params.date_delimiter} \
            --date-index {params.date_index} \
            --groups {params.groups} \
            --clock {params.clock} \
            --rate-gamma-prior-shape {params.rate_gamma_prior_shape} \
            --rate-gamma-prior-scale {params.rate_gamma_prior_scale} \
            --chain-length {params.chain_length} \
            --samples {params.samples} \
            {params.mle} \
            {params.mle_chain_length} \
            {params.mle_path_steps} \
            {params.no_trace} \
            {params.no_trees} \
            {params.fixed_tree}
        """
44
+
# The .trees file is only declared as an output of run_beast when the `trees`
# option is enabled; otherwise the rule has no `beast_trees_file` output.
TREES = {"beast_trees_file": CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees"} if config.get("trees") else {}

# Runs BEAST on a rendered XML file and captures its standard output.
# Threads, scheduler resources and environment modules come from the `beast`
# section of the config.
rule run_beast:
    input:
        beast_XML_file = rules.create_beast_xml.output.beast_XML_file,
    output:
        beast_stdout_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.stdout",
        beast_log_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.log",
        **TREES,
    threads: config["beast"].get("threads")
    resources:
        **config["beast"].get("resources", {}),
    envmodules:
        *config["beast"].get("envmodules", []),
    conda:
        "../envs/beast.yml"
    shell:
        """
        beast -working -overwrite -beagle_GPU -threads {threads} {input.beast_XML_file} > {output.beast_stdout_file}
        """
65
+
# Marginal likelihood runs live under <outdir>/mle/<clock>/ instead of the
# clocks directory.
MLE_OUT_DIR = OUT_DIR / "mle" / "{clock}"

# Same as create_beast_xml, but writing the XML into the MLE directory.
use rule create_beast_xml as create_mle_xml with:
    output:
        beast_XML_file = MLE_OUT_DIR / "{name}.xml",

# Same as run_beast, but fed by create_mle_xml; only the captured stdout is
# declared as output here.
use rule run_beast as run_mle_beast with:
    input:
        beast_XML_file = rules.create_mle_xml.output.beast_XML_file,
    output:
        beast_stdout_file = MLE_OUT_DIR / "{name}.stdout",
episodic/workflow/rules/config.smk ADDED
@@ -0,0 +1,10 @@
# convert config to yaml
import yaml

# Stores the effective configuration next to the results.
rule write_config_to_file:
    output: OUT_DIR / "config.yaml"
    params: yaml_config=yaml.dump(config)
    run:
        # Written from Python instead of `echo "{params.yaml_config}"` in a
        # shell command: the YAML text may contain double quotes, `$`,
        # backticks or pipes (e.g. the quoted "|" date delimiter), which the
        # shell would interpret and thereby corrupt or break the command.
        with open(output[0], "w", encoding="utf-8") as config_file:
            config_file.write(params.yaml_config)
episodic/workflow/rules/report.smk ADDED
@@ -0,0 +1,95 @@
MLE_DIR = OUT_DIR / "mle"

# Runs extract_mle.py on the MLE directory once the stdout of every
# marginal likelihood run (each clock x each MLE duplicate) is available.
# The script receives only the directory; it is expected to create mle.svg there.
rule extract_mle:
    input:
        expand(OUT_DIR / "mle" / "{clock}" / "{clock}_mle_{duplicate}.stdout", clock=clocks, duplicate=mle_duplicates),
    output:
        MLE_DIR / "mle.svg",
    conda:
        "../envs/python.yml"
    shell:
        """
        python {SCRIPT_DIR}/extract_mle.py {MLE_DIR}
        """
14
+
rule plot_traces:
    """
    Makes trace plots from the BEAST log file of a single run.

    The plots are written into a per-run ``<name>_trace_plots`` directory.
    """
    input:
        rules.run_beast.output.beast_log_file,
    output:
        directory(CLOCK_DIR / "{clock}" / "{name}" / "{name}_trace_plots/"),
    conda:
        "../envs/plot_traces.yml"
    shell:
        # The doubled braces escape Snakemake formatting so the shell sees
        # ${CONDA_PREFIX} and uses the Python of the activated environment.
        """
        ${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/plot_traces.py {input} {output}
        """
29
+
rule summary:
    """
    Makes a combined summary CSV from the BEAST log files of all
    duplicate runs of one clock.
    """
    input:
        PER_CLOCK_LOG_FILES,
    output:
        # NOTE(review): the output is named posterior_svg but holds a CSV path.
        posterior_svg=CLOCK_DIR / "{clock}" / "{clock}-summary.csv",
    params:
        # Same path as the declared output, rebuilt from the wildcards.
        output=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}" / f"{wildcards.clock}-summary.csv",
    conda:
        "../envs/arviz.yml"
    shell:
        """
        ${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/arviz_output.py summary {input} {params.output}
        """
46
+
rule plot_flc_rates:
    """
    Makes rate plots from the log files of one FLC clock.

    arviz_output.py is given the clock's directory; the declared output is
    the ``<clock>-violin.svg`` file inside it.
    """
    input:
        # All duplicate log files of the clock; the list is empty for clocks
        # whose name does not contain "flc".
        lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates if "flc" in wildcards.clock],
    output:
        rate_svg=CLOCK_DIR / "{clock}" / "{clock}-violin.svg"
    params:
        directory=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}",
        gamma_shape=rate_gamma_prior_shape,
        gamma_scale=rate_gamma_prior_scale,
    conda:
        "../envs/arviz.yml"
    shell:
        """
        ${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/arviz_output.py rates {input} {params.directory} --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
        """
65
+
# Variant of plot_flc_rates that runs over the log files of every clock and
# duplicate at once and writes into the top-level clocks directory.
# Only input, output and params are overridden here.
use rule plot_flc_rates as plot_rates with:
    input:
        expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.log", clock=clocks, duplicate=duplicates),
    output:
        clocks_violin=CLOCK_DIR / "clocks-violin.svg",
        clocks_trace=CLOCK_DIR / "clocks-trace.svg",
    params:
        directory=CLOCK_DIR,
        gamma_shape=rate_gamma_prior_shape,
        gamma_scale=rate_gamma_prior_scale,
76
+
# Runs calculate_odds.py for one FLC clock: once over all duplicate log files
# together (the declared <clock>-odds.csv output) and then once per log file.
rule calculate_odds:
    input:
        # All duplicate log files of the clock; empty for non-"flc" clocks.
        lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates if "flc" in wildcards.clock],
    output:
        # NOTE(review): the output is named rate_svg but holds a CSV path.
        rate_svg=CLOCK_DIR / "{clock}" / "{clock}-odds.csv"
    params:
        directory=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}",
        gamma_shape=rate_gamma_prior_shape,
        gamma_scale=rate_gamma_prior_scale,
    conda:
        "../envs/python.yml"
    shell:
        # The per-file results go to "<log path without .log>-odds.csv"; these
        # files are not declared as outputs of the rule.
        """
        ${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/calculate_odds.py {input} {output} --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
        for file in {input}
        do
            ${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/calculate_odds.py $file ${{file%.log}}-odds.csv --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
        done
        """
episodic/workflow/rules/tree.smk ADDED
@@ -0,0 +1,68 @@
1
+
rule max_clade_credibility_tree:
    """
    Summarises the BEAST trees of one run into a maximum clade credibility
    (MCC) tree with treeannotator, using the requested node heights.
    """
    input:
        # Same path that run_beast declares as `beast_trees_file`. It is spelled
        # out here because run_beast only has that named output when the `trees`
        # option is enabled; referencing rules.run_beast.output.beast_trees_file
        # made the whole workflow fail to load when trees were disabled.
        CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees",
    output:
        CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.nexus",
    params:
        # Discard the first 10% of the configured number of samples as burn-in.
        burnin = int(int(config['samples']) * 0.1),
    conda:
        "../envs/beast.yml"
    shell:
        """
        treeannotator -burninTrees {params.burnin} -heights {wildcards.heights} {input} {output}
        """
18
+
19
+
rule max_clade_credibility_tree_newick:
    """
    Converts the MCC tree from nexus to newick with tree_converter.py,
    passing ``--node-label posterior``.
    """
    input:
        rules.max_clade_credibility_tree.output,
    output:
        CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.nwk",
    conda:
        "../envs/phylo.yml"
    shell:
        "${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/tree_converter.py {input} {output} --node-label posterior"
32
+
33
+
rule max_clade_credibility_tree_render:
    """
    Renders the MCC tree in SVG format with plotMCCtree.R, passing the most
    recent sampling date as ``--mrsd``.
    """
    input:
        rules.max_clade_credibility_tree.output,
    output:
        CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.svg",
    params:
        mrsd = most_recent_sampling_date,
    conda:
        "../envs/ggtree.yml"
    shell:
        # The R script front end is called `Rscript` (lower-case "s"); the
        # previous `RScript` spelling only resolves on case-insensitive
        # filesystems and fails with "No such file or directory" elsewhere.
        "${{CONDA_PREFIX}}/bin/Rscript {SCRIPT_DIR}/plotMCCtree.R --input {input} --output {output} --mrsd {params.mrsd}"
48
+
49
+
# Runs phylo_rate_quantile_analysis.py on the BEAST trees of one run and
# writes a CSV table and an SVG plot; every configured group is passed as
# its own `-g` option.
rule rate_quantile_analysis:
    input:
        # Same path that run_beast declares as `beast_trees_file`. It is spelled
        # out here because run_beast only has that named output when the `trees`
        # option is enabled; referencing rules.run_beast.output.beast_trees_file
        # made the whole workflow fail to load when trees were disabled.
        CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees",
    output:
        csv = CLOCK_DIR / "{clock}" / "{name}" / "{name}.rate_quantiles.csv",
        svg = CLOCK_DIR / "{clock}" / "{name}" / "{name}.rate_quantiles.svg",
    params:
        groups = " ".join(f"-g {group}" for group in config['group']),
    conda:
        "../envs/phylo.yml"
    shell:
        """
        python {SCRIPT_DIR}/phylo_rate_quantile_analysis.py \
            {input} \
            --output-csv {output.csv} \
            --output-plot {output.svg} \
            {params.groups}
        """
68
+