episodic 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- episodic/__about__.py +4 -0
- episodic/__init__.py +3 -0
- episodic/__main__.py +9 -0
- episodic/cli/__init__.py +8 -0
- episodic/snk.yaml +125 -0
- episodic/workflow/Snakefile +107 -0
- episodic/workflow/envs/arviz.yml +12 -0
- episodic/workflow/envs/beast.yml +6 -0
- episodic/workflow/envs/ggtree.yml +11 -0
- episodic/workflow/envs/phylo.yml +12 -0
- episodic/workflow/envs/plot_traces.yml +11 -0
- episodic/workflow/envs/python.yml +12 -0
- episodic/workflow/profiles/slurm/config.yaml +36 -0
- episodic/workflow/profiles/slurm/slurm-cancle +3 -0
- episodic/workflow/rules/beast.smk +76 -0
- episodic/workflow/rules/config.smk +10 -0
- episodic/workflow/rules/report.smk +95 -0
- episodic/workflow/rules/tree.smk +68 -0
- episodic/workflow/scripts/arviz_output.py +166 -0
- episodic/workflow/scripts/calculate_odds.py +72 -0
- episodic/workflow/scripts/densitree.R +50 -0
- episodic/workflow/scripts/extract_mle.py +91 -0
- episodic/workflow/scripts/phylo_rate_quantile_analysis.py +103 -0
- episodic/workflow/scripts/plotMCCtree.R +48 -0
- episodic/workflow/scripts/plot_traces.py +184 -0
- episodic/workflow/scripts/populate_beast_template.py +168 -0
- episodic/workflow/scripts/tree_converter.py +49 -0
- episodic/workflow/templates/beast_xml_template.jinja +514 -0
- episodic/workflow/utils.py +12 -0
- episodic-0.0.1.dist-info/METADATA +45 -0
- episodic-0.0.1.dist-info/RECORD +34 -0
- episodic-0.0.1.dist-info/WHEEL +4 -0
- episodic-0.0.1.dist-info/entry_points.txt +2 -0
- episodic-0.0.1.dist-info/licenses/LICENSE.txt +9 -0
episodic/__about__.py
ADDED
episodic/__init__.py
ADDED
episodic/__main__.py
ADDED
episodic/cli/__init__.py
ADDED
episodic/snk.yaml
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
logo: Episodic
|
|
2
|
+
tagline: A pipeline for fitting and testing Fixed Local Clock (FLC) molecular clock models for episodic evolution.
|
|
3
|
+
annotations:
|
|
4
|
+
alignment:
|
|
5
|
+
type: Path
|
|
6
|
+
help: "Path to a fasta file containing the alignment."
|
|
7
|
+
required: true
|
|
8
|
+
group:
|
|
9
|
+
type: List[str]
|
|
10
|
+
help: "Group sequences containing this value (in the header) to define a FLC. Can specify multiple."
|
|
11
|
+
required: true
|
|
12
|
+
date_delimiter:
|
|
13
|
+
type: str
|
|
14
|
+
help: "Delimiter to use to split the date from the rest of the header."
|
|
15
|
+
required: false
|
|
16
|
+
default: '"|"'
|
|
17
|
+
date_index:
|
|
18
|
+
type: int
|
|
19
|
+
help: "Index of the date in the header. 0-based."
|
|
20
|
+
required: false
|
|
21
|
+
default: -1
|
|
22
|
+
newick:
|
|
23
|
+
type: Path
|
|
24
|
+
help: "Path to a newick tree file. If provided topology will be fixed."
|
|
25
|
+
required: false
|
|
26
|
+
clock:
|
|
27
|
+
type: List[str]
|
|
28
|
+
help: "Clock models to test. Options are 'strict', 'relaxed', and 'flc-stem'. Can specify multiple."
|
|
29
|
+
required: false
|
|
30
|
+
default: ['strict', 'relaxed', 'flc-stem']
|
|
31
|
+
chain_length:
|
|
32
|
+
type: int
|
|
33
|
+
help: "Length of the chain."
|
|
34
|
+
required: false
|
|
35
|
+
default: 10000000
|
|
36
|
+
samples:
|
|
37
|
+
type: int
|
|
38
|
+
help: "Number of samples to draw from the chain."
|
|
39
|
+
required: false
|
|
40
|
+
default: 10000
|
|
41
|
+
rate_gamma_prior_shape:
|
|
42
|
+
type: float
|
|
43
|
+
help: "Shape parameter for the gamma prior on the rate."
|
|
44
|
+
required: false
|
|
45
|
+
default: 0.5
|
|
46
|
+
rate_gamma_prior_scale:
|
|
47
|
+
type: float
|
|
48
|
+
help: "Scale parameter for the gamma prior on the rate."
|
|
49
|
+
required: false
|
|
50
|
+
default: 0.1
|
|
51
|
+
outdir:
|
|
52
|
+
type: Path
|
|
53
|
+
help: "Path to the output directory."
|
|
54
|
+
required: false
|
|
55
|
+
default: results
|
|
56
|
+
dated:
|
|
57
|
+
type: bool
|
|
58
|
+
help: Create a timestamped output subdirectory
|
|
59
|
+
required: false
|
|
60
|
+
default: false
|
|
61
|
+
duplicates:
|
|
62
|
+
type: int
|
|
63
|
+
help: "Number of duplicate runs. To test for convergence."
|
|
64
|
+
required: false
|
|
65
|
+
default: 2
|
|
66
|
+
trees:
|
|
67
|
+
type: bool
|
|
68
|
+
help: "If false, the trees will not be saved."
|
|
69
|
+
required: false
|
|
70
|
+
default: true
|
|
71
|
+
mcc_tree:
|
|
72
|
+
heights:
|
|
73
|
+
type: List[str]
|
|
74
|
+
help: "Height to use for the MCC tree. Can specify multiple. 'mean' (default), 'median', 'keep' or 'ca'"
|
|
75
|
+
required: false
|
|
76
|
+
default: ['mean']
|
|
77
|
+
marginal_likelihood:
|
|
78
|
+
estimate:
|
|
79
|
+
type: bool
|
|
80
|
+
help: "If true, the PS/SS will be used to estimate the marginal likelihood."
|
|
81
|
+
required: false
|
|
82
|
+
default: True
|
|
83
|
+
paths:
|
|
84
|
+
type: int
|
|
85
|
+
help: "Number of paths to use for the marginal likelihood estimation."
|
|
86
|
+
required: false
|
|
87
|
+
default: 100
|
|
88
|
+
chain_length:
|
|
89
|
+
type: int
|
|
90
|
+
help: "Length of the chain for the marginal likelihood estimation."
|
|
91
|
+
required: false
|
|
92
|
+
default: 1000000
|
|
93
|
+
duplicates:
|
|
94
|
+
type: int
|
|
95
|
+
help: "Number of duplicate MLE runs."
|
|
96
|
+
required: false
|
|
97
|
+
default: 3
|
|
98
|
+
fit_clocks:
|
|
99
|
+
type: bool
|
|
100
|
+
help: "If true, BEAST will be used to fit the clocks. Use --no-fit-clocks to only run the MLE analysis."
|
|
101
|
+
required: false
|
|
102
|
+
default: true
|
|
103
|
+
beast:
|
|
104
|
+
threads:
|
|
105
|
+
type: int
|
|
106
|
+
help: "Number of threads to use for BEAST."
|
|
107
|
+
required: false
|
|
108
|
+
default: 4
|
|
109
|
+
resources:
|
|
110
|
+
runtime:
|
|
111
|
+
help: "Runtime in minutes."
|
|
112
|
+
default: 10080 # 7 days
|
|
113
|
+
mem_mb:
|
|
114
|
+
help: "Memory to request."
|
|
115
|
+
default: 400M
|
|
116
|
+
partition:
|
|
117
|
+
help: "Partition to submit to."
|
|
118
|
+
default: gpu-a100
|
|
119
|
+
gres:
|
|
120
|
+
help: "GPU resource to request."
|
|
121
|
+
default: gpu:1
|
|
122
|
+
envmodules:
|
|
123
|
+
default:
|
|
124
|
+
- "GCC/11.3.0"
|
|
125
|
+
- "beagle-lib/3.1.2-CUDA-11.7.0"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from utils import decimal_year_to_date
|
|
2
|
+
from scripts.populate_beast_template import taxa_from_fasta
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
SNAKE_DIR = Path(workflow.basedir)
|
|
6
|
+
TEMPLATE_DIR = SNAKE_DIR / "templates"
|
|
7
|
+
SCRIPT_DIR = SNAKE_DIR / "scripts"
|
|
8
|
+
|
|
9
|
+
OUT_DIR=Path(config["outdir"])
|
|
10
|
+
if config["dated"]:
|
|
11
|
+
# create timestamped output directory
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
OUT_DIR = OUT_DIR / datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
|
14
|
+
CLOCK_DIR = OUT_DIR / "clocks"
|
|
15
|
+
|
|
16
|
+
MLE: bool = config["marginal_likelihood"].get("estimate")
|
|
17
|
+
fit_clocks: bool = config["fit_clocks"]
|
|
18
|
+
rate_gamma_prior_scale=config["rate_gamma_prior_scale"]
|
|
19
|
+
rate_gamma_prior_shape=config["rate_gamma_prior_shape"]
|
|
20
|
+
clocks = expand("{clock}_{rate_gamma_prior_shape}_{rate_gamma_prior_scale}", clock=config["clock"], rate_gamma_prior_shape=rate_gamma_prior_shape, rate_gamma_prior_scale=rate_gamma_prior_scale)
|
|
21
|
+
flc_clocks = [c for c in clocks if "flc" in c]
|
|
22
|
+
relaxed_clocks = [c for c in clocks if "relaxed" in c]
|
|
23
|
+
alignment_path=config["alignment"]
|
|
24
|
+
date_delimiter=config["date_delimiter"]
|
|
25
|
+
date_index=config["date_index"]
|
|
26
|
+
|
|
27
|
+
duplicates = range(1, config["duplicates"] + 1) if fit_clocks else []
|
|
28
|
+
mle_duplicates = range(1, config["marginal_likelihood"].get("duplicates") + 1) if MLE else []
|
|
29
|
+
|
|
30
|
+
ALL_LOG_FILES = expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.log", clock=clocks, duplicate=duplicates)
|
|
31
|
+
PER_CLOCK_LOG_FILES = lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates]
|
|
32
|
+
|
|
33
|
+
TAXA = taxa_from_fasta(
|
|
34
|
+
alignment_path,
|
|
35
|
+
date_delimiter=date_delimiter,
|
|
36
|
+
date_index=date_index,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# The most recent sampling date is the latest (maximum) tip date, not the earliest.
most_recent_sampling_date = decimal_year_to_date(max(TAXA, key=lambda taxa: taxa.date).date)
|
|
40
|
+
|
|
41
|
+
print(f"Running Episodic with {len(TAXA)} taxa")
|
|
42
|
+
print(f"Most recent sampling date: {most_recent_sampling_date}")
|
|
43
|
+
|
|
44
|
+
include: "rules/beast.smk"
|
|
45
|
+
include: "rules/config.smk"
|
|
46
|
+
include: "rules/report.smk"
|
|
47
|
+
include: "rules/tree.smk"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
CLOCK_FILES = []
|
|
53
|
+
|
|
54
|
+
CLOCK_FILES.extend(
|
|
55
|
+
[
|
|
56
|
+
expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}_trace_plots", clock=clocks, duplicate=duplicates),
|
|
57
|
+
expand(CLOCK_DIR / "{clock}" / "{clock}-summary.csv", clock=clocks),
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
CLOCK_FILES.extend(
|
|
61
|
+
[CLOCK_DIR / f"{clock}" / f"{clock}-violin.svg" for clock in flc_clocks],
|
|
62
|
+
)
|
|
63
|
+
CLOCK_FILES.extend(
|
|
64
|
+
[CLOCK_DIR / f"{clock}" / f"{clock}-odds.csv" for clock in flc_clocks],
|
|
65
|
+
)
|
|
66
|
+
CLOCK_FILES.extend(
|
|
67
|
+
[CLOCK_DIR / "clocks-violin.svg",
|
|
68
|
+
CLOCK_DIR / "clocks-trace.svg",]
|
|
69
|
+
)
|
|
70
|
+
if config.get("trees"):
|
|
71
|
+
CLOCK_FILES.extend(
|
|
72
|
+
expand(
|
|
73
|
+
CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.mcc.{heights}.{ext}",
|
|
74
|
+
clock=clocks,
|
|
75
|
+
duplicate=duplicates,
|
|
76
|
+
heights=config["mcc_tree"].get("heights", "mean"),
|
|
77
|
+
ext=["nwk", "svg"]
|
|
78
|
+
),
|
|
79
|
+
)
|
|
80
|
+
CLOCK_FILES.extend(
|
|
81
|
+
expand(
|
|
82
|
+
CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.rate_quantiles.{ext}",
|
|
83
|
+
clock=clocks,
|
|
84
|
+
duplicate=duplicates,
|
|
85
|
+
ext=["csv", "svg"]
|
|
86
|
+
)
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
OUTPUT_FILES = [
|
|
92
|
+
OUT_DIR / "config.yaml",
|
|
93
|
+
]
|
|
94
|
+
|
|
95
|
+
if fit_clocks:
|
|
96
|
+
OUTPUT_FILES.extend(
|
|
97
|
+
CLOCK_FILES
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
if MLE:
|
|
101
|
+
OUTPUT_FILES.append(
|
|
102
|
+
OUT_DIR / "mle" / "mle.svg"
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
rule all:
|
|
106
|
+
input:
|
|
107
|
+
*OUTPUT_FILES
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Note that we use `threads` here as SLURM `--cpus-per-task`.
|
|
2
|
+
cluster: >-
|
|
3
|
+
mkdir -p logs && sbatch
|
|
4
|
+
$(if [[ '{resources.account}' ]]; then echo '-A {resources.account}'; fi)
|
|
5
|
+
$(if [[ '{resources.partition}' ]]; then echo '-p {resources.partition}'; fi)
|
|
6
|
+
--parsable
|
|
7
|
+
--time={resources.runtime}
|
|
8
|
+
--mem={resources.mem_mb}
|
|
9
|
+
-N {resources.nodes}
|
|
10
|
+
-n {resources.tasks_per_node}
|
|
11
|
+
-c {threads}
|
|
12
|
+
-o logs/{rule}-{wildcards}.out -e logs/{rule}-{wildcards}.err
|
|
13
|
+
$(if [[ '{resources.qos}' ]]; then echo '-q {resources.qos}'; fi)
|
|
14
|
+
$(if [[ '{resources.gres}' ]]; then echo '--gres={resources.gres}'; fi)
|
|
15
|
+
{resources.extra}
|
|
16
|
+
default-resources:
|
|
17
|
+
- account=''
|
|
18
|
+
- partition=''
|
|
19
|
+
- runtime=15
|
|
20
|
+
- mem_mb=4000
|
|
21
|
+
- nodes=1
|
|
22
|
+
- tasks_per_node=1
|
|
23
|
+
- qos='covid19'
|
|
24
|
+
- gres=''
|
|
25
|
+
- extra=''
|
|
26
|
+
cluster-cancel: "scancel {jobid}"
|
|
27
|
+
jobs: 50
|
|
28
|
+
use-conda: true
|
|
29
|
+
use-envmodules: true
|
|
30
|
+
printshellcmds: true
|
|
31
|
+
rerun-incomplete: true
|
|
32
|
+
keep-going: true
|
|
33
|
+
local-cores: 1
|
|
34
|
+
max-jobs-per-second: 10
|
|
35
|
+
max-status-checks-per-second: 1
|
|
36
|
+
latency-wait: 30
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
|
|
2
|
+
rule create_beast_xml:
|
|
3
|
+
input:
|
|
4
|
+
alignment = config["alignment"],
|
|
5
|
+
output:
|
|
6
|
+
beast_XML_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.xml",
|
|
7
|
+
params:
|
|
8
|
+
template = TEMPLATE_DIR / "beast_xml_template.jinja",
|
|
9
|
+
# Escape the pipe so the shell does not treat it as a pipeline operator ("\\|" is a literal backslash followed by a pipe).
date_delimiter = "\\|" if config.get("date_delimiter") == "|" else config.get("date_delimiter"),
|
|
10
|
+
date_index = config.get("date_index", -1),
|
|
11
|
+
groups = " ".join(config.get("group")),
|
|
12
|
+
clock = lambda wildcards: wildcards.clock.split("_")[0],
|
|
13
|
+
rate_gamma_prior_shape = config.get("rate_gamma_prior_shape"),
|
|
14
|
+
rate_gamma_prior_scale = config.get("rate_gamma_prior_scale"),
|
|
15
|
+
chain_length = config.get("chain_length"),
|
|
16
|
+
samples = config.get("samples"),
|
|
17
|
+
mle = lambda wildcards: "--mle" if "mle" in wildcards.name else "",
|
|
18
|
+
mle_chain_length = f"--mle-chain-length {config['marginal_likelihood'].get('chain_length')}",
|
|
19
|
+
mle_path_steps = f"--mle-path-steps {config['marginal_likelihood'].get('paths')}",
|
|
20
|
+
no_trace = lambda wildcards: "--no-trace" if "mle" in wildcards.name else "",
|
|
21
|
+
no_trees = lambda wildcards: "--no-trees" if not config.get("trees") or "mle" in wildcards.name else "",
|
|
22
|
+
# The CLI option is declared as `newick` in snk.yaml; `fixed_tree` is still honoured for existing configs.
fixed_tree = f'--fixed-tree {config.get("newick") or config.get("fixed_tree")}' if config.get("newick") or config.get("fixed_tree") else "",
|
|
23
|
+
shell:
|
|
24
|
+
"""
|
|
25
|
+
python {SCRIPT_DIR}/populate_beast_template.py \
|
|
26
|
+
{params.template} \
|
|
27
|
+
--output {output.beast_XML_file} \
|
|
28
|
+
--alignment {input.alignment} \
|
|
29
|
+
--date-delimiter {params.date_delimiter} \
|
|
30
|
+
--date-index {params.date_index} \
|
|
31
|
+
--groups {params.groups} \
|
|
32
|
+
--clock {params.clock} \
|
|
33
|
+
--rate-gamma-prior-shape {params.rate_gamma_prior_shape} \
|
|
34
|
+
--rate-gamma-prior-scale {params.rate_gamma_prior_scale} \
|
|
35
|
+
--chain-length {params.chain_length} \
|
|
36
|
+
--samples {params.samples} \
|
|
37
|
+
{params.mle} \
|
|
38
|
+
{params.mle_chain_length} \
|
|
39
|
+
{params.mle_path_steps} \
|
|
40
|
+
{params.no_trace} \
|
|
41
|
+
{params.no_trees} \
|
|
42
|
+
{params.fixed_tree}
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
TREES = {"beast_trees_file": CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees"} if config.get("trees") else {}
|
|
46
|
+
|
|
47
|
+
rule run_beast:
|
|
48
|
+
input:
|
|
49
|
+
beast_XML_file = rules.create_beast_xml.output.beast_XML_file,
|
|
50
|
+
output:
|
|
51
|
+
beast_stdout_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.stdout",
|
|
52
|
+
beast_log_file = CLOCK_DIR / "{clock}" / "{name}" / "{name}.log",
|
|
53
|
+
**TREES,
|
|
54
|
+
threads: config["beast"].get("threads")
|
|
55
|
+
resources:
|
|
56
|
+
**config["beast"].get("resources", {}),
|
|
57
|
+
envmodules:
|
|
58
|
+
*config["beast"].get("envmodules", []),
|
|
59
|
+
conda:
|
|
60
|
+
"../envs/beast.yml"
|
|
61
|
+
shell:
|
|
62
|
+
"""
|
|
63
|
+
beast -working -overwrite -beagle_GPU -threads {threads} {input.beast_XML_file} > {output.beast_stdout_file}
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
MLE_OUT_DIR = OUT_DIR / "mle" / "{clock}"
|
|
67
|
+
|
|
68
|
+
use rule create_beast_xml as create_mle_xml with:
|
|
69
|
+
output:
|
|
70
|
+
beast_XML_file = MLE_OUT_DIR / "{name}.xml",
|
|
71
|
+
|
|
72
|
+
use rule run_beast as run_mle_beast with:
|
|
73
|
+
input:
|
|
74
|
+
beast_XML_file = rules.create_mle_xml.output.beast_XML_file,
|
|
75
|
+
output:
|
|
76
|
+
beast_stdout_file = MLE_OUT_DIR / "{name}.stdout",
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
MLE_DIR = OUT_DIR / "mle"
|
|
2
|
+
|
|
3
|
+
rule extract_mle:
|
|
4
|
+
input:
|
|
5
|
+
expand(OUT_DIR / "mle" / "{clock}" / "{clock}_mle_{duplicate}.stdout", clock=clocks, duplicate=mle_duplicates),
|
|
6
|
+
output:
|
|
7
|
+
MLE_DIR / "mle.svg",
|
|
8
|
+
conda:
|
|
9
|
+
"../envs/python.yml"
|
|
10
|
+
shell:
|
|
11
|
+
"""
|
|
12
|
+
python {SCRIPT_DIR}/extract_mle.py {MLE_DIR}
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
rule plot_traces:
|
|
16
|
+
"""
|
|
17
|
+
Makes trace plots from the beast log file.
|
|
18
|
+
"""
|
|
19
|
+
input:
|
|
20
|
+
rules.run_beast.output.beast_log_file,
|
|
21
|
+
output:
|
|
22
|
+
directory(CLOCK_DIR / "{clock}" / "{name}" / "{name}_trace_plots/"),
|
|
23
|
+
conda:
|
|
24
|
+
"../envs/plot_traces.yml"
|
|
25
|
+
shell:
|
|
26
|
+
"""
|
|
27
|
+
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/plot_traces.py {input} {output}
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
rule summary:
|
|
31
|
+
"""
|
|
32
|
+
Makes combined summaries from the beast log files.
|
|
33
|
+
"""
|
|
34
|
+
input:
|
|
35
|
+
PER_CLOCK_LOG_FILES,
|
|
36
|
+
output:
|
|
37
|
+
posterior_svg=CLOCK_DIR / "{clock}" / "{clock}-summary.csv",
|
|
38
|
+
params:
|
|
39
|
+
output=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}" / f"{wildcards.clock}-summary.csv",
|
|
40
|
+
conda:
|
|
41
|
+
"../envs/arviz.yml"
|
|
42
|
+
shell:
|
|
43
|
+
"""
|
|
44
|
+
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/arviz_output.py summary {input} {params.output}
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
rule plot_flc_rates:
|
|
48
|
+
"""
|
|
49
|
+
Makes plots from the flc clock files.
|
|
50
|
+
"""
|
|
51
|
+
input:
|
|
52
|
+
lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates if "flc" in wildcards.clock],
|
|
53
|
+
output:
|
|
54
|
+
rate_svg=CLOCK_DIR / "{clock}" / "{clock}-violin.svg"
|
|
55
|
+
params:
|
|
56
|
+
directory=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}",
|
|
57
|
+
gamma_shape=rate_gamma_prior_shape,
|
|
58
|
+
gamma_scale=rate_gamma_prior_scale,
|
|
59
|
+
conda:
|
|
60
|
+
"../envs/arviz.yml"
|
|
61
|
+
shell:
|
|
62
|
+
"""
|
|
63
|
+
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/arviz_output.py rates {input} {params.directory} --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
use rule plot_flc_rates as plot_rates with:
|
|
67
|
+
input:
|
|
68
|
+
expand(CLOCK_DIR / "{clock}" / "{clock}_{duplicate}" / "{clock}_{duplicate}.log", clock=clocks, duplicate=duplicates),
|
|
69
|
+
output:
|
|
70
|
+
clocks_violin=CLOCK_DIR / "clocks-violin.svg",
|
|
71
|
+
clocks_trace=CLOCK_DIR / "clocks-trace.svg",
|
|
72
|
+
params:
|
|
73
|
+
directory=CLOCK_DIR,
|
|
74
|
+
gamma_shape=rate_gamma_prior_shape,
|
|
75
|
+
gamma_scale=rate_gamma_prior_scale,
|
|
76
|
+
|
|
77
|
+
rule calculate_odds:
|
|
78
|
+
input:
|
|
79
|
+
lambda wildcards: [CLOCK_DIR / wildcards.clock / f"{wildcards.clock}_{duplicate}" / f"{wildcards.clock}_{duplicate}.log" for duplicate in duplicates if "flc" in wildcards.clock],
|
|
80
|
+
output:
|
|
81
|
+
rate_svg=CLOCK_DIR / "{clock}" / "{clock}-odds.csv"
|
|
82
|
+
params:
|
|
83
|
+
directory=lambda wildcards: CLOCK_DIR / f"{wildcards.clock}",
|
|
84
|
+
gamma_shape=rate_gamma_prior_shape,
|
|
85
|
+
gamma_scale=rate_gamma_prior_scale,
|
|
86
|
+
conda:
|
|
87
|
+
"../envs/python.yml"
|
|
88
|
+
shell:
|
|
89
|
+
"""
|
|
90
|
+
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/calculate_odds.py {input} {output} --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
|
|
91
|
+
for file in {input}
|
|
92
|
+
do
|
|
93
|
+
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/calculate_odds.py $file ${{file%.log}}-odds.csv --gamma-shape {params.gamma_shape} --gamma-scale {params.gamma_scale}
|
|
94
|
+
done
|
|
95
|
+
"""
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
|
|
2
|
+
rule max_clade_credibility_tree:
|
|
3
|
+
"""
|
|
4
|
+
Builds the maximum clade credibility (MCC) tree from the beast trees file using treeannotator.
|
|
5
|
+
"""
|
|
6
|
+
input:
|
|
7
|
+
# Spell the path out: `beast_trees_file` is not a named output of run_beast when config["trees"] is false.
CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees",
|
|
8
|
+
output:
|
|
9
|
+
CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.nexus",
|
|
10
|
+
params:
|
|
11
|
+
burnin = int(int(config['samples']) * 0.1),
|
|
12
|
+
conda:
|
|
13
|
+
"../envs/beast.yml"
|
|
14
|
+
shell:
|
|
15
|
+
"""
|
|
16
|
+
treeannotator -burninTrees {params.burnin} -heights {wildcards.heights} {input} {output}
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
rule max_clade_credibility_tree_newick:
|
|
21
|
+
"""
|
|
22
|
+
Converts the MCC tree from nexus to newick format.
|
|
23
|
+
"""
|
|
24
|
+
input:
|
|
25
|
+
rules.max_clade_credibility_tree.output,
|
|
26
|
+
output:
|
|
27
|
+
CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.nwk",
|
|
28
|
+
conda:
|
|
29
|
+
"../envs/phylo.yml"
|
|
30
|
+
shell:
|
|
31
|
+
"${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/tree_converter.py {input} {output} --node-label posterior"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
rule max_clade_credibility_tree_render:
|
|
35
|
+
"""
|
|
36
|
+
Renders the MCC tree in SVG format.
|
|
37
|
+
"""
|
|
38
|
+
input:
|
|
39
|
+
rules.max_clade_credibility_tree.output,
|
|
40
|
+
output:
|
|
41
|
+
CLOCK_DIR / "{clock}" / "{name}" / "{name}.mcc.{heights}.svg",
|
|
42
|
+
params:
|
|
43
|
+
mrsd = most_recent_sampling_date,
|
|
44
|
+
conda:
|
|
45
|
+
"../envs/ggtree.yml"
|
|
46
|
+
shell:
|
|
47
|
+
"${{CONDA_PREFIX}}/bin/RScript {SCRIPT_DIR}/plotMCCtree.R --input {input} --output {output} --mrsd {params.mrsd}"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
rule rate_quantile_analysis:
|
|
51
|
+
input:
|
|
52
|
+
# Spell the path out: `beast_trees_file` is not a named output of run_beast when config["trees"] is false.
CLOCK_DIR / "{clock}" / "{name}" / "{name}.trees",
|
|
53
|
+
output:
|
|
54
|
+
csv = CLOCK_DIR / "{clock}" / "{name}" / "{name}.rate_quantiles.csv",
|
|
55
|
+
svg = CLOCK_DIR / "{clock}" / "{name}" / "{name}.rate_quantiles.svg",
|
|
56
|
+
params:
|
|
57
|
+
groups = " ".join(f"-g {group}" for group in config['group']),
|
|
58
|
+
conda:
|
|
59
|
+
"../envs/phylo.yml"
|
|
60
|
+
shell:
|
|
61
|
+
"""
|
|
62
|
+
python {SCRIPT_DIR}/phylo_rate_quantile_analysis.py \
|
|
63
|
+
{input} \
|
|
64
|
+
--output-csv {output.csv} \
|
|
65
|
+
--output-plot {output.svg} \
|
|
66
|
+
{params.groups}
|
|
67
|
+
"""
|
|
68
|
+
|