bdext 0.1.64__tar.gz → 0.1.66__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdext-0.1.66/PKG-INFO +229 -0
- bdext-0.1.66/README.md +211 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/__init__.py +1 -3
- bdext-0.1.66/bdeissct_dl/bdeissct_model.py +78 -0
- bdext-0.1.66/bdeissct_dl/dl_model.py +89 -0
- bdext-0.1.66/bdeissct_dl/estimator.py +74 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/model_serializer.py +3 -33
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/scaler_fitting.py +3 -6
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/sumstat_checker.py +2 -2
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/training.py +9 -30
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/tree_encoder.py +13 -32
- bdext-0.1.66/bdext.egg-info/PKG-INFO +229 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdext.egg-info/SOURCES.txt +0 -7
- {bdext-0.1.64 → bdext-0.1.66}/bdext.egg-info/entry_points.txt +0 -2
- {bdext-0.1.64 → bdext-0.1.66}/setup.py +1 -6
- bdext-0.1.64/LICENSE +0 -674
- bdext-0.1.64/PKG-INFO +0 -167
- bdext-0.1.64/README.md +0 -149
- bdext-0.1.64/bdeissct_dl/bdeissct_model.py +0 -132
- bdext-0.1.64/bdeissct_dl/dl_model.py +0 -201
- bdext-0.1.64/bdeissct_dl/estimator.py +0 -174
- bdext-0.1.64/bdeissct_dl/estimator_ct.py +0 -63
- bdext-0.1.64/bdeissct_dl/main_covid.py +0 -76
- bdext-0.1.64/bdeissct_dl/model_finder.py +0 -47
- bdext-0.1.64/bdeissct_dl/pinball_loss.py +0 -48
- bdext-0.1.64/bdeissct_dl/train_ct.py +0 -125
- bdext-0.1.64/bdext.egg-info/PKG-INFO +0 -167
- {bdext-0.1.64 → bdext-0.1.66}/bdeissct_dl/tree_manager.py +0 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdext.egg-info/dependency_links.txt +0 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdext.egg-info/requires.txt +0 -0
- {bdext-0.1.64 → bdext-0.1.66}/bdext.egg-info/top_level.txt +0 -0
- {bdext-0.1.64 → bdext-0.1.66}/setup.cfg +0 -0
bdext-0.1.66/PKG-INFO
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: bdext
|
|
3
|
+
Version: 0.1.66
|
|
4
|
+
Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
|
|
5
|
+
Home-page: https://github.com/modpath/bdeissct
|
|
6
|
+
Author: Anna Zhukova
|
|
7
|
+
Author-email: anna.zhukova@pasteur.fr
|
|
8
|
+
License: UNKNOWN
|
|
9
|
+
Description: # bdext
|
|
10
|
+
|
|
11
|
+
The bdext package provides scripts to train and assess
|
|
12
|
+
Deep-Learning-enabled estimators of BD(EI)(SS)(CT) model parameters from phylogenetic trees
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
[//]: # ([](https://doi.org/10.1093/sysbio/syad059))
|
|
17
|
+
[//]: # ([](https://github.com/evolbioinfo/bdext/releases))
|
|
18
|
+
[](https://pypi.org/project/bdext/)
|
|
19
|
+
[](https://pypi.org/project/bdext)
|
|
20
|
+
[](https://hub.docker.com/r/evolbioinfo/bdext/tags)
|
|
21
|
+
|
|
22
|
+
## BDEISS-CT model
|
|
23
|
+
|
|
24
|
+
The Birth-Death (BD) Exposed-Infectious (EI) with SuperSpreading (SS) and Contact-Tracing (CT) model (BDEISS-CT)
|
|
25
|
+
can be described with the following 8 parameters:
|
|
26
|
+
|
|
27
|
+
* average reproduction number R;
|
|
28
|
+
* average total infection duration d;
|
|
29
|
+
* incubation period d<sub>inc</sub>;
|
|
30
|
+
* sampling probability ρ;
|
|
31
|
+
* fraction of superspreaders f<sub>S</sub>;
|
|
32
|
+
* super-spreading transmission increase X<sub>S</sub>;
|
|
33
|
+
* contact tracing probability υ;
|
|
34
|
+
* contact-traced removal speed up X<sub>C</sub>.
|
|
35
|
+
|
|
36
|
+
Setting d<sub>inc</sub>=0 removes incubation (EI), setting f<sub>S</sub>=0 removes superspreading (SS), while setting υ=0 removes contact-tracing (CT).
|
|
37
|
+
|
|
38
|
+
For identifiability, we require the sampling probability ρ to be given by the user.
|
|
39
|
+
The other parameters are estimated from a time-scaled phylogenetic tree.
|
|
40
|
+
|
|
41
|
+
[//]: # (## BDEISS-CT parameter estimator)
|
|
42
|
+
|
|
43
|
+
[//]: # ()
|
|
44
|
+
[//]: # (The bdeissct_dl package provides deep-learning-based BDEISS-CT model parameter estimator )
|
|
45
|
+
|
|
46
|
+
[//]: # (from a user-supplied time-scaled phylogenetic tree. )
|
|
47
|
+
|
|
48
|
+
[//]: # (User must also provide a value for one of the three BD model parameters (λ, ψ, or ρ). )
|
|
49
|
+
|
|
50
|
+
[//]: # (We recommend providing the sampling probability ρ, )
|
|
51
|
+
|
|
52
|
+
[//]: # (which could be estimated as the number of tree tips divided by the number of declared cases for the same time period.)
|
|
53
|
+
|
|
54
|
+
[//]: # ()
|
|
55
|
+
[//]: # ()
|
|
56
|
+
[//]: # (## Input data)
|
|
57
|
+
|
|
58
|
+
[//]: # (One needs to supply a time-scaled phylogenetic tree in newick format. )
|
|
59
|
+
|
|
60
|
+
[//]: # (In the examples below we will use an HIV tree reconstructed from 200 sequences, )
|
|
61
|
+
|
|
62
|
+
[//]: # (published in [[Rasmussen _et al._ PLoS Comput. Biol. 2017]](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005448), )
|
|
63
|
+
|
|
64
|
+
[//]: # (which you can find at [PairTree GitHub](https://github.com/davidrasm/PairTree) )
|
|
65
|
+
|
|
66
|
+
[//]: # (and in [hiv_zurich/Zurich.nwk](hiv_zurich/Zurich.nwk). )
|
|
67
|
+
|
|
68
|
+
[//]: # ()
|
|
69
|
+
[//]: # (## Installation)
|
|
70
|
+
|
|
71
|
+
[//]: # ()
|
|
72
|
+
[//]: # (There are 4 alternative ways to run __bdeissct_dl__ on your computer: )
|
|
73
|
+
|
|
74
|
+
[//]: # (with [docker](https://www.docker.com/community-edition), )
|
|
75
|
+
|
|
76
|
+
[//]: # ([apptainer](https://apptainer.org/),)
|
|
77
|
+
|
|
78
|
+
[//]: # (in Python3, or via command line (requires installation with Python3).)
|
|
79
|
+
|
|
80
|
+
[//]: # ()
|
|
81
|
+
[//]: # ()
|
|
82
|
+
[//]: # ()
|
|
83
|
+
[//]: # (### Run in python3 or command-line (for linux systems, recommended Ubuntu 21 or newer versions))
|
|
84
|
+
|
|
85
|
+
[//]: # ()
|
|
86
|
+
[//]: # (You could either install python (version 3.9 or higher) system-wide and then install bdeissct_dl via pip:)
|
|
87
|
+
|
|
88
|
+
[//]: # (```bash)
|
|
89
|
+
|
|
90
|
+
[//]: # (sudo apt install -y python3 python3-pip python3-setuptools python3-distutils)
|
|
91
|
+
|
|
92
|
+
[//]: # (pip3 install bdeissct_dl)
|
|
93
|
+
|
|
94
|
+
[//]: # (```)
|
|
95
|
+
|
|
96
|
+
[//]: # ()
|
|
97
|
+
[//]: # (or alternatively, you could install python (version 3.9 or higher) and bdeissct_dl via [conda](https://conda.io/docs/) (make sure that conda is installed first). )
|
|
98
|
+
|
|
99
|
+
[//]: # (Here we will create a conda environment called _phyloenv_:)
|
|
100
|
+
|
|
101
|
+
[//]: # (```bash)
|
|
102
|
+
|
|
103
|
+
[//]: # (conda create --name phyloenv python=3.12)
|
|
104
|
+
|
|
105
|
+
[//]: # (conda activate phyloenv)
|
|
106
|
+
|
|
107
|
+
[//]: # (pip install bdeissct_dl)
|
|
108
|
+
|
|
109
|
+
[//]: # (```)
|
|
110
|
+
|
|
111
|
+
[//]: # ()
|
|
112
|
+
[//]: # ()
|
|
113
|
+
[//]: # (#### Basic usage in a command line)
|
|
114
|
+
|
|
115
|
+
[//]: # (If you installed __bdeissct_dl__ in a conda environment (here named _phyloenv_), do not forget to first activate it, e.g.)
|
|
116
|
+
|
|
117
|
+
[//]: # ()
|
|
118
|
+
[//]: # (```bash)
|
|
119
|
+
|
|
120
|
+
[//]: # (conda activate phyloenv)
|
|
121
|
+
|
|
122
|
+
[//]: # (```)
|
|
123
|
+
|
|
124
|
+
[//]: # ()
|
|
125
|
+
[//]: # (Run the following command to estimate the BDEISS-CT parameters and their 95% CIs for this tree, assuming a sampling probability of 0.25, )
|
|
126
|
+
|
|
127
|
+
[//]: # (and save the estimated parameters to a comma-separated file estimates.csv.)
|
|
128
|
+
|
|
129
|
+
[//]: # (```bash)
|
|
130
|
+
|
|
131
|
+
[//]: # (bdeissct_infer --nwk Zurich.nwk --ci --p 0.25 --log estimates.csv)
|
|
132
|
+
|
|
133
|
+
[//]: # (```)
|
|
134
|
+
|
|
135
|
+
[//]: # ()
|
|
136
|
+
[//]: # (#### Help)
|
|
137
|
+
|
|
138
|
+
[//]: # ()
|
|
139
|
+
[//]: # (To see detailed options, run:)
|
|
140
|
+
|
|
141
|
+
[//]: # (```bash)
|
|
142
|
+
|
|
143
|
+
[//]: # (bdeissct_infer --help)
|
|
144
|
+
|
|
145
|
+
[//]: # (```)
|
|
146
|
+
|
|
147
|
+
[//]: # ()
|
|
148
|
+
[//]: # ()
|
|
149
|
+
[//]: # (### Run with docker)
|
|
150
|
+
|
|
151
|
+
[//]: # ()
|
|
152
|
+
[//]: # (#### Basic usage)
|
|
153
|
+
|
|
154
|
+
[//]: # (Once [docker](https://www.docker.com/community-edition) is installed, )
|
|
155
|
+
|
|
156
|
+
[//]: # (run the following command to estimate BDEISS-CT model parameters:)
|
|
157
|
+
|
|
158
|
+
[//]: # (```bash)
|
|
159
|
+
|
|
160
|
+
[//]: # (docker run -v <path_to_the_folder_containing_the_tree>:/data:rw -t evolbioinfo/bdeissct --nwk /data/Zurich.nwk --ci --p 0.25 --log /data/estimates.csv)
|
|
161
|
+
|
|
162
|
+
[//]: # (```)
|
|
163
|
+
|
|
164
|
+
[//]: # ()
|
|
165
|
+
[//]: # (This will produce a comma-separated file estimates.csv in the <path_to_the_folder_containing_the_tree> folder,)
|
|
166
|
+
|
|
167
|
+
[//]: # ( containing the estimated parameter values and their 95% CIs (can be viewed with a text editor, Excel or Libre Office Calc).)
|
|
168
|
+
|
|
169
|
+
[//]: # ()
|
|
170
|
+
[//]: # (#### Help)
|
|
171
|
+
|
|
172
|
+
[//]: # ()
|
|
173
|
+
[//]: # (To see advanced options, run)
|
|
174
|
+
|
|
175
|
+
[//]: # (```bash)
|
|
176
|
+
|
|
177
|
+
[//]: # (docker run -t evolbioinfo/bdeissct -h)
|
|
178
|
+
|
|
179
|
+
[//]: # (```)
|
|
180
|
+
|
|
181
|
+
[//]: # ()
|
|
182
|
+
[//]: # ()
|
|
183
|
+
[//]: # ()
|
|
184
|
+
[//]: # (### Run with apptainer)
|
|
185
|
+
|
|
186
|
+
[//]: # ()
|
|
187
|
+
[//]: # (#### Basic usage)
|
|
188
|
+
|
|
189
|
+
[//]: # (Once [apptainer](https://apptainer.org/docs/user/latest/quick_start.html#installation) is installed, )
|
|
190
|
+
|
|
191
|
+
[//]: # (run the following command to estimate BDEISS-CT model parameters (from the folder where the Zurich.nwk tree is contained):)
|
|
192
|
+
|
|
193
|
+
[//]: # ()
|
|
194
|
+
[//]: # (```bash)
|
|
195
|
+
|
|
196
|
+
[//]: # (apptainer run docker://evolbioinfo/bdeissct --nwk Zurich.nwk --ci --p 0.25 --log estimates.csv)
|
|
197
|
+
|
|
198
|
+
[//]: # (```)
|
|
199
|
+
|
|
200
|
+
[//]: # ()
|
|
201
|
+
[//]: # (This will produce a comma-separated file estimates.csv,)
|
|
202
|
+
|
|
203
|
+
[//]: # ( containing the estimated parameter values and their 95% CIs (can be viewed with a text editor, Excel or Libre Office Calc).)
|
|
204
|
+
|
|
205
|
+
[//]: # ()
|
|
206
|
+
[//]: # ()
|
|
207
|
+
[//]: # (#### Help)
|
|
208
|
+
|
|
209
|
+
[//]: # ()
|
|
210
|
+
[//]: # (To see advanced options, run)
|
|
211
|
+
|
|
212
|
+
[//]: # (```bash)
|
|
213
|
+
|
|
214
|
+
[//]: # (apptainer run docker://evolbioinfo/bdeissct -h)
|
|
215
|
+
|
|
216
|
+
[//]: # (```)
|
|
217
|
+
|
|
218
|
+
[//]: # ()
|
|
219
|
+
[//]: # ()
|
|
220
|
+
|
|
221
|
+
Keywords: phylogenetics,birth-death model,incubation,super-spreading,contact tracing
|
|
222
|
+
Platform: UNKNOWN
|
|
223
|
+
Classifier: Development Status :: 4 - Beta
|
|
224
|
+
Classifier: Environment :: Console
|
|
225
|
+
Classifier: Intended Audience :: Developers
|
|
226
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
227
|
+
Classifier: Topic :: Software Development
|
|
228
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
229
|
+
Description-Content-Type: text/markdown
|
bdext-0.1.66/README.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# bdext
|
|
2
|
+
|
|
3
|
+
The bdext package provides scripts to train and assess
|
|
4
|
+
Deep-Learning-enabled estimators of BD(EI)(SS)(CT) model parameters from phylogenetic trees
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
[//]: # ([](https://doi.org/10.1093/sysbio/syad059))
|
|
9
|
+
[//]: # ([](https://github.com/evolbioinfo/bdext/releases))
|
|
10
|
+
[](https://pypi.org/project/bdext/)
|
|
11
|
+
[](https://pypi.org/project/bdext)
|
|
12
|
+
[](https://hub.docker.com/r/evolbioinfo/bdext/tags)
|
|
13
|
+
|
|
14
|
+
## BDEISS-CT model
|
|
15
|
+
|
|
16
|
+
The Birth-Death (BD) Exposed-Infectious (EI) with SuperSpreading (SS) and Contact-Tracing (CT) model (BDEISS-CT)
|
|
17
|
+
can be described with the following 8 parameters:
|
|
18
|
+
|
|
19
|
+
* average reproduction number R;
|
|
20
|
+
* average total infection duration d;
|
|
21
|
+
* incubation period d<sub>inc</sub>;
|
|
22
|
+
* sampling probability ρ;
|
|
23
|
+
* fraction of superspreaders f<sub>S</sub>;
|
|
24
|
+
* super-spreading transmission increase X<sub>S</sub>;
|
|
25
|
+
* contact tracing probability υ;
|
|
26
|
+
* contact-traced removal speed up X<sub>C</sub>.
|
|
27
|
+
|
|
28
|
+
Setting d<sub>inc</sub>=0 removes incubation (EI), setting f<sub>S</sub>=0 removes superspreading (SS), while setting υ=0 removes contact-tracing (CT).
|
|
29
|
+
|
|
30
|
+
For identifiability, we require the sampling probability ρ to be given by the user.
|
|
31
|
+
The other parameters are estimated from a time-scaled phylogenetic tree.
|
|
32
|
+
|
|
33
|
+
[//]: # (## BDEISS-CT parameter estimator)
|
|
34
|
+
|
|
35
|
+
[//]: # ()
|
|
36
|
+
[//]: # (The bdeissct_dl package provides deep-learning-based BDEISS-CT model parameter estimator )
|
|
37
|
+
|
|
38
|
+
[//]: # (from a user-supplied time-scaled phylogenetic tree. )
|
|
39
|
+
|
|
40
|
+
[//]: # (User must also provide a value for one of the three BD model parameters (λ, ψ, or ρ). )
|
|
41
|
+
|
|
42
|
+
[//]: # (We recommend providing the sampling probability ρ, )
|
|
43
|
+
|
|
44
|
+
[//]: # (which could be estimated as the number of tree tips divided by the number of declared cases for the same time period.)
|
|
45
|
+
|
|
46
|
+
[//]: # ()
|
|
47
|
+
[//]: # ()
|
|
48
|
+
[//]: # (## Input data)
|
|
49
|
+
|
|
50
|
+
[//]: # (One needs to supply a time-scaled phylogenetic tree in newick format. )
|
|
51
|
+
|
|
52
|
+
[//]: # (In the examples below we will use an HIV tree reconstructed from 200 sequences, )
|
|
53
|
+
|
|
54
|
+
[//]: # (published in [[Rasmussen _et al._ PLoS Comput. Biol. 2017]](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005448), )
|
|
55
|
+
|
|
56
|
+
[//]: # (which you can find at [PairTree GitHub](https://github.com/davidrasm/PairTree) )
|
|
57
|
+
|
|
58
|
+
[//]: # (and in [hiv_zurich/Zurich.nwk](hiv_zurich/Zurich.nwk). )
|
|
59
|
+
|
|
60
|
+
[//]: # ()
|
|
61
|
+
[//]: # (## Installation)
|
|
62
|
+
|
|
63
|
+
[//]: # ()
|
|
64
|
+
[//]: # (There are 4 alternative ways to run __bdeissct_dl__ on your computer: )
|
|
65
|
+
|
|
66
|
+
[//]: # (with [docker](https://www.docker.com/community-edition), )
|
|
67
|
+
|
|
68
|
+
[//]: # ([apptainer](https://apptainer.org/),)
|
|
69
|
+
|
|
70
|
+
[//]: # (in Python3, or via command line (requires installation with Python3).)
|
|
71
|
+
|
|
72
|
+
[//]: # ()
|
|
73
|
+
[//]: # ()
|
|
74
|
+
[//]: # ()
|
|
75
|
+
[//]: # (### Run in python3 or command-line (for linux systems, recommended Ubuntu 21 or newer versions))
|
|
76
|
+
|
|
77
|
+
[//]: # ()
|
|
78
|
+
[//]: # (You could either install python (version 3.9 or higher) system-wide and then install bdeissct_dl via pip:)
|
|
79
|
+
|
|
80
|
+
[//]: # (```bash)
|
|
81
|
+
|
|
82
|
+
[//]: # (sudo apt install -y python3 python3-pip python3-setuptools python3-distutils)
|
|
83
|
+
|
|
84
|
+
[//]: # (pip3 install bdeissct_dl)
|
|
85
|
+
|
|
86
|
+
[//]: # (```)
|
|
87
|
+
|
|
88
|
+
[//]: # ()
|
|
89
|
+
[//]: # (or alternatively, you could install python (version 3.9 or higher) and bdeissct_dl via [conda](https://conda.io/docs/) (make sure that conda is installed first). )
|
|
90
|
+
|
|
91
|
+
[//]: # (Here we will create a conda environment called _phyloenv_:)
|
|
92
|
+
|
|
93
|
+
[//]: # (```bash)
|
|
94
|
+
|
|
95
|
+
[//]: # (conda create --name phyloenv python=3.12)
|
|
96
|
+
|
|
97
|
+
[//]: # (conda activate phyloenv)
|
|
98
|
+
|
|
99
|
+
[//]: # (pip install bdeissct_dl)
|
|
100
|
+
|
|
101
|
+
[//]: # (```)
|
|
102
|
+
|
|
103
|
+
[//]: # ()
|
|
104
|
+
[//]: # ()
|
|
105
|
+
[//]: # (#### Basic usage in a command line)
|
|
106
|
+
|
|
107
|
+
[//]: # (If you installed __bdeissct_dl__ in a conda environment (here named _phyloenv_), do not forget to first activate it, e.g.)
|
|
108
|
+
|
|
109
|
+
[//]: # ()
|
|
110
|
+
[//]: # (```bash)
|
|
111
|
+
|
|
112
|
+
[//]: # (conda activate phyloenv)
|
|
113
|
+
|
|
114
|
+
[//]: # (```)
|
|
115
|
+
|
|
116
|
+
[//]: # ()
|
|
117
|
+
[//]: # (Run the following command to estimate the BDEISS-CT parameters and their 95% CIs for this tree, assuming a sampling probability of 0.25, )
|
|
118
|
+
|
|
119
|
+
[//]: # (and save the estimated parameters to a comma-separated file estimates.csv.)
|
|
120
|
+
|
|
121
|
+
[//]: # (```bash)
|
|
122
|
+
|
|
123
|
+
[//]: # (bdeissct_infer --nwk Zurich.nwk --ci --p 0.25 --log estimates.csv)
|
|
124
|
+
|
|
125
|
+
[//]: # (```)
|
|
126
|
+
|
|
127
|
+
[//]: # ()
|
|
128
|
+
[//]: # (#### Help)
|
|
129
|
+
|
|
130
|
+
[//]: # ()
|
|
131
|
+
[//]: # (To see detailed options, run:)
|
|
132
|
+
|
|
133
|
+
[//]: # (```bash)
|
|
134
|
+
|
|
135
|
+
[//]: # (bdeissct_infer --help)
|
|
136
|
+
|
|
137
|
+
[//]: # (```)
|
|
138
|
+
|
|
139
|
+
[//]: # ()
|
|
140
|
+
[//]: # ()
|
|
141
|
+
[//]: # (### Run with docker)
|
|
142
|
+
|
|
143
|
+
[//]: # ()
|
|
144
|
+
[//]: # (#### Basic usage)
|
|
145
|
+
|
|
146
|
+
[//]: # (Once [docker](https://www.docker.com/community-edition) is installed, )
|
|
147
|
+
|
|
148
|
+
[//]: # (run the following command to estimate BDEISS-CT model parameters:)
|
|
149
|
+
|
|
150
|
+
[//]: # (```bash)
|
|
151
|
+
|
|
152
|
+
[//]: # (docker run -v <path_to_the_folder_containing_the_tree>:/data:rw -t evolbioinfo/bdeissct --nwk /data/Zurich.nwk --ci --p 0.25 --log /data/estimates.csv)
|
|
153
|
+
|
|
154
|
+
[//]: # (```)
|
|
155
|
+
|
|
156
|
+
[//]: # ()
|
|
157
|
+
[//]: # (This will produce a comma-separated file estimates.csv in the <path_to_the_folder_containing_the_tree> folder,)
|
|
158
|
+
|
|
159
|
+
[//]: # ( containing the estimated parameter values and their 95% CIs (can be viewed with a text editor, Excel or Libre Office Calc).)
|
|
160
|
+
|
|
161
|
+
[//]: # ()
|
|
162
|
+
[//]: # (#### Help)
|
|
163
|
+
|
|
164
|
+
[//]: # ()
|
|
165
|
+
[//]: # (To see advanced options, run)
|
|
166
|
+
|
|
167
|
+
[//]: # (```bash)
|
|
168
|
+
|
|
169
|
+
[//]: # (docker run -t evolbioinfo/bdeissct -h)
|
|
170
|
+
|
|
171
|
+
[//]: # (```)
|
|
172
|
+
|
|
173
|
+
[//]: # ()
|
|
174
|
+
[//]: # ()
|
|
175
|
+
[//]: # ()
|
|
176
|
+
[//]: # (### Run with apptainer)
|
|
177
|
+
|
|
178
|
+
[//]: # ()
|
|
179
|
+
[//]: # (#### Basic usage)
|
|
180
|
+
|
|
181
|
+
[//]: # (Once [apptainer](https://apptainer.org/docs/user/latest/quick_start.html#installation) is installed, )
|
|
182
|
+
|
|
183
|
+
[//]: # (run the following command to estimate BDEISS-CT model parameters (from the folder where the Zurich.nwk tree is contained):)
|
|
184
|
+
|
|
185
|
+
[//]: # ()
|
|
186
|
+
[//]: # (```bash)
|
|
187
|
+
|
|
188
|
+
[//]: # (apptainer run docker://evolbioinfo/bdeissct --nwk Zurich.nwk --ci --p 0.25 --log estimates.csv)
|
|
189
|
+
|
|
190
|
+
[//]: # (```)
|
|
191
|
+
|
|
192
|
+
[//]: # ()
|
|
193
|
+
[//]: # (This will produce a comma-separated file estimates.csv,)
|
|
194
|
+
|
|
195
|
+
[//]: # ( containing the estimated parameter values and their 95% CIs (can be viewed with a text editor, Excel or Libre Office Calc).)
|
|
196
|
+
|
|
197
|
+
[//]: # ()
|
|
198
|
+
[//]: # ()
|
|
199
|
+
[//]: # (#### Help)
|
|
200
|
+
|
|
201
|
+
[//]: # ()
|
|
202
|
+
[//]: # (To see advanced options, run)
|
|
203
|
+
|
|
204
|
+
[//]: # (```bash)
|
|
205
|
+
|
|
206
|
+
[//]: # (apptainer run docker://evolbioinfo/bdeissct -h)
|
|
207
|
+
|
|
208
|
+
[//]: # (```)
|
|
209
|
+
|
|
210
|
+
[//]: # ()
|
|
211
|
+
[//]: # ()
|
|
@@ -7,12 +7,10 @@ warnings.filterwarnings('ignore', r'divide by zero encountered in log')
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models')
|
|
10
|
-
TRAINING_PATH = os.path.join(os.path.dirname(__file__), 'data')
|
|
11
|
-
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
EPOCHS = 1000
|
|
15
|
-
BATCH_SIZE =
|
|
13
|
+
BATCH_SIZE = 8192
|
|
16
14
|
|
|
17
15
|
|
|
18
16
|
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
# ---------------------------------------------------------------------------
# Parameter names (used as column / output keys).
# ---------------------------------------------------------------------------

# NOTE(review): LA / PSI / MU / KAPPA look like rate parameters of the
# underlying birth-death model; their exact meaning is not shown in this
# module -- confirm against the simulator before relying on it.
LA = 'la'
PSI = 'psi'
# Sampling probability (given by the user for identifiability, per the README).
RHO = 'rho'
INFECTIOUS_TIME = 'd_I'
# Average reproduction number.
REPRODUCTIVE_NUMBER = 'R'
# Average total infection duration.
INFECTION_DURATION = 'd'

MU = 'mu'
# Incubation period (the "EI" part of the model).
INCUBATION_PERIOD = 'd_E'

# Superspreading ("SS"): fraction of superspreaders and their transmission increase.
F_S = 'f_S'
X_S = 'X_S'

# Contact tracing ("CT"): removal speed-up of the contact-traced and tracing probability.
X_C = 'X_C'
UPSILON = 'upsilon'

KAPPA = 'kappa'
REMOVAL_TIME_AFTER_NOTIFICATION = 'd_C'

# Groupings of the parameter names above by kind.
RATE_PARAMETERS = (LA, PSI, MU)
TIME_PARAMETERS = (
    INCUBATION_PERIOD,
    INFECTIOUS_TIME,
    REMOVAL_TIME_AFTER_NOTIFICATION,
    INFECTION_DURATION,
)

# ---------------------------------------------------------------------------
# Default bounds for probability-type and rate-type values.
# ---------------------------------------------------------------------------
DEFAULT_MIN_PROB = 1e-6
DEFAULT_MAX_PROB = 1
DEFAULT_MIN_RATE = 1e-3
DEFAULT_MAX_RATE = 1e3

# ---------------------------------------------------------------------------
# Model names: BD optionally extended with EI, SS and/or CT.
# ---------------------------------------------------------------------------
BD = 'BD'
BDCT = 'BDCT'

BDEI = 'BDEI'
BDEICT = 'BDEICT'

BDSS = 'BDSS'
BDSSCT = 'BDSSCT'

BDEISS = 'BDEISS'
BDEISSCT = 'BDEISSCT'

MODEL_FINDER = 'MF'

# All concrete models; no backslash continuations are needed inside parentheses.
MODELS = (
    BD, BDCT,
    BDEI, BDEICT,
    BDSS, BDSSCT,
    BDEISS, BDEISSCT,
)

# ---------------------------------------------------------------------------
# Target (estimated) columns per model, composed from per-feature pieces.
# ---------------------------------------------------------------------------
TARGET_CT_COLUMNS = (UPSILON, X_C)
TARGET_INCUBATION_COLUMNS = (INCUBATION_PERIOD,)
TARGET_SS_COLUMNS = (F_S, X_S)

TARGET_COLUMNS_BD = (REPRODUCTIVE_NUMBER, INFECTION_DURATION)
TARGET_COLUMNS_BDCT = TARGET_COLUMNS_BD + TARGET_CT_COLUMNS
TARGET_COLUMNS_BDEI = TARGET_COLUMNS_BD + TARGET_INCUBATION_COLUMNS
TARGET_COLUMNS_BDEICT = TARGET_COLUMNS_BDEI + TARGET_CT_COLUMNS
TARGET_COLUMNS_BDSS = TARGET_COLUMNS_BD + TARGET_SS_COLUMNS
TARGET_COLUMNS_BDSSCT = TARGET_COLUMNS_BDSS + TARGET_CT_COLUMNS
TARGET_COLUMNS_BDEISS = TARGET_COLUMNS_BDEI + TARGET_SS_COLUMNS
TARGET_COLUMNS_BDEISSCT = TARGET_COLUMNS_BDEISS + TARGET_CT_COLUMNS

# Model name -> target columns. Any model name that is not listed falls back
# to the full BDEISS-CT column set (and is inserted on first lookup, as with
# any defaultdict).
MODEL2TARGET_COLUMNS = defaultdict(
    lambda: TARGET_COLUMNS_BDEISSCT,
    {
        BD: TARGET_COLUMNS_BD,
        BDEI: TARGET_COLUMNS_BDEI,
        BDSS: TARGET_COLUMNS_BDSS,
        BDEISS: TARGET_COLUMNS_BDEISS,
        BDCT: TARGET_COLUMNS_BDCT,
        BDEICT: TARGET_COLUMNS_BDEICT,
        BDSSCT: TARGET_COLUMNS_BDSSCT,
        BDEISSCT: TARGET_COLUMNS_BDEISSCT,
    },
)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import tensorflow as tf
|
|
2
|
+
from tensorflow.python.keras.utils.generic_utils import register_keras_serializable
|
|
3
|
+
|
|
4
|
+
from bdeissct_dl.bdeissct_model import F_S, UPSILON, REPRODUCTIVE_NUMBER, \
|
|
5
|
+
INFECTION_DURATION, X_S, X_C, RHO, INCUBATION_PERIOD
|
|
6
|
+
|
|
7
|
+
# Learning rate of the default Adam optimizer created in build_model.
LEARNING_RATE = 0.001

# Per-output weight of each loss term in the total training loss.
# Every output starts with weight 1; the two bounded outputs are then rescaled.
LOSS_WEIGHTS = dict.fromkeys(
    (
        REPRODUCTIVE_NUMBER,
        INFECTION_DURATION,
        INCUBATION_PERIOD,
        F_S,
        UPSILON,
        X_C,
        X_S,
    ),
    1,
)
# as it is a value between 0 and 0.5, we multiply by 200 to scale it to [0, 100]
LOSS_WEIGHTS[F_S] = 200
# NOTE(review): presumably the analogous rescaling of a [0, 1] value to [0, 100] -- confirm
LOSS_WEIGHTS[UPSILON] = 100
|
|
18
|
+
|
|
19
|
+
@register_keras_serializable(package="bdeissct_dl", name="half_sigmoid")
def half_sigmoid(x):
    """
    Sigmoid activation scaled by one half.

    :param x: input passed to tf.sigmoid
    :return: half of the sigmoid of x, i.e. values in the range ~ [0, 0.5)
    """
    squashed = tf.sigmoid(x)
    return 0.5 * squashed
|
|
22
|
+
|
|
23
|
+
@register_keras_serializable(package="bdeissct_dl", name="relu_plus_one")
def relu_plus_one(x):
    """
    ReLU activation shifted up by one.

    :param x: input passed to tf.nn.relu
    :return: 1 plus the ReLU of x, i.e. values in the range ~ [1, infinity)
    """
    rectified = tf.nn.relu(x)
    return 1 + rectified
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Keras loss identifiers shared by several outputs.
_RELATIVE_ERROR = "mean_absolute_percentage_error"
_ABSOLUTE_ERROR = 'mae'

# Loss used for each output: a relative (percentage) error for every target
# except UPSILON and F_S, which use the absolute error.
LOSS_FUNCTIONS = {
    REPRODUCTIVE_NUMBER: _RELATIVE_ERROR,
    INFECTION_DURATION: _RELATIVE_ERROR,
    INCUBATION_PERIOD: _RELATIVE_ERROR,
    UPSILON: _ABSOLUTE_ERROR,
    RHO: _RELATIVE_ERROR,
    X_C: _RELATIVE_ERROR,
    F_S: _ABSOLUTE_ERROR,
    X_S: _RELATIVE_ERROR,
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def build_model(target_columns, n_x, optimizer=None, metrics=None):
    """
    Build and compile a funnel-shaped FFNN with 4 hidden layers and one output head per target.

    The shared trunk consists of four ELU Dense layers (128, 64, 32 and 16 units),
    with a 50% dropout after each of the first 2 hidden layers.
    This architecture follows the PhyloDeep paper [Voznica et al. Nature 2022].
    Every supported target present in target_columns gets its own single-unit Dense head
    on top of the trunk, with an activation restricting its output range.

    :param target_columns: collection of target names to predict; only REPRODUCTIVE_NUMBER,
        INFECTION_DURATION, INCUBATION_PERIOD, F_S, X_S, UPSILON and X_C get an output head,
        any other name is ignored
    :param n_x: input size (number of features)
    :param optimizer: by default Adam with learning rate of LEARNING_RATE
    :param metrics: evaluation metrics passed to compile, by default no metrics
    :return: the compiled model instance: tf.keras.models.Model,
        whose outputs form a dict keyed by target name
    :raises ValueError: if target_columns contains none of the supported targets
    """
    inputs = tf.keras.Input(shape=(n_x,))

    # NOTE(review): the layer names mention 256/128/64/32 while the layers have
    # 128/64/32/16 units. Both are kept exactly as they were: renaming layers or
    # resizing them could affect models that were already trained and saved --
    # confirm which of the two is intended before changing either.
    x = tf.keras.layers.Dense(128, activation='elu', name='layer1_dense256_elu')(inputs)
    x = tf.keras.layers.Dropout(0.5, name='dropout1_50')(x)
    x = tf.keras.layers.Dense(64, activation='elu', name='layer2_dense128_elu')(x)
    x = tf.keras.layers.Dropout(0.5, name='dropout2_50')(x)
    x = tf.keras.layers.Dense(32, activation='elu', name='layer3_dense64elu')(x)
    x = tf.keras.layers.Dense(16, activation='elu', name='layer4_dense32_elu')(x)

    # (target, head activation, head layer name), in the order the heads are created.
    head_specs = (
        (REPRODUCTIVE_NUMBER, "softplus", REPRODUCTIVE_NUMBER),  # positive values only
        (INFECTION_DURATION, "softplus", INFECTION_DURATION),  # positive values only
        (INCUBATION_PERIOD, "softplus", INCUBATION_PERIOD),  # positive values only
        (F_S, half_sigmoid, "FS_logits"),
        (X_S, relu_plus_one, "XS_logits"),
        (UPSILON, "sigmoid", "ups_logits"),
        (X_C, relu_plus_one, "XC_logits"),
    )
    outputs = {
        column: tf.keras.layers.Dense(1, activation=activation, name=layer_name)(x)
        for column, activation, layer_name in head_specs
        if column in target_columns
    }

    # Fail early with a clear message instead of building a model without outputs.
    if not outputs:
        supported = ', '.join(column for column, _, _ in head_specs)
        raise ValueError(
            f'None of the target columns {target_columns!r} is supported; '
            f'expected at least one of: {supported}'
        )

    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

    # Losses and their weights are looked up per created output head.
    model.compile(optimizer=optimizer,
                  loss={col: LOSS_FUNCTIONS[col] for col in outputs},
                  loss_weights={col: LOSS_WEIGHTS[col] for col in outputs},
                  metrics=metrics)
    return model
|