bdext 0.1.70__py3-none-any.whl → 0.1.72__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bdeissct_dl/bdeissct_model.py +2 -1
- bdeissct_dl/dl_model.py +18 -26
- bdeissct_dl/scaler_fitting.py +0 -1
- bdeissct_dl/training.py +13 -12
- bdeissct_dl/tree_encoder.py +39 -20
- {bdext-0.1.70.dist-info → bdext-0.1.72.dist-info}/METADATA +13 -6
- bdext-0.1.72.dist-info/RECORD +17 -0
- {bdext-0.1.70.dist-info → bdext-0.1.72.dist-info}/WHEEL +1 -1
- {bdext-0.1.70.dist-info → bdext-0.1.72.dist-info}/entry_points.txt +0 -1
- bdext-0.1.70.dist-info/RECORD +0 -17
- {bdext-0.1.70.dist-info → bdext-0.1.72.dist-info/licenses}/LICENSE +0 -0
- {bdext-0.1.70.dist-info → bdext-0.1.72.dist-info}/top_level.txt +0 -0
bdeissct_dl/bdeissct_model.py
CHANGED
|
@@ -9,6 +9,7 @@ INFECTION_DURATION = 'd'
|
|
|
9
9
|
|
|
10
10
|
MU = 'mu'
|
|
11
11
|
INCUBATION_PERIOD = 'd_E'
|
|
12
|
+
INCUBATION_FRACTION = 'f_E'
|
|
12
13
|
|
|
13
14
|
F_S = 'f_S'
|
|
14
15
|
X_S = 'X_S'
|
|
@@ -54,7 +55,7 @@ MODELS = (BD, BDCT, \
|
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
TARGET_CT_COLUMNS = (UPSILON, X_C)
|
|
57
|
-
TARGET_INCUBATION_COLUMNS = (
|
|
58
|
+
TARGET_INCUBATION_COLUMNS = (INCUBATION_FRACTION,)
|
|
58
59
|
TARGET_SS_COLUMNS = (F_S, X_S)
|
|
59
60
|
TARGET_COLUMNS_BD = (REPRODUCTIVE_NUMBER, INFECTION_DURATION)
|
|
60
61
|
TARGET_COLUMNS_BDCT = TARGET_COLUMNS_BD + TARGET_CT_COLUMNS
|
bdeissct_dl/dl_model.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
1
3
|
import tensorflow as tf
|
|
2
4
|
from tensorflow.python.keras.utils.generic_utils import register_keras_serializable
|
|
3
5
|
|
|
4
6
|
from bdeissct_dl.bdeissct_model import F_S, UPSILON, REPRODUCTIVE_NUMBER, \
|
|
5
|
-
INFECTION_DURATION, X_S, X_C,
|
|
6
|
-
|
|
7
|
-
from collections import defaultdict
|
|
7
|
+
INFECTION_DURATION, X_S, X_C, INCUBATION_FRACTION
|
|
8
8
|
|
|
9
|
-
LEARNING_RATE = 0.
|
|
9
|
+
LEARNING_RATE = 0.01
|
|
10
10
|
|
|
11
11
|
@register_keras_serializable(package="bdeissct_dl", name="half_sigmoid")
|
|
12
12
|
def half_sigmoid(x):
|
|
@@ -19,16 +19,6 @@ def relu_plus_one(x):
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
LOSS_FUNCTIONS = defaultdict(lambda: "mean_squared_error")
|
|
22
|
-
LOSS_FUNCTIONS.update({
|
|
23
|
-
REPRODUCTIVE_NUMBER: "mean_squared_error",
|
|
24
|
-
INFECTION_DURATION: "mean_squared_error",
|
|
25
|
-
INCUBATION_PERIOD: "mean_squared_error",
|
|
26
|
-
UPSILON: 'mean_squared_error',
|
|
27
|
-
RHO: 'mean_squared_error',
|
|
28
|
-
X_C: "mean_squared_error",
|
|
29
|
-
F_S: 'mean_squared_error',
|
|
30
|
-
X_S: "mean_squared_error",
|
|
31
|
-
})
|
|
32
22
|
|
|
33
23
|
|
|
34
24
|
def build_model(target_columns, n_x, optimizer=None, metrics=None):
|
|
@@ -46,12 +36,14 @@ def build_model(target_columns, n_x, optimizer=None, metrics=None):
|
|
|
46
36
|
inputs = tf.keras.Input(shape=(n_x,))
|
|
47
37
|
|
|
48
38
|
# (Your hidden layers go here)
|
|
49
|
-
x = tf.keras.layers.Dense(128, activation='elu', name=f'
|
|
50
|
-
x = tf.keras.layers.Dropout(0.5, name='dropout1_50')(x)
|
|
51
|
-
x = tf.keras.layers.Dense(64, activation='elu', name=f'
|
|
52
|
-
x = tf.keras.layers.Dropout(0.5, name='dropout2_50')(x)
|
|
53
|
-
x = tf.keras.layers.Dense(32, activation='elu', name=f'
|
|
54
|
-
x = tf.keras.layers.Dense(16, activation='elu', name=f'
|
|
39
|
+
x = tf.keras.layers.Dense(128, activation='elu', name=f'layer1_dense128_elu')(inputs)
|
|
40
|
+
# x = tf.keras.layers.Dropout(0.5, name='dropout1_50')(x)
|
|
41
|
+
x = tf.keras.layers.Dense(64, activation='elu', name=f'layer2_dense64_elu')(x)
|
|
42
|
+
# x = tf.keras.layers.Dropout(0.5, name='dropout2_50')(x)
|
|
43
|
+
x = tf.keras.layers.Dense(32, activation='elu', name=f'layer3_dense32elu')(x)
|
|
44
|
+
x = tf.keras.layers.Dense(16, activation='elu', name=f'layer4_dense16_elu')(x)
|
|
45
|
+
x = tf.keras.layers.Dense(8, activation='elu', name=f'layer5_dense8_elu')(x)
|
|
46
|
+
x = tf.keras.layers.Dense(4, activation='elu', name=f'layer5_dense4_elu')(x)
|
|
55
47
|
|
|
56
48
|
outputs = {}
|
|
57
49
|
|
|
@@ -59,16 +51,16 @@ def build_model(target_columns, n_x, optimizer=None, metrics=None):
|
|
|
59
51
|
outputs[REPRODUCTIVE_NUMBER] = tf.keras.layers.Dense(1, activation="relu", name=REPRODUCTIVE_NUMBER)(x) # positive values only
|
|
60
52
|
if INFECTION_DURATION in target_columns:
|
|
61
53
|
outputs[INFECTION_DURATION] = tf.keras.layers.Dense(1, activation="relu", name=INFECTION_DURATION)(x) # positive values only
|
|
62
|
-
if
|
|
63
|
-
outputs[
|
|
54
|
+
if INCUBATION_FRACTION in target_columns:
|
|
55
|
+
outputs[INCUBATION_FRACTION] = tf.keras.layers.Dense(1, activation="sigmoid", name=INCUBATION_FRACTION)(x) # positive values only
|
|
64
56
|
if F_S in target_columns:
|
|
65
|
-
outputs[F_S] = tf.keras.layers.Dense(1, activation=half_sigmoid, name=
|
|
57
|
+
outputs[F_S] = tf.keras.layers.Dense(1, activation=half_sigmoid, name=F_S)(x)
|
|
66
58
|
if X_S in target_columns:
|
|
67
|
-
outputs[X_S] = tf.keras.layers.Dense(1, activation=relu_plus_one, name=
|
|
59
|
+
outputs[X_S] = tf.keras.layers.Dense(1, activation=relu_plus_one, name=X_S)(x)
|
|
68
60
|
if UPSILON in target_columns:
|
|
69
|
-
outputs[UPSILON] = tf.keras.layers.Dense(1, activation="sigmoid", name=
|
|
61
|
+
outputs[UPSILON] = tf.keras.layers.Dense(1, activation="sigmoid", name=UPSILON)(x)
|
|
70
62
|
if X_C in target_columns:
|
|
71
|
-
outputs[X_C] = tf.keras.layers.Dense(1, activation=relu_plus_one, name=
|
|
63
|
+
outputs[X_C] = tf.keras.layers.Dense(1, activation=relu_plus_one, name=X_C)(x)
|
|
72
64
|
|
|
73
65
|
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
|
|
74
66
|
|
bdeissct_dl/scaler_fitting.py
CHANGED
|
@@ -27,7 +27,6 @@ def main():
|
|
|
27
27
|
parser = \
|
|
28
28
|
argparse.ArgumentParser(description="Fit a BD(EI)(SS)(CT) data scaler.")
|
|
29
29
|
parser.add_argument('--train_data', type=str, nargs='+',
|
|
30
|
-
# default=[f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/training/500_1000/{model}/{i}/trees.csv.xz' for i in range(120) for model in [BD, BDCT, BDEI, BDEICT, BDSS, BDSSCT, BDEISS, BDEISSCT]],
|
|
31
30
|
help="path to the files where the encoded training data are stored")
|
|
32
31
|
parser.add_argument('--model_path', default=MODEL_PATH, type=str,
|
|
33
32
|
help="path to the folder where the scaler should be stored.")
|
bdeissct_dl/training.py
CHANGED
|
@@ -6,17 +6,17 @@ import pandas as pd
|
|
|
6
6
|
import tensorflow as tf
|
|
7
7
|
|
|
8
8
|
from bdeissct_dl import MODEL_PATH, BATCH_SIZE, EPOCHS
|
|
9
|
-
from bdeissct_dl.bdeissct_model import MODEL2TARGET_COLUMNS, UPSILON, X_C, KAPPA,
|
|
9
|
+
from bdeissct_dl.bdeissct_model import MODEL2TARGET_COLUMNS, UPSILON, X_C, KAPPA, INCUBATION_FRACTION, F_S, \
|
|
10
10
|
X_S, TARGET_COLUMNS_BDCT, REPRODUCTIVE_NUMBER, INFECTION_DURATION
|
|
11
11
|
from bdeissct_dl.dl_model import build_model
|
|
12
12
|
from bdeissct_dl.model_serializer import save_model_keras, load_scaler_numpy, \
|
|
13
13
|
load_model_keras
|
|
14
14
|
from bdeissct_dl.tree_encoder import SCALING_FACTOR, STATS
|
|
15
15
|
|
|
16
|
-
FEATURE_COLUMNS = [_ for _ in STATS if _ not in {'n_trees', 'n_tips', 'n_inodes', 'len_forest',
|
|
16
|
+
FEATURE_COLUMNS = [_ for _ in STATS if _ not in {#'n_trees', 'n_tips', 'n_inodes', 'len_forest',
|
|
17
17
|
REPRODUCTIVE_NUMBER, INFECTION_DURATION,
|
|
18
18
|
UPSILON, X_C, KAPPA,
|
|
19
|
-
|
|
19
|
+
INCUBATION_FRACTION,
|
|
20
20
|
F_S, X_S,
|
|
21
21
|
SCALING_FACTOR}]
|
|
22
22
|
|
|
@@ -110,8 +110,8 @@ def get_train_data(target_columns, columns_x, columns_y, file_pattern=None, file
|
|
|
110
110
|
if X_C in target_columns:
|
|
111
111
|
train_labels[X_C] = Y[:, col_i]
|
|
112
112
|
col_i += 1
|
|
113
|
-
if
|
|
114
|
-
train_labels[
|
|
113
|
+
if INCUBATION_FRACTION in target_columns:
|
|
114
|
+
train_labels[INCUBATION_FRACTION] = Y[:, col_i]
|
|
115
115
|
col_i += 1
|
|
116
116
|
if F_S in target_columns:
|
|
117
117
|
train_labels[F_S] = Y[:, col_i]
|
|
@@ -141,14 +141,8 @@ def main():
|
|
|
141
141
|
parser = \
|
|
142
142
|
argparse.ArgumentParser(description="Train a BD(EI)(SS)(CT) model.")
|
|
143
143
|
parser.add_argument('--train_data', type=str, nargs='+',
|
|
144
|
-
# default=[f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/train/2000_5000/BDEI/{i}/trees.csv.xz' for i in range(100)] \
|
|
145
|
-
# + [f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/training/2000_5000/BD/{i}/trees.csv.xz' for i in range(10)]
|
|
146
|
-
# ,
|
|
147
144
|
help="path to the files where the encoded training data are stored")
|
|
148
145
|
parser.add_argument('--val_data', type=str, nargs='+',
|
|
149
|
-
# default=[f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/train/2000_5000/BDEI/{i}/trees.csv.xz' for i in range(100, 120)] \
|
|
150
|
-
# + [f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/train/2000_5000/BD/{i}/trees.csv.xz' for i in range(10, 12)]
|
|
151
|
-
# ,
|
|
152
146
|
help="path to the files where the encoded validation data are stored")
|
|
153
147
|
|
|
154
148
|
parser.add_argument('--epochs', type=int, default=EPOCHS, help='number of epochs to train the model')
|
|
@@ -178,6 +172,13 @@ def main():
|
|
|
178
172
|
|
|
179
173
|
|
|
180
174
|
for col, y_idx in y_col2index.items():
|
|
175
|
+
try:
|
|
176
|
+
if load_model_keras(path=params.model_path, model_name=f'{params.model_name}.{col}'):
|
|
177
|
+
print(f'Model {params.model_name}.{col} already exists at {params.model_path}. Skipping training for this target.')
|
|
178
|
+
continue
|
|
179
|
+
except:
|
|
180
|
+
pass
|
|
181
|
+
|
|
181
182
|
print(f'Training to predict {col} with {params.model_name}...')
|
|
182
183
|
|
|
183
184
|
if params.base_model_name is not None:
|
|
@@ -194,7 +195,7 @@ def main():
|
|
|
194
195
|
scaler_x=scaler_x, batch_size=BATCH_SIZE, shuffle=True)
|
|
195
196
|
|
|
196
197
|
#early stopping to avoid overfitting
|
|
197
|
-
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=
|
|
198
|
+
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
|
|
198
199
|
|
|
199
200
|
#Training of the Network, with an independent validation set
|
|
200
201
|
model.fit(ds_train, verbose=1, epochs=params.epochs, validation_data=ds_val, callbacks=[early_stop])
|
bdeissct_dl/tree_encoder.py
CHANGED
|
@@ -3,17 +3,19 @@ import os
|
|
|
3
3
|
from glob import iglob
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from treesimulator.mtbd_models import INCUBATION_FRACTION
|
|
6
7
|
from treesumstats import FeatureCalculator, FeatureRegistry, FeatureManager
|
|
7
8
|
from treesumstats.balance_sumstats import BalanceFeatureCalculator
|
|
8
9
|
from treesumstats.basic_sumstats import BasicFeatureCalculator
|
|
9
10
|
from treesumstats.branch_sumstats import BranchFeatureCalculator
|
|
10
11
|
from treesumstats.event_time_sumstats import EventTimeFeatureCalculator
|
|
11
12
|
from treesumstats.ltt_sumstats import LTTFeatureCalculator
|
|
13
|
+
from treesumstats.resolution_sumstats import ResolutionFeatureCalculator
|
|
12
14
|
from treesumstats.subtree_sumstats import SubtreeFeatureCalculator
|
|
13
15
|
from treesumstats.transmission_chain_sumstats import TransmissionChainFeatureCalculator
|
|
14
16
|
|
|
15
17
|
from bdeissct_dl.bdeissct_model import RHO, UPSILON, X_C, KAPPA, F_S, X_S, RATE_PARAMETERS, \
|
|
16
|
-
TIME_PARAMETERS, INFECTION_DURATION, REPRODUCTIVE_NUMBER, INCUBATION_PERIOD
|
|
18
|
+
TIME_PARAMETERS, INFECTION_DURATION, REPRODUCTIVE_NUMBER, INCUBATION_FRACTION, INCUBATION_PERIOD
|
|
17
19
|
from bdeissct_dl.tree_manager import read_forest, rescale_forest_to_avg_brlen
|
|
18
20
|
|
|
19
21
|
TARGET_AVG_BL = 1
|
|
@@ -88,7 +90,7 @@ class BDEISSCTFeatureCalculator(FeatureCalculator):
|
|
|
88
90
|
pass
|
|
89
91
|
|
|
90
92
|
def feature_names(self):
|
|
91
|
-
return [REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO,
|
|
93
|
+
return [REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO, INCUBATION_FRACTION, F_S, X_S, UPSILON, X_C, KAPPA, \
|
|
92
94
|
SCALING_FACTOR]
|
|
93
95
|
|
|
94
96
|
def set_forest(self, forest, **kwargs):
|
|
@@ -116,8 +118,8 @@ class BDEISSCTFeatureCalculator(FeatureCalculator):
|
|
|
116
118
|
return 'reproduction number.'
|
|
117
119
|
if INFECTION_DURATION == feature_name:
|
|
118
120
|
return 'infection duration.'
|
|
119
|
-
if
|
|
120
|
-
return 'incubation
|
|
121
|
+
if INCUBATION_FRACTION == feature_name:
|
|
122
|
+
return 'incubation fraction.'
|
|
121
123
|
return None
|
|
122
124
|
|
|
123
125
|
|
|
@@ -128,6 +130,7 @@ FeatureRegistry.register(TransmissionChainFeatureCalculator(CHAIN_LEN, percentil
|
|
|
128
130
|
FeatureRegistry.register(LTTFeatureCalculator(N_LTT_COORDINATES))
|
|
129
131
|
FeatureRegistry.register(BalanceFeatureCalculator())
|
|
130
132
|
FeatureRegistry.register(SubtreeFeatureCalculator())
|
|
133
|
+
FeatureRegistry.register(ResolutionFeatureCalculator())
|
|
131
134
|
FeatureRegistry.register(BDEISSCTFeatureCalculator())
|
|
132
135
|
|
|
133
136
|
BRLEN_STATS = ['brlen_inode_mean', 'brlen_inode_median', 'brlen_inode_var',
|
|
@@ -198,24 +201,29 @@ BALANCE_STATS = ['colless_normalized',
|
|
|
198
201
|
'frac_inodes_imbalanced', 'imbalance_avg']
|
|
199
202
|
|
|
200
203
|
TOPOLOGY_STATS = ['frac_tips_in_2', 'frac_tips_in_3L', 'frac_tips_in_4L', 'frac_tips_in_4B', 'frac_tips_in_O',
|
|
204
|
+
'frac_tips_in_3U', 'frac_tips_in_4U', 'frac_tips_in_4U3U1', 'frac_tips_in_4U211',
|
|
201
205
|
'frac_inodes_with_sibling_inodes']
|
|
202
206
|
|
|
203
|
-
TIME_DIFF_STATS = ['time_diff_in_2_real_mean', 'time_diff_in_3L_real_mean', 'time_diff_in_I_real_mean',
|
|
204
|
-
'time_diff_in_2_real_min', 'time_diff_in_3L_real_min', 'time_diff_in_I_real_min',
|
|
205
|
-
'time_diff_in_2_real_max', 'time_diff_in_3L_real_max', 'time_diff_in_I_real_max',
|
|
206
|
-
'time_diff_in_2_real_var', 'time_diff_in_3L_real_var', 'time_diff_in_I_real_var',
|
|
207
|
-
'time_diff_in_2_real_median', 'time_diff_in_3L_real_median', 'time_diff_in_I_real_median',
|
|
207
|
+
TIME_DIFF_STATS = ['time_diff_in_2_real_mean', 'time_diff_in_3L_real_mean', 'time_diff_in_3U_real_mean', 'time_diff_in_4U_real_mean', 'time_diff_in_I_real_mean',
|
|
208
|
+
'time_diff_in_2_real_min', 'time_diff_in_3L_real_min', 'time_diff_in_3U_real_min', 'time_diff_in_4U_real_min', 'time_diff_in_I_real_min',
|
|
209
|
+
'time_diff_in_2_real_max', 'time_diff_in_3L_real_max', 'time_diff_in_3U_real_max', 'time_diff_in_4U_real_max', 'time_diff_in_I_real_max',
|
|
210
|
+
'time_diff_in_2_real_var', 'time_diff_in_3L_real_var', 'time_diff_in_3U_real_var', 'time_diff_in_4U_real_var', 'time_diff_in_I_real_var',
|
|
211
|
+
'time_diff_in_2_real_median', 'time_diff_in_3L_real_median', 'time_diff_in_3U_real_median', 'time_diff_in_4U_real_median', 'time_diff_in_I_real_median',
|
|
208
212
|
#
|
|
209
|
-
'time_diff_in_2_random_mean', 'time_diff_in_3L_random_mean', 'time_diff_in_I_random_mean',
|
|
210
|
-
'time_diff_in_2_random_min', 'time_diff_in_3L_random_min', 'time_diff_in_I_random_min',
|
|
211
|
-
'time_diff_in_2_random_max', 'time_diff_in_3L_random_max', 'time_diff_in_I_random_max',
|
|
212
|
-
'time_diff_in_2_random_var', 'time_diff_in_3L_random_var', 'time_diff_in_I_random_var',
|
|
213
|
-
'time_diff_in_2_random_median', 'time_diff_in_3L_random_median', 'time_diff_in_I_random_median',
|
|
213
|
+
'time_diff_in_2_random_mean', 'time_diff_in_3L_random_mean', 'time_diff_in_3U_random_mean', 'time_diff_in_4U_random_mean', 'time_diff_in_I_random_mean',
|
|
214
|
+
'time_diff_in_2_random_min', 'time_diff_in_3L_random_min', 'time_diff_in_3U_random_min', 'time_diff_in_4U_random_min', 'time_diff_in_I_random_min',
|
|
215
|
+
'time_diff_in_2_random_max', 'time_diff_in_3L_random_max', 'time_diff_in_3U_random_max', 'time_diff_in_4U_random_max', 'time_diff_in_I_random_max',
|
|
216
|
+
'time_diff_in_2_random_var', 'time_diff_in_3L_random_var', 'time_diff_in_3U_random_var', 'time_diff_in_4U_random_var', 'time_diff_in_I_random_var',
|
|
217
|
+
'time_diff_in_2_random_median', 'time_diff_in_3L_random_median', 'time_diff_in_3U_random_median', 'time_diff_in_4U_random_median', 'time_diff_in_I_random_median',
|
|
214
218
|
#
|
|
215
219
|
'time_diff_in_2_real_perc1', 'time_diff_in_2_real_perc5', 'time_diff_in_2_real_perc10',
|
|
216
220
|
'time_diff_in_2_real_perc25',
|
|
217
221
|
'time_diff_in_3L_real_perc1', 'time_diff_in_3L_real_perc5', 'time_diff_in_3L_real_perc10',
|
|
218
222
|
'time_diff_in_3L_real_perc25',
|
|
223
|
+
'time_diff_in_3U_real_perc1', 'time_diff_in_3U_real_perc5', 'time_diff_in_3U_real_perc10',
|
|
224
|
+
'time_diff_in_3U_real_perc25',
|
|
225
|
+
'time_diff_in_4U_real_perc1', 'time_diff_in_4U_real_perc5', 'time_diff_in_4U_real_perc10',
|
|
226
|
+
'time_diff_in_4U_real_perc25',
|
|
219
227
|
'time_diff_in_I_real_perc75', 'time_diff_in_I_real_perc90', 'time_diff_in_I_real_perc95',
|
|
220
228
|
'time_diff_in_I_real_perc99',
|
|
221
229
|
#
|
|
@@ -223,22 +231,33 @@ TIME_DIFF_STATS = ['time_diff_in_2_real_mean', 'time_diff_in_3L_real_mean', 'tim
|
|
|
223
231
|
'time_diff_in_2_random_perc25',
|
|
224
232
|
'time_diff_in_3L_random_perc1', 'time_diff_in_3L_random_perc5', 'time_diff_in_3L_random_perc10',
|
|
225
233
|
'time_diff_in_3L_random_perc25',
|
|
234
|
+
'time_diff_in_3U_random_perc1', 'time_diff_in_3U_random_perc5', 'time_diff_in_3U_random_perc10',
|
|
235
|
+
'time_diff_in_3U_random_perc25',
|
|
236
|
+
'time_diff_in_4U_random_perc1', 'time_diff_in_4U_random_perc5', 'time_diff_in_4U_random_perc10',
|
|
237
|
+
'time_diff_in_4U_random_perc25',
|
|
226
238
|
'time_diff_in_I_random_perc75', 'time_diff_in_I_random_perc90', 'time_diff_in_I_random_perc95',
|
|
227
239
|
'time_diff_in_I_random_perc99',
|
|
228
240
|
#
|
|
229
241
|
'time_diff_in_2_random_vs_real_frac_less', 'time_diff_in_3L_random_vs_real_frac_less',
|
|
242
|
+
'time_diff_in_3U_random_vs_real_frac_less', 'time_diff_in_4U_random_vs_real_frac_less',
|
|
230
243
|
'time_diff_in_I_random_vs_real_frac_more',
|
|
231
244
|
'time_diff_in_2_random_vs_real_pval_less', 'time_diff_in_3L_random_vs_real_pval_less',
|
|
245
|
+
'time_diff_in_3U_random_vs_real_pval_less', 'time_diff_in_4U_random_vs_real_pval_less',
|
|
232
246
|
'time_diff_in_I_random_vs_real_pval_more']
|
|
233
247
|
|
|
248
|
+
RESOLUTION_STATS = ['n_children_mean',
|
|
249
|
+
'n_children_var',
|
|
250
|
+
'frac_inodes_resolved',
|
|
251
|
+
'frac_inodes_resolved_non_zero']
|
|
252
|
+
|
|
234
253
|
EPI_STATS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO,
|
|
235
254
|
UPSILON, X_C, KAPPA,
|
|
236
|
-
|
|
255
|
+
INCUBATION_FRACTION,
|
|
237
256
|
F_S, X_S]
|
|
238
257
|
|
|
239
|
-
STATS = ['n_tips'] \
|
|
240
|
-
+ BRLEN_STATS + TIME_STATS + CHAIN_STATS + LTT_STATS + BALANCE_STATS + TOPOLOGY_STATS
|
|
241
|
-
+ EPI_STATS + [SCALING_FACTOR]
|
|
258
|
+
STATS = ['n_tips', 'n_inodes'] \
|
|
259
|
+
+ BRLEN_STATS + TIME_STATS + CHAIN_STATS + LTT_STATS + BALANCE_STATS + TOPOLOGY_STATS \
|
|
260
|
+
+ TIME_DIFF_STATS + RESOLUTION_STATS + EPI_STATS + [SCALING_FACTOR]
|
|
242
261
|
|
|
243
262
|
def forest2sumstat_df(forest, rho, R=0, d=0, x_c=0, upsilon=0, kappa=1, d_inc=0, f_ss=0, x_ss=1,
|
|
244
263
|
target_avg_brlen=TARGET_AVG_BL):
|
|
@@ -267,7 +286,7 @@ def forest2sumstat_df(forest, rho, R=0, d=0, x_c=0, upsilon=0, kappa=1, d_inc=0,
|
|
|
267
286
|
|
|
268
287
|
kwargs = {SCALING_FACTOR: scaling_factor,
|
|
269
288
|
REPRODUCTIVE_NUMBER: R, INFECTION_DURATION: d, RHO: rho,
|
|
270
|
-
|
|
289
|
+
INCUBATION_FRACTION: d_inc / d,
|
|
271
290
|
F_S: f_ss, X_S: x_ss,
|
|
272
291
|
X_C: x_c, UPSILON: upsilon, KAPPA: kappa}
|
|
273
292
|
scale(kwargs, scaling_factor)
|
|
@@ -322,7 +341,7 @@ def save_forests_as_sumstats(output, nwks=None, logs=None, patterns=None, target
|
|
|
322
341
|
kwargs = {SCALING_FACTOR: scaling_factor}
|
|
323
342
|
kwargs[REPRODUCTIVE_NUMBER], kwargs[INFECTION_DURATION], kwargs[RHO] = R, d, rho
|
|
324
343
|
kwargs[UPSILON], kwargs[KAPPA], kwargs[X_C] = upsilon, kappa, x_c
|
|
325
|
-
kwargs[
|
|
344
|
+
kwargs[INCUBATION_FRACTION] = d_inc / d
|
|
326
345
|
kwargs[F_S], kwargs[X_S] = f_ss, x_ss
|
|
327
346
|
|
|
328
347
|
scale(kwargs, scaling_factor)
|
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: bdext
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.72
|
|
4
4
|
Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
|
|
5
5
|
Home-page: https://github.com/modpath/bdeissct
|
|
6
6
|
Author: Anna Zhukova
|
|
7
7
|
Author-email: anna.zhukova@pasteur.fr
|
|
8
|
-
License: UNKNOWN
|
|
9
8
|
Keywords: phylogenetics,birth-death model,incubation,super-spreading,contact tracing
|
|
10
|
-
Platform: UNKNOWN
|
|
11
9
|
Classifier: Development Status :: 4 - Beta
|
|
12
10
|
Classifier: Environment :: Console
|
|
13
11
|
Classifier: Intended Audience :: Developers
|
|
@@ -15,6 +13,7 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
15
13
|
Classifier: Topic :: Software Development
|
|
16
14
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
15
|
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
18
17
|
Requires-Dist: tensorflow==2.19.0
|
|
19
18
|
Requires-Dist: six
|
|
20
19
|
Requires-Dist: ete3
|
|
@@ -24,6 +23,16 @@ Requires-Dist: biopython
|
|
|
24
23
|
Requires-Dist: scikit-learn==1.5.2
|
|
25
24
|
Requires-Dist: pandas==2.2.3
|
|
26
25
|
Requires-Dist: treesumstats==0.7
|
|
26
|
+
Dynamic: author
|
|
27
|
+
Dynamic: author-email
|
|
28
|
+
Dynamic: classifier
|
|
29
|
+
Dynamic: description
|
|
30
|
+
Dynamic: description-content-type
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: keywords
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
Dynamic: requires-dist
|
|
35
|
+
Dynamic: summary
|
|
27
36
|
|
|
28
37
|
# bdext
|
|
29
38
|
|
|
@@ -236,5 +245,3 @@ The other parameters are estimated from a time-scaled phylogenetic tree.
|
|
|
236
245
|
|
|
237
246
|
[//]: # ()
|
|
238
247
|
[//]: # ()
|
|
239
|
-
|
|
240
|
-
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
README.md,sha256=Ngj8bt0Yu3LUsvwblmMtUqqjvGyqxv6ku2_cYCb5_DQ,6539
|
|
2
|
+
bdeissct_dl/__init__.py,sha256=QPEiIP-xVqGQgydeqN_9AZgT26IYWeJC4-JlHnd8Rjo,296
|
|
3
|
+
bdeissct_dl/bdeissct_model.py,sha256=um1nEQf4uym_jkrkuUjpvIbVc7VRfmAY3gnW9xgXv6I,2016
|
|
4
|
+
bdeissct_dl/dl_model.py,sha256=gl6uBK6rwEJxWgzInQfyn-1UWbePQJPDWwc7Lwq5F0U,3250
|
|
5
|
+
bdeissct_dl/estimator.py,sha256=QBWA8R0pBPZPd3JvItdJS2lN1J3VqvdJqBMzCi-NADs,3336
|
|
6
|
+
bdeissct_dl/model_serializer.py,sha256=s1yBzQjhtr-w7eT8bTsNkG9_xnYRZrUc3HkeOzNZpQY,2464
|
|
7
|
+
bdeissct_dl/scaler_fitting.py,sha256=wvHLtLmg5QP58NKSUnYBOQ4TzAtTAi_AfLVaxKXfJzM,1522
|
|
8
|
+
bdeissct_dl/sumstat_checker.py,sha256=TQ0nb86-BXmusqgMnOJusLpR4ul3N3Hi886IWUovrMI,1846
|
|
9
|
+
bdeissct_dl/training.py,sha256=EvD1n3uiaUb8gubxwhP1kt4xUW2hokHB7ywoUScCtmI,7979
|
|
10
|
+
bdeissct_dl/tree_encoder.py,sha256=WAwn3e1lPiksZNCnwTt9wsoEX3rgF8O0b2vOx7g0gUY,20286
|
|
11
|
+
bdeissct_dl/tree_manager.py,sha256=UXxUVmEkxwUhKpJeACVgiXZ8Kp1o_hiv8Qb80b6qmVU,11814
|
|
12
|
+
bdext-0.1.72.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
13
|
+
bdext-0.1.72.dist-info/METADATA,sha256=5ix9OE4DIpC4K4xoW61JNPePZro3J2K5tt1AcGR6puc,7676
|
|
14
|
+
bdext-0.1.72.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
15
|
+
bdext-0.1.72.dist-info/entry_points.txt,sha256=DP-XVnUjSLJt-PHOJUurpkEUkkicdtGoEuGVeVb0gGg,207
|
|
16
|
+
bdext-0.1.72.dist-info/top_level.txt,sha256=z4dadFfcLghr4lwROy7QR3zEICpa-eCPT6mmcoHeEJY,12
|
|
17
|
+
bdext-0.1.72.dist-info/RECORD,,
|
bdext-0.1.70.dist-info/RECORD
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
README.md,sha256=Ngj8bt0Yu3LUsvwblmMtUqqjvGyqxv6ku2_cYCb5_DQ,6539
|
|
2
|
-
bdeissct_dl/__init__.py,sha256=QPEiIP-xVqGQgydeqN_9AZgT26IYWeJC4-JlHnd8Rjo,296
|
|
3
|
-
bdeissct_dl/bdeissct_model.py,sha256=sQclYN5V8utw6wEMDN0_Ua-0NeuyuWHG_e0_jQIUe8Q,1986
|
|
4
|
-
bdeissct_dl/dl_model.py,sha256=wpwlUVy6kOhPIsT1zg-Us2_bdnntxdnCbNQB4UxYzTg,3433
|
|
5
|
-
bdeissct_dl/estimator.py,sha256=QBWA8R0pBPZPd3JvItdJS2lN1J3VqvdJqBMzCi-NADs,3336
|
|
6
|
-
bdeissct_dl/model_serializer.py,sha256=s1yBzQjhtr-w7eT8bTsNkG9_xnYRZrUc3HkeOzNZpQY,2464
|
|
7
|
-
bdeissct_dl/scaler_fitting.py,sha256=9X0O7-Wc9xGTI-iF-Pfp1PPoW7j01wZUfJVZf8ky-IU,1752
|
|
8
|
-
bdeissct_dl/sumstat_checker.py,sha256=TQ0nb86-BXmusqgMnOJusLpR4ul3N3Hi886IWUovrMI,1846
|
|
9
|
-
bdeissct_dl/training.py,sha256=H5wA3V72nhc9Km7kvKmzjCYw0N1itMGDbj9c-Uat5BU,8350
|
|
10
|
-
bdeissct_dl/tree_encoder.py,sha256=V-7_Kis9x9JacI_mF7rWRGGKvxn7AWFCto7LkgRawBw,18286
|
|
11
|
-
bdeissct_dl/tree_manager.py,sha256=UXxUVmEkxwUhKpJeACVgiXZ8Kp1o_hiv8Qb80b6qmVU,11814
|
|
12
|
-
bdext-0.1.70.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
13
|
-
bdext-0.1.70.dist-info/METADATA,sha256=USWAUX3zunofN9x2-6E63lFVrNAdRx2cZf2Sc8HKGe8,7479
|
|
14
|
-
bdext-0.1.70.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
15
|
-
bdext-0.1.70.dist-info/entry_points.txt,sha256=lcAwyk-Fc0G_w4Ex7KDivh7h1tzSA99PRMcy971b-nM,208
|
|
16
|
-
bdext-0.1.70.dist-info/top_level.txt,sha256=z4dadFfcLghr4lwROy7QR3zEICpa-eCPT6mmcoHeEJY,12
|
|
17
|
-
bdext-0.1.70.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|