bdext 0.1.61__py3-none-any.whl → 0.1.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -129,5 +129,5 @@ for model in (BDEISSCT, BDEISSCT1, BDEISSCT2, BDEISSCT2000):
129
129
 
130
130
 
131
131
 
132
- CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, UPSILON, X_C, F_E, F_S, X_S, LA]
132
+ CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, LA, F_E, F_S, X_S, UPSILON, X_C]
133
133
  CT_RATE_COLUMNS = [PSI, RHO]
@@ -7,12 +7,7 @@ from bdeissct_dl.model_serializer import load_model_keras, load_scaler_numpy
7
7
 
8
8
  def predict_parameters(df, model_path=MODEL_PATH):
9
9
  feature_columns = CT_EPI_COLUMNS
10
- x_indices = []
11
- for i, col in enumerate(df.columns):
12
- if col in feature_columns:
13
- x_indices.append(i)
14
-
15
- X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
10
+ X = df.loc[:, feature_columns].to_numpy(dtype=float, na_value=0)
16
11
 
17
12
  # Standardization of the input features with a
18
13
  # standard scaler
@@ -58,7 +53,6 @@ def main():
58
53
  )
59
54
  parser.add_argument('--log', default=None, type=str, help="output log file")
60
55
  parser.add_argument('--sumstats', default=None, type=str, help="input file(s) with epi parameters")
61
- parser.add_argument('--ci', action='store_true', help="calculate CIs")
62
56
  params = parser.parse_args()
63
57
 
64
58
  df = pd.read_csv(params.sumstats)
Binary file
Binary file
Binary file
@@ -10,16 +10,13 @@ from bdeissct_dl.model_serializer import save_scaler_joblib, save_scaler_numpy
10
10
  from bdeissct_dl.training import get_data_characteristics
11
11
 
12
12
 
13
- def fit_scalers(paths, x_indices, y_indices, scaler_x=None, scaler_y=None):
13
+ def fit_scalers(paths, x_indices, scaler_x=None):
14
14
  # First pass: calculate mean and var
15
15
  for path in paths:
16
16
  df = pd.read_csv(path)
17
17
  if scaler_x:
18
18
  X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
19
19
  scaler_x.partial_fit(X)
20
- if scaler_y:
21
- Y = df.iloc[:, y_indices].to_numpy(dtype=float, na_value=0)
22
- scaler_y.partial_fit(Y)
23
20
 
24
21
 
25
22
  def main():
@@ -41,17 +38,14 @@ def main():
41
38
 
42
39
  os.makedirs(params.model_path, exist_ok=True)
43
40
 
44
- scaler_x, scaler_y = StandardScaler(), None
45
- x_indices, y_indices, _ = \
41
+ scaler_x = StandardScaler()
42
+ x_indices, _ = \
46
43
  get_data_characteristics(paths=params.train_data, target_columns=TARGET_COLUMNS_BDEISSCT)
47
- fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
44
+ fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
48
45
 
49
46
  if scaler_x is not None:
50
47
  save_scaler_joblib(scaler_x, params.model_path, suffix='x')
51
48
  save_scaler_numpy(scaler_x, params.model_path, suffix='x')
52
- if scaler_y is not None:
53
- save_scaler_joblib(scaler_y, params.model_path, suffix='y')
54
- save_scaler_numpy(scaler_y, params.model_path, suffix='y')
55
49
 
56
50
 
57
51
  if '__main__' == __name__:
bdeissct_dl/train_ct.py CHANGED
@@ -86,12 +86,12 @@ def main():
86
86
  np.random.shuffle(params.val_data)
87
87
 
88
88
 
89
- x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data,
89
+ x_indices, y_col2index = get_data_characteristics(paths=params.train_data,
90
90
  feature_columns=feature_columns,
91
91
  target_columns=target_columns)
92
92
 
93
- scaler_x, scaler_y = StandardScaler(), None
94
- fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
93
+ scaler_x = StandardScaler()
94
+ fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
95
95
 
96
96
  if scaler_x is not None:
97
97
  save_scaler_joblib(scaler_x, params.model_path, suffix='ct.x')
bdeissct_dl/training.py CHANGED
@@ -54,20 +54,20 @@ def get_test_data(dfs=None, paths=None, scaler_x=None):
54
54
 
55
55
 
56
56
  def get_data_characteristics(paths, target_columns=TARGET_COLUMNS_BDCT, feature_columns=None):
57
- x_indices = []
58
- y_indices = []
59
- col2index = {}
57
+ col2index_y = {}
58
+ col2index_x = {}
60
59
 
61
60
  df = pd.read_csv(paths[0])
62
- feature_columns = set(get_X_columns(df.columns)) if feature_columns is None else set(feature_columns)
63
- target_columns = set(target_columns) if target_columns is not None else set()
61
+ feature_columns = get_X_columns(df.columns) if feature_columns is None else feature_columns
62
+ feature_column_set = set(feature_columns)
63
+ target_columns = target_columns if target_columns is not None else []
64
+ target_column_set = set(target_columns)
64
65
  for i, col in enumerate(df.columns):
65
- if col in feature_columns:
66
- x_indices.append(i)
67
- if col in target_columns:
68
- y_indices.append(i)
69
- col2index[col] = i
70
- return x_indices, y_indices, col2index
66
+ if col in feature_column_set:
67
+ col2index_x[col] = i
68
+ if col in target_column_set:
69
+ col2index_y[col] = i
70
+ return [col2index_x[_] for _ in feature_columns], col2index_y
71
71
 
72
72
 
73
73
  def get_train_data(target_columns, columns_x, columns_y, file_pattern=None, filenames=None, scaler_x=None, \
@@ -192,7 +192,7 @@ def main():
192
192
  np.random.shuffle(params.val_data)
193
193
 
194
194
 
195
- x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
195
+ x_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
196
196
 
197
197
  scaler_x = load_scaler_numpy(params.model_path, suffix='x')
198
198
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bdext
3
- Version: 0.1.61
3
+ Version: 0.1.63
4
4
  Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
5
5
  Home-page: https://github.com/modpath/bdeissct
6
6
  Author: Anna Zhukova
@@ -0,0 +1,29 @@
1
+ README.md,sha256=Fk-VeZXo2zltZ9NXdFnnHS3ETwtiBPRiZ86ttUQUgTE,5894
2
+ bdeissct_dl/__init__.py,sha256=glAW73vlD9Abbb_Fto-sKys078qVEn5iTcx2Mq3Z72s,361
3
+ bdeissct_dl/bdeissct_model.py,sha256=--dnDxvvWcFb45bKIGv8-GUAcSnXA_x6p4D9MFsyXPk,3566
4
+ bdeissct_dl/dl_model.py,sha256=q8RFbbd2n52Y0_HMKSEHdIgBouE4LbHL5CZO3M_3G3E,7364
5
+ bdeissct_dl/estimator.py,sha256=UIQAy62I0_2HKagfck9WBGbbHU6l_WfRypP9CyeHrFE,7313
6
+ bdeissct_dl/estimator_ct.py,sha256=P5Up-NnAYS2Nen5_KPEYP04VXr5hm1Dtjq5TuW5OHjM,2604
7
+ bdeissct_dl/main_covid.py,sha256=wCeJgc4XzEPjcVCaQt_-zGiVX0wdpQhFq2qh9xQzc7w,2882
8
+ bdeissct_dl/model_finder.py,sha256=TC6EcIE6rBO6PefaYfn2reEl6HPkORbIgTzQGsDoXcU,1763
9
+ bdeissct_dl/model_serializer.py,sha256=Ojhy-fUElc4NClhJaF0EWnrAJGK0omk8e7cwwrBk3Yk,3768
10
+ bdeissct_dl/pinball_loss.py,sha256=Xg2jFDlwPOt0AhBY574lWB0yRyuA0bQRiyOYHGL6P54,1602
11
+ bdeissct_dl/scaler_fitting.py,sha256=SdU9DHj9JUAONfbcIfrvK5OYgzcwHlbVQP2rJknF8tQ,1975
12
+ bdeissct_dl/sumstat_checker.py,sha256=BHMgoqjymHc3Ic-Sv7m-zuvCeLlCt4OCDVaKDltSII0,1867
13
+ bdeissct_dl/train_ct.py,sha256=AYhotLFlb_OIoUQzir6SrvCmI1p0FhemjWkdNz0dkTQ,5392
14
+ bdeissct_dl/training.py,sha256=QBjly8e_8yqvxDbHGPw0F4LeL8BbA6UQ8SANFc0NCYY,8990
15
+ bdeissct_dl/tree_encoder.py,sha256=WrrSk_HXOF_rAKHpU7u9_i_gCmp5tz7Tk_Jvbwx0N_g,19193
16
+ bdeissct_dl/tree_manager.py,sha256=UXxUVmEkxwUhKpJeACVgiXZ8Kp1o_hiv8Qb80b6qmVU,11814
17
+ bdeissct_dl/models/CT.psi.keras,sha256=iBrIAvvry6Vew_YhJ_2kjKI3IHm2nH-MtKo1jsQSsC4,40136
18
+ bdeissct_dl/models/CT.rho.keras,sha256=CHiIFf2f22Hx_2JdPA7iT0epAqQHYWpRlzFHejp34e0,40135
19
+ bdeissct_dl/models/data_scalerct.x.gz,sha256=MvqalwyVbvXTvv03usmBt6TMyfc2ecr1Xv1XvRaLCa8,682
20
+ bdeissct_dl/models/data_scalerct.x_mean.npy,sha256=8WalRktKWWvrgAM5ViBVo2fuSfq7zTrCIVtwSeQFzM0,200
21
+ bdeissct_dl/models/data_scalerct.x_n_samples_seen.txt,sha256=APPIZMPUPTCdiHMNx0SrYFFojbLlqbaF5PPZIshGgX8,6
22
+ bdeissct_dl/models/data_scalerct.x_scale.npy,sha256=yg7kI49ycLpVepmb4LXzU9MDlXz7jILAFVna5rPkdDM,200
23
+ bdeissct_dl/models/data_scalerct.x_var.npy,sha256=wWyJxoBhm3BKZ5zi0PVejGCnZaooKj7MjaNv_W0uyrE,200
24
+ bdext-0.1.63.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
25
+ bdext-0.1.63.dist-info/METADATA,sha256=-3CPE2SesHL44cFjx1MyCzoApT-6bgAgTp_OT_DkjMY,6834
26
+ bdext-0.1.63.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
27
+ bdext-0.1.63.dist-info/entry_points.txt,sha256=En4b5js0-eCuBp0Jiqye0fte6svXbXSLiSJOW_KdzV4,286
28
+ bdext-0.1.63.dist-info/top_level.txt,sha256=z4dadFfcLghr4lwROy7QR3zEICpa-eCPT6mmcoHeEJY,12
29
+ bdext-0.1.63.dist-info/RECORD,,
@@ -1,21 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
-
4
- from bdeissct_dl.bdeissct_model import REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO, F_E, F_S, X_S, UPSILON, X_C
5
- from bdeissct_dl.tree_encoder import SCALING_FACTOR
6
-
7
- for model in ('BD', 'BDCT', 'BDEI', 'BDEICT', 'BDSS', 'BDSSCT', 'BDEISS', 'BDEISSCT'):
8
- df = pd.read_csv(f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/test/200_500/{model}/trees.csv.xz')
9
- print(model, REPRODUCTIVE_NUMBER, np.quantile(df[REPRODUCTIVE_NUMBER], [0, 0.5, 1]))
10
- print(model, INFECTION_DURATION, np.quantile(df[INFECTION_DURATION] * df[SCALING_FACTOR], [0, 0.5, 1]))
11
- print(model, RHO, np.quantile(df[RHO], [0, 0.5, 1]))
12
- if 'EI' in model:
13
- print(model, F_E, np.quantile(df[F_E], [0, 0.5, 1]))
14
- print(df[df[F_E] > 1].index)
15
- if 'SS' in model:
16
- print(model, F_S, np.quantile(df[F_S], [0, 0.5, 1]))
17
- print(model, X_S, np.quantile(df[X_S], [0, 0.5, 1]))
18
- if 'CT' in model:
19
- print(model, UPSILON, np.quantile(df[UPSILON], [0, 0.5, 1]))
20
- print(model, X_C, np.quantile(df[X_C], [0, 0.5, 1]))
21
- print('---')
@@ -1,23 +0,0 @@
1
- README.md,sha256=Fk-VeZXo2zltZ9NXdFnnHS3ETwtiBPRiZ86ttUQUgTE,5894
2
- bdeissct_dl/__init__.py,sha256=glAW73vlD9Abbb_Fto-sKys078qVEn5iTcx2Mq3Z72s,361
3
- bdeissct_dl/assess_R_CT.py,sha256=MMozDi4zMeNkzIbg8fYYl0-H_ncHtiQUJR8EkVKLV10,1095
4
- bdeissct_dl/bdeissct_model.py,sha256=4jJs_8NyvrHUve2VeIg9Hj2mc78a7ZuU6xBiI0ZkaMs,3566
5
- bdeissct_dl/dl_model.py,sha256=q8RFbbd2n52Y0_HMKSEHdIgBouE4LbHL5CZO3M_3G3E,7364
6
- bdeissct_dl/estimator.py,sha256=UIQAy62I0_2HKagfck9WBGbbHU6l_WfRypP9CyeHrFE,7313
7
- bdeissct_dl/estimator_ct.py,sha256=YgbFN1vY86Lcp_46QG1kd38CC4K2Crv2S8Hv7-Cz0Xg,2802
8
- bdeissct_dl/main_covid.py,sha256=wCeJgc4XzEPjcVCaQt_-zGiVX0wdpQhFq2qh9xQzc7w,2882
9
- bdeissct_dl/model_finder.py,sha256=TC6EcIE6rBO6PefaYfn2reEl6HPkORbIgTzQGsDoXcU,1763
10
- bdeissct_dl/model_serializer.py,sha256=Ojhy-fUElc4NClhJaF0EWnrAJGK0omk8e7cwwrBk3Yk,3768
11
- bdeissct_dl/pinball_loss.py,sha256=Xg2jFDlwPOt0AhBY574lWB0yRyuA0bQRiyOYHGL6P54,1602
12
- bdeissct_dl/scaler_fitting.py,sha256=wHKCfsFl9ig6U5W17imuKSEn9Xg295S37q0gqF-LRFI,2361
13
- bdeissct_dl/sumstat_checker.py,sha256=BHMgoqjymHc3Ic-Sv7m-zuvCeLlCt4OCDVaKDltSII0,1867
14
- bdeissct_dl/train_ct.py,sha256=axo4OjeCYbx-VBvVaCspnTmqdrORZ1f87ANvFBLvNqM,5459
15
- bdeissct_dl/training.py,sha256=WC0epgi7kW0ER6hxjoxwEW15UbFkv1OHDqrBZg4R3uk,8944
16
- bdeissct_dl/tree_encoder.py,sha256=WrrSk_HXOF_rAKHpU7u9_i_gCmp5tz7Tk_Jvbwx0N_g,19193
17
- bdeissct_dl/tree_manager.py,sha256=UXxUVmEkxwUhKpJeACVgiXZ8Kp1o_hiv8Qb80b6qmVU,11814
18
- bdext-0.1.61.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
- bdext-0.1.61.dist-info/METADATA,sha256=JHeS2OB4iplQF2vHlESLzDJn3kAoitp2M8yCIKgZIac,6834
20
- bdext-0.1.61.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
21
- bdext-0.1.61.dist-info/entry_points.txt,sha256=En4b5js0-eCuBp0Jiqye0fte6svXbXSLiSJOW_KdzV4,286
22
- bdext-0.1.61.dist-info/top_level.txt,sha256=z4dadFfcLghr4lwROy7QR3zEICpa-eCPT6mmcoHeEJY,12
23
- bdext-0.1.61.dist-info/RECORD,,
File without changes