bdext 0.1.61__tar.gz → 0.1.62__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {bdext-0.1.61/bdext.egg-info → bdext-0.1.62}/PKG-INFO +1 -1
  2. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/bdeissct_model.py +1 -1
  3. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/estimator_ct.py +1 -7
  4. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/scaler_fitting.py +4 -10
  5. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/train_ct.py +3 -3
  6. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/training.py +12 -12
  7. {bdext-0.1.61 → bdext-0.1.62/bdext.egg-info}/PKG-INFO +1 -1
  8. {bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/SOURCES.txt +0 -1
  9. {bdext-0.1.61 → bdext-0.1.62}/setup.py +7 -7
  10. bdext-0.1.61/bdeissct_dl/assess_R_CT.py +0 -21
  11. {bdext-0.1.61 → bdext-0.1.62}/LICENSE +0 -0
  12. {bdext-0.1.61 → bdext-0.1.62}/README.md +0 -0
  13. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/__init__.py +0 -0
  14. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/dl_model.py +0 -0
  15. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/estimator.py +0 -0
  16. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/main_covid.py +0 -0
  17. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/model_finder.py +0 -0
  18. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/model_serializer.py +0 -0
  19. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/pinball_loss.py +0 -0
  20. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/sumstat_checker.py +0 -0
  21. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/tree_encoder.py +0 -0
  22. {bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/tree_manager.py +0 -0
  23. {bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/dependency_links.txt +0 -0
  24. {bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/entry_points.txt +0 -0
  25. {bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/requires.txt +0 -0
  26. {bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/top_level.txt +0 -0
  27. {bdext-0.1.61 → bdext-0.1.62}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bdext
3
- Version: 0.1.61
3
+ Version: 0.1.62
4
4
  Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
5
5
  Home-page: https://github.com/modpath/bdeissct
6
6
  Author: Anna Zhukova
@@ -129,5 +129,5 @@ for model in (BDEISSCT, BDEISSCT1, BDEISSCT2, BDEISSCT2000):
129
129
 
130
130
 
131
131
 
132
- CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, UPSILON, X_C, F_E, F_S, X_S, LA]
132
+ CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, LA, F_E, F_S, X_S, UPSILON, X_C]
133
133
  CT_RATE_COLUMNS = [PSI, RHO]
@@ -7,12 +7,7 @@ from bdeissct_dl.model_serializer import load_model_keras, load_scaler_numpy
7
7
 
8
8
  def predict_parameters(df, model_path=MODEL_PATH):
9
9
  feature_columns = CT_EPI_COLUMNS
10
- x_indices = []
11
- for i, col in enumerate(df.columns):
12
- if col in feature_columns:
13
- x_indices.append(i)
14
-
15
- X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
10
+ X = df.loc[:, feature_columns].to_numpy(dtype=float, na_value=0)
16
11
 
17
12
  # Standardization of the input features with a
18
13
  # standard scaler
@@ -58,7 +53,6 @@ def main():
58
53
  )
59
54
  parser.add_argument('--log', default=None, type=str, help="output log file")
60
55
  parser.add_argument('--sumstats', default=None, type=str, help="input file(s) with epi parameters")
61
- parser.add_argument('--ci', action='store_true', help="calculate CIs")
62
56
  params = parser.parse_args()
63
57
 
64
58
  df = pd.read_csv(params.sumstats)
@@ -10,16 +10,13 @@ from bdeissct_dl.model_serializer import save_scaler_joblib, save_scaler_numpy
10
10
  from bdeissct_dl.training import get_data_characteristics
11
11
 
12
12
 
13
- def fit_scalers(paths, x_indices, y_indices, scaler_x=None, scaler_y=None):
13
+ def fit_scalers(paths, x_indices, scaler_x=None):
14
14
  # First pass: calculate mean and var
15
15
  for path in paths:
16
16
  df = pd.read_csv(path)
17
17
  if scaler_x:
18
18
  X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
19
19
  scaler_x.partial_fit(X)
20
- if scaler_y:
21
- Y = df.iloc[:, y_indices].to_numpy(dtype=float, na_value=0)
22
- scaler_y.partial_fit(Y)
23
20
 
24
21
 
25
22
  def main():
@@ -41,17 +38,14 @@ def main():
41
38
 
42
39
  os.makedirs(params.model_path, exist_ok=True)
43
40
 
44
- scaler_x, scaler_y = StandardScaler(), None
45
- x_indices, y_indices, _ = \
41
+ scaler_x = StandardScaler()
42
+ x_indices, _ = \
46
43
  get_data_characteristics(paths=params.train_data, target_columns=TARGET_COLUMNS_BDEISSCT)
47
- fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
44
+ fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
48
45
 
49
46
  if scaler_x is not None:
50
47
  save_scaler_joblib(scaler_x, params.model_path, suffix='x')
51
48
  save_scaler_numpy(scaler_x, params.model_path, suffix='x')
52
- if scaler_y is not None:
53
- save_scaler_joblib(scaler_y, params.model_path, suffix='y')
54
- save_scaler_numpy(scaler_y, params.model_path, suffix='y')
55
49
 
56
50
 
57
51
  if '__main__' == __name__:
@@ -86,12 +86,12 @@ def main():
86
86
  np.random.shuffle(params.val_data)
87
87
 
88
88
 
89
- x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data,
89
+ x_indices, y_col2index = get_data_characteristics(paths=params.train_data,
90
90
  feature_columns=feature_columns,
91
91
  target_columns=target_columns)
92
92
 
93
- scaler_x, scaler_y = StandardScaler(), None
94
- fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
93
+ scaler_x = StandardScaler()
94
+ fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
95
95
 
96
96
  if scaler_x is not None:
97
97
  save_scaler_joblib(scaler_x, params.model_path, suffix='ct.x')
@@ -54,20 +54,20 @@ def get_test_data(dfs=None, paths=None, scaler_x=None):
54
54
 
55
55
 
56
56
  def get_data_characteristics(paths, target_columns=TARGET_COLUMNS_BDCT, feature_columns=None):
57
- x_indices = []
58
- y_indices = []
59
- col2index = {}
57
+ col2index_y = {}
58
+ col2index_x = {}
60
59
 
61
60
  df = pd.read_csv(paths[0])
62
- feature_columns = set(get_X_columns(df.columns)) if feature_columns is None else set(feature_columns)
63
- target_columns = set(target_columns) if target_columns is not None else set()
61
+ feature_columns = get_X_columns(df.columns) if feature_columns is None else feature_columns
62
+ feature_column_set = set(feature_columns)
63
+ target_columns = target_columns if target_columns is not None else []
64
+ target_column_set = set(target_columns)
64
65
  for i, col in enumerate(df.columns):
65
- if col in feature_columns:
66
- x_indices.append(i)
67
- if col in target_columns:
68
- y_indices.append(i)
69
- col2index[col] = i
70
- return x_indices, y_indices, col2index
66
+ if col in feature_column_set:
67
+ col2index_x[col] = i
68
+ if col in target_column_set:
69
+ col2index_y[col] = i
70
+ return [col2index_x[_] for _ in feature_columns], col2index_y
71
71
 
72
72
 
73
73
  def get_train_data(target_columns, columns_x, columns_y, file_pattern=None, filenames=None, scaler_x=None, \
@@ -192,7 +192,7 @@ def main():
192
192
  np.random.shuffle(params.val_data)
193
193
 
194
194
 
195
- x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
195
+ x_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
196
196
 
197
197
  scaler_x = load_scaler_numpy(params.model_path, suffix='x')
198
198
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bdext
3
- Version: 0.1.61
3
+ Version: 0.1.62
4
4
  Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
5
5
  Home-page: https://github.com/modpath/bdeissct
6
6
  Author: Anna Zhukova
@@ -2,7 +2,6 @@ LICENSE
2
2
  README.md
3
3
  setup.py
4
4
  bdeissct_dl/__init__.py
5
- bdeissct_dl/assess_R_CT.py
6
5
  bdeissct_dl/bdeissct_model.py
7
6
  bdeissct_dl/dl_model.py
8
7
  bdeissct_dl/estimator.py
@@ -9,12 +9,12 @@ setup(
9
9
  long_description_content_type='text/markdown',
10
10
  include_package_data=True,
11
11
  package_data={'bdeissct_dl': [os.path.join('..', 'README.md'),
12
- # os.path.join('models', '*', '*.keras'),
13
- # os.path.join('models', '*', '*.h5'),
14
- # os.path.join('models', '*', '*.json'),
15
- # os.path.join('models', '*', '*.txt'),
16
- # os.path.join('models', '*', '*.npy'),
17
- # os.path.join('models', '*', '*.gz'),
12
+ os.path.join('models', '*', '*.keras'),
13
+ os.path.join('models', '*', '*.h5'),
14
+ os.path.join('models', '*', '*.json'),
15
+ os.path.join('models', '*', '*.txt'),
16
+ os.path.join('models', '*', '*.npy'),
17
+ os.path.join('models', '*', '*.gz'),
18
18
  os.path.join('..', 'LICENCE')]},
19
19
  classifiers=[
20
20
  'Development Status :: 4 - Beta',
@@ -24,7 +24,7 @@ setup(
24
24
  'Topic :: Software Development',
25
25
  'Topic :: Software Development :: Libraries :: Python Modules',
26
26
  ],
27
- version='0.1.61',
27
+ version='0.1.62',
28
28
  description='Estimation of BDEISS-CT parameters from phylogenetic trees.',
29
29
  author='Anna Zhukova',
30
30
  author_email='anna.zhukova@pasteur.fr',
@@ -1,21 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
-
4
- from bdeissct_dl.bdeissct_model import REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO, F_E, F_S, X_S, UPSILON, X_C
5
- from bdeissct_dl.tree_encoder import SCALING_FACTOR
6
-
7
- for model in ('BD', 'BDCT', 'BDEI', 'BDEICT', 'BDSS', 'BDSSCT', 'BDEISS', 'BDEISSCT'):
8
- df = pd.read_csv(f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/test/200_500/{model}/trees.csv.xz')
9
- print(model, REPRODUCTIVE_NUMBER, np.quantile(df[REPRODUCTIVE_NUMBER], [0, 0.5, 1]))
10
- print(model, INFECTION_DURATION, np.quantile(df[INFECTION_DURATION] * df[SCALING_FACTOR], [0, 0.5, 1]))
11
- print(model, RHO, np.quantile(df[RHO], [0, 0.5, 1]))
12
- if 'EI' in model:
13
- print(model, F_E, np.quantile(df[F_E], [0, 0.5, 1]))
14
- print(df[df[F_E] > 1].index)
15
- if 'SS' in model:
16
- print(model, F_S, np.quantile(df[F_S], [0, 0.5, 1]))
17
- print(model, X_S, np.quantile(df[X_S], [0, 0.5, 1]))
18
- if 'CT' in model:
19
- print(model, UPSILON, np.quantile(df[UPSILON], [0, 0.5, 1]))
20
- print(model, X_C, np.quantile(df[X_C], [0, 0.5, 1]))
21
- print('---')
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes