PyPI - bdext - Versions diffs - 0.1.61__tar.gz → 0.1.62__tar.gz - Mend

bdext 0.1.61 (tar.gz) → 0.1.62 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{bdext-0.1.61/bdext.egg-info → bdext-0.1.62}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bdext
-Version: 0.1.61
+Version: 0.1.62
 Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
 Home-page: https://github.com/modpath/bdeissct
 Author: Anna Zhukova

{bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/bdeissct_model.py RENAMED Viewed

@@ -129,5 +129,5 @@ for model in (BDEISSCT, BDEISSCT1, BDEISSCT2, BDEISSCT2000):
-CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, UPSILON, X_C, F_E, F_S, X_S, LA]
+CT_EPI_COLUMNS = [REPRODUCTIVE_NUMBER, INFECTION_DURATION, SAMPLING_FRACTION, LA, F_E, F_S, X_S, UPSILON, X_C]
 CT_RATE_COLUMNS = [PSI, RHO]

{bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/estimator_ct.py RENAMED Viewed

@@ -7,12 +7,7 @@ from bdeissct_dl.model_serializer import load_model_keras, load_scaler_numpy
 def predict_parameters(df, model_path=MODEL_PATH):
     feature_columns = CT_EPI_COLUMNS
-    x_indices = []
-    for i, col in enumerate(df.columns):
-        if col in feature_columns:
-            x_indices.append(i)
-    X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
+    X = df.loc[:, feature_columns].to_numpy(dtype=float, na_value=0)
     # Standardization of the input features with a
     # standard scaler
@@ -58,7 +53,6 @@ def main():
                              )
     parser.add_argument('--log', default=None, type=str, help="output log file")
     parser.add_argument('--sumstats', default=None, type=str, help="input file(s) with epi parameters")
-    parser.add_argument('--ci', action='store_true', help="calculate CIs")
     params = parser.parse_args()
     df = pd.read_csv(params.sumstats)

{bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/scaler_fitting.py RENAMED Viewed

@@ -10,16 +10,13 @@ from bdeissct_dl.model_serializer import save_scaler_joblib, save_scaler_numpy
 from bdeissct_dl.training import get_data_characteristics
-def fit_scalers(paths, x_indices, y_indices, scaler_x=None, scaler_y=None):
+def fit_scalers(paths, x_indices, scaler_x=None):
     # First pass: calculate mean and var
     for path in paths:
         df = pd.read_csv(path)
         if scaler_x:
             X = df.iloc[:, x_indices].to_numpy(dtype=float, na_value=0)
             scaler_x.partial_fit(X)
-        if scaler_y:
-            Y = df.iloc[:, y_indices].to_numpy(dtype=float, na_value=0)
-            scaler_y.partial_fit(Y)
 def main():
@@ -41,17 +38,14 @@ def main():
     os.makedirs(params.model_path, exist_ok=True)
-    scaler_x, scaler_y = StandardScaler(), None
-    x_indices, y_indices, _ = \
+    scaler_x = StandardScaler()
+    x_indices, _ = \
         get_data_characteristics(paths=params.train_data, target_columns=TARGET_COLUMNS_BDEISSCT)
-    fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
+    fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
     if scaler_x is not None:
         save_scaler_joblib(scaler_x, params.model_path, suffix='x')
         save_scaler_numpy(scaler_x, params.model_path, suffix='x')
-    if scaler_y is not None:
-        save_scaler_joblib(scaler_y, params.model_path, suffix='y')
-        save_scaler_numpy(scaler_y, params.model_path, suffix='y')
 if '__main__' == __name__:

{bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/train_ct.py RENAMED Viewed

@@ -86,12 +86,12 @@ def main():
         np.random.shuffle(params.val_data)
-    x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data,
+    x_indices, y_col2index = get_data_characteristics(paths=params.train_data,
                                                                  feature_columns=feature_columns,
                                                                  target_columns=target_columns)
-    scaler_x, scaler_y = StandardScaler(), None
-    fit_scalers(paths=params.train_data, x_indices=x_indices, y_indices=y_indices, scaler_x=scaler_x, scaler_y=scaler_y)
+    scaler_x = StandardScaler()
+    fit_scalers(paths=params.train_data, x_indices=x_indices, scaler_x=scaler_x)
     if scaler_x is not None:
         save_scaler_joblib(scaler_x, params.model_path, suffix='ct.x')

{bdext-0.1.61 → bdext-0.1.62}/bdeissct_dl/training.py RENAMED Viewed

@@ -54,20 +54,20 @@ def get_test_data(dfs=None, paths=None, scaler_x=None):
 def get_data_characteristics(paths, target_columns=TARGET_COLUMNS_BDCT, feature_columns=None):
-    x_indices = []
-    y_indices = []
-    col2index = {}
+    col2index_y = {}
+    col2index_x = {}
     df = pd.read_csv(paths[0])
-    feature_columns = set(get_X_columns(df.columns)) if feature_columns is None else set(feature_columns)
-    target_columns = set(target_columns) if target_columns is not None else set()
+    feature_columns = get_X_columns(df.columns) if feature_columns is None else feature_columns
+    feature_column_set = set(feature_columns)
+    target_columns = target_columns if target_columns is not None else []
+    target_column_set = set(target_columns)
     for i, col in enumerate(df.columns):
-        if col in feature_columns:
-            x_indices.append(i)
-        if col in target_columns:
-            y_indices.append(i)
-            col2index[col] = i
-    return x_indices, y_indices, col2index
+        if col in feature_column_set:
+            col2index_x[col] = i
+        if col in target_column_set:
+            col2index_y[col] = i
+    return [col2index_x[_] for _ in feature_columns], col2index_y
 def get_train_data(target_columns, columns_x, columns_y, file_pattern=None, filenames=None, scaler_x=None, \
@@ -192,7 +192,7 @@ def main():
         np.random.shuffle(params.val_data)
-    x_indices, y_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
+    x_indices, y_col2index = get_data_characteristics(paths=params.train_data, target_columns=target_columns)
     scaler_x = load_scaler_numpy(params.model_path, suffix='x')

{bdext-0.1.61 → bdext-0.1.62/bdext.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bdext
-Version: 0.1.61
+Version: 0.1.62
 Summary: Estimation of BDEISS-CT parameters from phylogenetic trees.
 Home-page: https://github.com/modpath/bdeissct
 Author: Anna Zhukova

{bdext-0.1.61 → bdext-0.1.62}/bdext.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,7 +2,6 @@ LICENSE
 README.md
 setup.py
 bdeissct_dl/__init__.py
-bdeissct_dl/assess_R_CT.py
 bdeissct_dl/bdeissct_model.py
 bdeissct_dl/dl_model.py
 bdeissct_dl/estimator.py

{bdext-0.1.61 → bdext-0.1.62}/setup.py RENAMED Viewed

@@ -9,12 +9,12 @@ setup(
     long_description_content_type='text/markdown',
     include_package_data=True,
     package_data={'bdeissct_dl': [os.path.join('..', 'README.md'),
-                            # os.path.join('models', '*', '*.keras'),
-                            # os.path.join('models', '*', '*.h5'),
-                            # os.path.join('models', '*', '*.json'),
-                            # os.path.join('models', '*', '*.txt'),
-                            # os.path.join('models', '*', '*.npy'),
-                            # os.path.join('models', '*', '*.gz'),
+                            os.path.join('models', '*', '*.keras'),
+                            os.path.join('models', '*', '*.h5'),
+                            os.path.join('models', '*', '*.json'),
+                            os.path.join('models', '*', '*.txt'),
+                            os.path.join('models', '*', '*.npy'),
+                            os.path.join('models', '*', '*.gz'),
                             os.path.join('..', 'LICENCE')]},
     classifiers=[
         'Development Status :: 4 - Beta',
@@ -24,7 +24,7 @@ setup(
         'Topic :: Software Development',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
-    version='0.1.61',
+    version='0.1.62',
     description='Estimation of BDEISS-CT parameters from phylogenetic trees.',
     author='Anna Zhukova',
     author_email='anna.zhukova@pasteur.fr',

bdext-0.1.61/bdeissct_dl/assess_R_CT.py DELETED Viewed

@@ -1,21 +0,0 @@
-import numpy as np
-import pandas as pd
-from bdeissct_dl.bdeissct_model import REPRODUCTIVE_NUMBER, INFECTION_DURATION, RHO, F_E, F_S, X_S, UPSILON, X_C
-from bdeissct_dl.tree_encoder import SCALING_FACTOR
-for model in ('BD', 'BDCT', 'BDEI', 'BDEICT', 'BDSS', 'BDSSCT', 'BDEISS', 'BDEISSCT'):
-    df = pd.read_csv(f'/home/azhukova/projects/bdeissct_dl/simulations_bdeissct/test/200_500/{model}/trees.csv.xz')
-    print(model, REPRODUCTIVE_NUMBER, np.quantile(df[REPRODUCTIVE_NUMBER], [0, 0.5, 1]))
-    print(model, INFECTION_DURATION, np.quantile(df[INFECTION_DURATION] * df[SCALING_FACTOR], [0, 0.5, 1]))
-    print(model, RHO, np.quantile(df[RHO], [0, 0.5, 1]))
-    if 'EI' in model:
-        print(model, F_E, np.quantile(df[F_E], [0, 0.5, 1]))
-        print(df[df[F_E] > 1].index)
-    if 'SS' in model:
-        print(model, F_S, np.quantile(df[F_S], [0, 0.5, 1]))
-        print(model, X_S, np.quantile(df[X_S], [0, 0.5, 1]))
-    if 'CT' in model:
-        print(model, UPSILON, np.quantile(df[UPSILON], [0, 0.5, 1]))
-        print(model, X_C, np.quantile(df[X_C], [0, 0.5, 1]))
-    print('---')