sciml 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciml/__init__.py +1 -1
- sciml/ccc.py +36 -0
- sciml/metrics.py +123 -0
- sciml/models.py +14 -7
- sciml/pipelines.py +47 -256
- sciml/regress2.py +217 -0
- {sciml-0.0.8.dist-info → sciml-0.0.10.dist-info}/METADATA +1 -1
- sciml-0.0.10.dist-info/RECORD +11 -0
- sciml/utils.py +0 -46
- sciml-0.0.8.dist-info/RECORD +0 -9
- {sciml-0.0.8.dist-info → sciml-0.0.10.dist-info}/LICENSE +0 -0
- {sciml-0.0.8.dist-info → sciml-0.0.10.dist-info}/WHEEL +0 -0
- {sciml-0.0.8.dist-info → sciml-0.0.10.dist-info}/top_level.txt +0 -0
sciml/__init__.py
CHANGED
@@ -1,2 +1,2 @@
  # coding: utf-8
- __all__ = ["
+ __all__ = ["pipelines", "models", "metrics", "regress2", "ccc"]
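For orientation, a minimal sketch of what the expanded `__all__` enables on the import side (illustrative only, not part of the diff; it relies on standard Python behavior that `from package import *` loads the submodules named in `__all__`):

import sciml
from sciml import *                                         # names pipelines, models, metrics, regress2, ccc
from sciml.ccc import concordance_correlation_coefficient   # module added in 0.0.10
from sciml.regress2 import regress2                         # module added in 0.0.10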
sciml/ccc.py
ADDED
@@ -0,0 +1,36 @@
+ # https://rowannicholls.github.io/python/statistics/agreement/correlation_coefficients.html#lins-concordance-correlation-coefficient-ccc
+ # Lin LIK (1989). “A concordance correlation coefficient to evaluate reproducibility”. Biometrics. 45 (1):255-268.
+ import numpy as np
+ import pandas as pd
+
+ def concordance_correlation_coefficient(y_true, y_pred):
+     """Concordance correlation coefficient."""
+     # Remove NaNs
+     df = pd.DataFrame({
+         'y_true': y_true,
+         'y_pred': y_pred
+     })
+     df = df.dropna()
+     y_true = df['y_true']
+     y_pred = df['y_pred']
+     # Pearson product-moment correlation coefficients
+     cor = np.corrcoef(y_true, y_pred)[0][1]
+     # Mean
+     mean_true = np.mean(y_true)
+     mean_pred = np.mean(y_pred)
+     # Variance
+     var_true = np.var(y_true)
+     var_pred = np.var(y_pred)
+     # Standard deviation
+     sd_true = np.std(y_true)
+     sd_pred = np.std(y_pred)
+     # Calculate CCC
+     numerator = 2 * cor * sd_true * sd_pred
+     denominator = var_true + var_pred + (mean_true - mean_pred)**2
+     return numerator / denominator
+
+
+ # y_true = [3, -0.5, 2, 7, np.NaN]
+ # y_pred = [2.5, 0.0, 2, 8, 3]
+ # ccc = concordance_correlation_coefficient(y_true, y_pred)
+ # print(ccc)
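A small usage sketch of the new module, adapted from the commented example at the bottom of the file (the row containing NaN is dropped inside the function before the coefficient is computed):

import numpy as np
from sciml.ccc import concordance_correlation_coefficient

y_true = [3, -0.5, 2, 7, np.nan]   # the pair with NaN is removed by dropna()
y_pred = [2.5, 0.0, 2, 8, 3]
print(concordance_correlation_coefficient(y_true, y_pred))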
sciml/metrics.py
ADDED
@@ -0,0 +1,123 @@
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance
+
+ def stats_summary(df):
+     min_ = df.min().to_frame().T
+     Q1 = df.quantile(0.25).to_frame().T
+     median_ = df.quantile(0.5).to_frame().T
+     mean_ = df.mean().to_frame().T
+     Q3 = df.quantile(0.75).to_frame().T
+     max_ = df.max().to_frame().T
+     df_stats = pd.concat([min_, Q1, median_, mean_, Q3, max_])
+     df_stats.index = ["Min", "Q1", "Median", "Mean", "Q3", "Max"]
+     return df_stats
+
+ def stats_measures(x, y, return_dict = False):
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+     mse = mean_squared_error(x, y)
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (y - x).mean()
+     if return_dict:
+         return {
+             "R2": r2,
+             "SLOPE": slope,
+             "RMSE": rmse,
+             "MBE": mbe
+         }
+     else:
+         return [r2, slope, rmse, mbe]
+
+ def stats_measures_full(x, y):
+     # from sklearn.metrics import mean_absolute_percentage_error
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+     mse = mean_squared_error(x, y)
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (y - x).mean()
+     # ----------------------------------------------------------------
+     pearsonr = stats.pearsonr(x, y)
+     evs = explained_variance_score(x, y)
+     me = max_error(x, y)
+     mae = mean_absolute_error(x, y)
+     msle = mean_squared_log_error(x, y)
+     meae = median_absolute_error(x, y)
+     r2_score = r2_score(x, y)
+     mpd = mean_poisson_deviance(x, y)
+     mgd = mean_gamma_deviance(x, y)
+     mtd = mean_tweedie_deviance(x, y)
+     return {
+         "R2": r2,
+         "SLOPE": slope,
+         "RMSE": rmse,
+         "MBE": mbe,
+         "INTERCEPT": intercept,
+         "PVALUE": pvalue,
+         "STDERR": stderr,
+         "PEARSON": pearsonr,
+         "EXPLAINED_VARIANCE": evs,
+         "MAXERR": me,
+         "MAE": mae,
+         "MSLE": msle,
+         "MEDIAN_AE": meae,
+         "R2_SCORE": r2_score,
+         "MPD": mpd,
+         "MGD": mgd,
+         "MTD": mtd
+     }
+
+ def stats_measures_df(df, name1, name2, return_dict = False):
+     slope, intercept, rvalue, pvalue, stderr = stats.linregress(df[name1], df[name2])
+     mse = mean_squared_error(df[name1], df[name2])
+     r2 = rvalue ** 2
+     rmse = np.sqrt(mse)
+     mbe = (df[name2] - df[name1]).mean()
+     if return_dict:
+         return {
+             "R2": r2,
+             "SLOPE": slope,
+             "RMSE": rmse,
+             "MBE": mbe
+         }
+     else:
+         return [r2, slope, rmse, mbe]
+
+
+
+ def get_r2(x, y):
+     try:
+         x_bar = x.mean()
+     except:
+         x_bar = np.mean(x)
+
+     r2 = 1 - np.sum((x - y)**2) / np.sum((x - x_bar)**2)
+     return r2
+
+ def get_rmse(observations, estimates):
+     return np.sqrt(((estimates - observations) ** 2).mean())
+
+ def calculate_R2(y_true, y_pred):
+     """
+     Calculate the R^2 (coefficient of determination).
+
+     Args:
+         y_true (array-like): Actual values of the dependent variable.
+         y_pred (array-like): Predicted values of the dependent variable.
+
+     Returns:
+         float: The R^2 value.
+     """
+     y_true = np.array(y_true)
+     y_pred = np.array(y_pred)
+
+     # Residual sum of squares
+     ss_res = np.sum((y_true - y_pred) ** 2)
+
+     # Total sum of squares
+     ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+     # R^2 calculation
+     R2 = 1 - (ss_res / ss_tot)
+     return R2
sciml/models.py
CHANGED
@@ -142,6 +142,9 @@ class SmartForest:
                      params['tree_method'] = 'hist'
                      params['device'] = 'cuda'

+                 params = params.copy()  # Prevent modification from affecting the next loop iteration
+                 params['random_state'] = i  # Use a different random seed for each model to enhance diversity
+
                  model = XGBRegressor(**params)
                  model.fit(X, y)

@@ -220,11 +223,15 @@ class SmartForest:

  """
  # ============================== Test Example ==============================
+ import warnings
+ import numpy as np
  from sklearn.datasets import load_diabetes
+ from sklearn.datasets import fetch_california_housing
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error

-
-
- X, y = load_diabetes(return_X_y=True)
+ # X, y = load_diabetes(return_X_y=True) # Using diabetes dataset
+ X, y = fetch_california_housing(return_X_y=True) # Using house price dataset
  X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

  # Hyperparameter grid
@@ -244,7 +251,7 @@ param_grid = {
  }

  # Create the model with Multi-Grained Scanning enabled (with window sizes 2 and 3)
-
+ regr = SmartForest(
      n_estimators_per_layer = 5,
      max_layers = 10,
      early_stopping_rounds = 5,
@@ -256,14 +263,14 @@ df_reg = SmartForest(
      verbose = 1
  )

-
+ regr.fit(X_train, y_train, X_val, y_val)

  # Predict on validation set and evaluate
- y_pred =
+ y_pred = regr.predict(X_val)
  rmse = np.sqrt(mean_squared_error(y_val, y_pred))
  print("\nFinal RMSE:", rmse)

  # Output best model and RMSE
- best_model, best_rmse =
+ best_model, best_rmse = regr.get_best_model()
  print("\nBest validation RMSE:", best_rmse)
  """
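The two lines added inside the SmartForest training loop copy the hyperparameter dict before seeding it per model; a minimal standalone sketch of that pattern (the names below are illustrative, not sciml API):

base_params = {"max_depth": 3, "learning_rate": 0.1}

configs = []
for i in range(3):
    params = base_params.copy()   # without .copy(), every iteration would mutate the same shared dict
    params["random_state"] = i    # distinct seed per model to increase ensemble diversity
    configs.append(params)

print(configs)  # three dicts differing only in random_state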
sciml/pipelines.py
CHANGED
@@ -155,7 +155,7 @@ try:
      from tensorflow.keras import models
      # from keras.layers import Dropout
      from keras.callbacks import EarlyStopping
-     from scitbx.
+     from scitbx.utils import *
  except Exception as e:
      print(e)

@@ -173,263 +173,54 @@ def train_lstm(X_train, y_train, nfeature, ntime, verbose = 2, epochs = 200, bat
      model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size, verbose=verbose)
      return model

-
-
- # ========================================================================================================
- import numpy as np
- from xgboost import XGBRegressor
- from sklearn.metrics import mean_squared_error
-
- class XGBoostDeepForestRegressor:
-     def __init__(self, n_estimators_per_layer=2, max_layers=20, early_stopping_rounds=2):
-         self.n_estimators_per_layer = n_estimators_per_layer
-         self.max_layers = max_layers
-         self.early_stopping_rounds = early_stopping_rounds
-         self.layers = []
-
-     def _fit_layer(self, X, y):
-         layer = []
-         layer_outputs = []
-         for _ in range(self.n_estimators_per_layer):
-             reg = XGBRegressor()
-             reg.fit(X, y)
-             preds = reg.predict(X).reshape(-1, 1)
-             layer.append(reg)
-             layer_outputs.append(preds)
-         output = np.hstack(layer_outputs)
-         return layer, output
-
-     def fit(self, X, y, X_val=None, y_val=None):
-         X_current = X.copy()
-         best_rmse = float("inf")
-         no_improve_rounds = 0
-
-         for layer_index in range(self.max_layers):
-             print(f"Training Layer {layer_index + 1}")
-             layer, output = self._fit_layer(X_current, y)
-             self.layers.append(layer)
-             X_current = np.hstack([X_current, output])
-
-             if X_val is not None:
-                 y_pred = self.predict(X_val)
-                 # rmse = mean_squared_error(y_val, y_pred, squared=False)
-                 rmse = np.sqrt(mean_squared_error(y_val, y_pred))
-                 print(f"Validation RMSE: {rmse:.4f}")
-
-                 if rmse < best_rmse:
-                     best_rmse = rmse
-                     no_improve_rounds = 0
-                 else:
-                     no_improve_rounds += 1
-                     if no_improve_rounds >= self.early_stopping_rounds:
-                         print("Early stopping triggered.")
-                         break
-
-     def predict(self, X):
-         X_current = X.copy()
-         for layer in self.layers:
-             layer_outputs = []
-             for reg in layer:
-                 n_features = reg.n_features_in_
-                 preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
-                 layer_outputs.append(preds)
-             output = np.hstack(layer_outputs)
-             X_current = np.hstack([X_current, output])
-
-         # Final prediction = average of last layer regressors
-         final_outputs = []
-         for reg in self.layers[-1]:
-             n_features = reg.n_features_in_
-             final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
-         return np.mean(np.hstack(final_outputs), axis=1)
-
-
- from sklearn.datasets import load_diabetes
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
-
- X, y = load_diabetes(return_X_y=True)
- X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
-
- df_reg = XGBoostDeepForestRegressor(n_estimators_per_layer=2, max_layers=5)
- df_reg.fit(X_train, y_train, X_val, y_val)
-
- y_pred = df_reg.predict(X_val)
- # rmse = mean_squared_error(y_val, y_pred, squared=False)
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
- print("Final RMSE:", rmse)
-
- # ----------------------------------------------------------------------------------------------------
-
+ # ===============================================================================================================================
+ # Training utils
  import numpy as np
-
- from sklearn.
- import itertools
-
- class XGBoostDeepForestRegressor:
-     def __init__(self, n_estimators_per_layer=2, max_layers=20, early_stopping_rounds=2, param_grid=None, use_gpu=True, gpu_id=0):
-         self.n_estimators_per_layer = n_estimators_per_layer
-         self.max_layers = max_layers
-         self.early_stopping_rounds = early_stopping_rounds
-         self.param_grid = param_grid or {
-             'max_depth': [3],
-             'learning_rate': [0.1],
-             'n_estimators': [100]
-         }
-         self.use_gpu = use_gpu
-         self.gpu_id = gpu_id
-         self.layers = []
-
-     def _get_param_combinations(self):
-         keys, values = zip(*self.param_grid.items())
-         return [dict(zip(keys, v)) for v in itertools.product(*values)]
-
-     def _fit_layer(self, X, y, X_val=None, y_val=None):
-         layer = []
-         layer_outputs = []
-         param_combos = self._get_param_combinations()
-
-         for i in range(self.n_estimators_per_layer):
-             best_rmse = float('inf')
-             best_model = None
-
-             for params in param_combos:
-                 # Set GPU support parameters in XGBRegressor
-                 if self.use_gpu:
-                     params['tree_method'] = 'hist'  # Use hist method
-                     params['device'] = 'cuda'  # Enable CUDA for GPU
-
-                 model = XGBRegressor(**params)
-                 model.fit(X, y)
-
-                 if X_val is not None:
-                     preds_val = model.predict(X_val)
-                     rmse = np.sqrt(mean_squared_error(y_val, preds_val))
-                     if rmse < best_rmse:
-                         best_rmse = rmse
-                         best_model = model
-                 else:
-                     best_model = model
-
-             final_model = best_model
-             preds = final_model.predict(X).reshape(-1, 1)
-             layer.append(final_model)
-             layer_outputs.append(preds)
-
-         output = np.hstack(layer_outputs)
-         return layer, output
-
-     def fit(self, X, y, X_val=None, y_val=None):
-         X_current = X.copy()
-         X_val_current = X_val.copy() if X_val is not None else None
-
-         best_rmse = float("inf")
-         no_improve_rounds = 0
-
-         for layer_index in range(self.max_layers):
-             print(f"Training Layer {layer_index + 1}")
-             layer, output = self._fit_layer(X_current, y, X_val_current, y_val)
-             self.layers.append(layer)
-             X_current = np.hstack([X_current, output])
-
-             if X_val is not None:
-                 val_outputs = []
-                 for reg in layer:
-                     n_features = reg.n_features_in_
-                     preds = reg.predict(X_val_current[:, :n_features]).reshape(-1, 1)
-                     val_outputs.append(preds)
-                 val_output = np.hstack(val_outputs)
-                 X_val_current = np.hstack([X_val_current, val_output])
-
-                 y_pred = self.predict(X_val)
-                 rmse = np.sqrt(mean_squared_error(y_val, y_pred))
-                 print(f"Validation RMSE: {rmse:.4f}")
-
-                 if rmse < best_rmse:
-                     best_rmse = rmse
-                     no_improve_rounds = 0
-                 else:
-                     no_improve_rounds += 1
-                     if no_improve_rounds >= self.early_stopping_rounds:
-                         print("Early stopping triggered.")
-                         break
-
-     def predict(self, X):
-         X_current = X.copy()
-         for layer in self.layers:
-             layer_outputs = []
-             for reg in layer:
-                 n_features = reg.n_features_in_
-                 preds = reg.predict(X_current[:, :n_features]).reshape(-1, 1)
-                 layer_outputs.append(preds)
-             output = np.hstack(layer_outputs)
-             X_current = np.hstack([X_current, output])
-
-         final_outputs = []
-         for reg in self.layers[-1]:
-             n_features = reg.n_features_in_
-             final_outputs.append(reg.predict(X_current[:, :n_features]).reshape(-1, 1))
-         return np.mean(np.hstack(final_outputs), axis=1)
-
-
- from sklearn.datasets import load_diabetes
+ import pandas as pd
+ from sklearn.model_selection import ShuffleSplit
  from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
-
- # Load dataset
- X, y = load_diabetes(return_X_y=True)
- X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
-
- # Hyperparameter grid
- param_grid = {
-     'max_depth': [3, 4],
-     'learning_rate': [0.1, 0.05],
-     'n_estimators': [50, 100]
- }
-
- # Create and fit the model with GPU enabled
- df_reg = XGBoostDeepForestRegressor(
-     n_estimators_per_layer=2,
-     max_layers=5,
-     early_stopping_rounds=2,
-     param_grid=param_grid,
-     use_gpu=True,  # Enable GPU acceleration
-     gpu_id=0  # Default to the first GPU
- )
-
- df_reg.fit(X_train, y_train, X_val, y_val)
-
- # Final evaluation
- y_pred = df_reg.predict(X_val)
- rmse = np.sqrt(mean_squared_error(y_val, y_pred))
- print("Final RMSE:", rmse)
-
- # ----------------------------------------------------------------------------------------------------
-
- xgb_params = {
-     "objective": "reg:squarederror",
-     "random_state": 0,
-     'seed': 0,
-     'n_estimators': 100,
-     'max_depth': 6,
-     'min_child_weight': 4,
-     'subsample': 0.8,
-     'colsample_bytree': 0.8,
-     'gamma': 0,
-     'reg_alpha': 0,
-     'reg_lambda': 1,
-     'learning_rate': 0.05,
- }

-
-
-
-
-
-
-
-
+ # randomly select sites
+ def random_select(ds, count, num, random_state = 0):
+     np.random.seed(random_state)
+     idxs = np.random.choice(np.delete(np.arange(len(ds)), count), num, replace = False)
+     return np.sort(idxs)
+
+ def split(Xs, ys, return_index = False, test_size = 0.33, random_state = 42):
+     if return_index:
+         sss = ShuffleSplit(n_splits=1, test_size = test_size, random_state = random_state)
+         sss.get_n_splits(Xs, ys)
+         train_index, test_index = next(sss.split(Xs, ys))
+         return (train_index, test_index)
+     else:
+         X_train, X_test, y_train, y_test = train_test_split(
+             Xs, ys,
+             test_size = test_size,
+             random_state = random_state
+         )
+         return (X_train, X_test, y_train, y_test)
+
+ def split_cut(Xs, ys, test_ratio = 0.33):
+     """
+     Split the timeseries into before and after halves
+     """
+     assert ys.ndim == 2, 'ys must be 2D!'
+     assert len(Xs) == len(ys), 'Xs and ys should be equally long!'
+     assert type(Xs) == type(ys), 'Xs and ys should be the same data type!'
+     if not type(Xs) in [pd.core.frame.DataFrame, np.ndarray]: raise Exception('Only accept numpy ndarray or pandas dataframe')
+     anchor = int(np.floor(len(ys) * (1 - test_ratio)))
+
+     if type(Xs) == pd.core.frame.DataFrame:
+         X_train = Xs.iloc[0: anchor, :]
+         X_test = Xs.iloc[anchor::, :]
+         y_train = ys.iloc[0: anchor, :]
+         y_test = ys.iloc[anchor::, :]
+     else:
+         X_train = Xs[0: anchor, :]
+         X_test = Xs[anchor::, :]
+         y_train = ys[0: anchor, :]
+         y_test = ys[anchor::, :]

-
+     assert len(X_train) + len(X_test) == len(Xs), 'The sum of train and test lengths must equal to Xs/ys!'

-
+     return (X_train, X_test, y_train, y_test)
sciml/regress2.py
ADDED
@@ -0,0 +1,217 @@
+ # Model type I and II regression, including RMA (reduced major axis regression)
+
+ """
+ Credit: UMaine MISC Lab; emmanuel.boss@maine.edu
+ http://misclab.umeoce.maine.edu/
+ https://github.com/OceanOptics
+ ------------------------------------------------------------------------------
+ MIT License
+
+ Copyright (c) [year] [fullname]
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ """
+
+ import statsmodels.api as sm
+ import numpy as np
+
+
+ def regress2(_x, _y, _method_type_1 = "ordinary least square",
+              _method_type_2 = "reduced major axis",
+              _weight_x = [], _weight_y = [], _need_intercept = True):
+     # Regression Type II based on statsmodels
+     # Type II regressions are recommended if there is variability on both x and y
+     # It's computing the linear regression type I for (x,y) and (y,x)
+     # and then average relationship with one of the type II methods
+     #
+     # INPUT:
+     #   _x <np.array>
+     #   _y <np.array>
+     #   _method_type_1 <str> method to use for regression type I:
+     #     ordinary least square or OLS <default>
+     #     weighted least square or WLS
+     #     robust linear model or RLM
+     #   _method_type_2 <str> method to use for regression type II:
+     #     major axis
+     #     reduced major axis <default> (also known as geometric mean)
+     #     arithmetic mean
+     #   _need_intercept <bool>
+     #     True <default> add a constant to relation (y = a x + b)
+     #     False force relation by 0 (y = a x)
+     #   _weight_x <np.array> containing the weigth of x
+     #   _weigth_y <np.array> containing the weigth of y
+     #
+     # OUTPUT:
+     #   slope
+     #   intercept
+     #   r
+     #   std_slope
+     #   std_intercept
+     #   predict
+     #
+     # REQUIRE:
+     #   numpy
+     #   statsmodels
+     #
+     # The code is based on the matlab function of MBARI.
+     # AUTHOR: Nils Haentjens
+     # REFERENCE: https://www.mbari.org/products/research-software/matlab-scripts-linear-regressions/
+
+     # Check input
+     if _method_type_2 != "reduced major axis" and _method_type_1 != "ordinary least square":
+         raise ValueError("'" + _method_type_2 + "' only supports '" + _method_type_1 + "' method as type 1.")
+
+     # Set x, y depending on intercept requirement
+     if _need_intercept:
+         x_intercept = sm.add_constant(_x)
+         y_intercept = sm.add_constant(_y)
+
+     # Compute Regression Type I (if type II requires it)
+     if (_method_type_2 == "reduced major axis" or
+             _method_type_2 == "geometric mean"):
+         if _method_type_1 == "OLS" or _method_type_1 == "ordinary least square":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.OLS(_y, x_intercept).fit().params
+                 [intercept_b, slope_b] = sm.OLS(_x, y_intercept).fit().params
+             else:
+                 slope_a = sm.OLS(_y, _x).fit().params
+                 slope_b = sm.OLS(_x, _y).fit().params
+         elif _method_type_1 == "WLS" or _method_type_1 == "weighted least square":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.WLS(
+                     _y, x_intercept, weights=1. / _weight_y).fit().params
+                 [intercept_b, slope_b] = sm.WLS(
+                     _x, y_intercept, weights=1. / _weight_x).fit().params
+             else:
+                 slope_a = sm.WLS(_y, _x, weights=1. / _weight_y).fit().params
+                 slope_b = sm.WLS(_x, _y, weights=1. / _weight_x).fit().params
+         elif _method_type_1 == "RLM" or _method_type_1 == "robust linear model":
+             if _need_intercept:
+                 [intercept_a, slope_a] = sm.RLM(_y, x_intercept).fit().params
+                 [intercept_b, slope_b] = sm.RLM(_x, y_intercept).fit().params
+             else:
+                 slope_a = sm.RLM(_y, _x).fit().params
+                 slope_b = sm.RLM(_x, _y).fit().params
+         else:
+             raise ValueError("Invalid literal for _method_type_1: " + _method_type_1)
+
+     # Compute Regression Type II
+     if (_method_type_2 == "reduced major axis" or
+             _method_type_2 == "geometric mean"):
+         # Transpose coefficients
+         if _need_intercept:
+             intercept_b = -intercept_b / slope_b
+             slope_b = 1 / slope_b
+         # Check if correlated in same direction
+         if np.sign(slope_a) != np.sign(slope_b):
+             raise RuntimeError('Type I regressions of opposite sign.')
+         # Compute Reduced Major Axis Slope
+         slope = np.sign(slope_a) * np.sqrt(slope_a * slope_b)
+         if _need_intercept:
+             # Compute Intercept (use mean for least square)
+             if _method_type_1 == "OLS" or _method_type_1 == "ordinary least square":
+                 intercept = np.mean(_y) - slope * np.mean(_x)
+             else:
+                 intercept = np.median(_y) - slope * np.median(_x)
+         else:
+             intercept = 0
+         # Compute r
+         r = np.sign(slope_a) * np.sqrt(slope_a / slope_b)
+         # Compute predicted values
+         predict = slope * _x + intercept
+         # Compute standard deviation of the slope and the intercept
+         n = len(_x)
+         diff = _y - predict
+         Sx2 = np.sum(np.multiply(_x, _x))
+         den = n * Sx2 - np.sum(_x) ** 2
+         s2 = np.sum(np.multiply(diff, diff)) / (n - 2)
+         std_slope = np.sqrt(n * s2 / den)
+         if _need_intercept:
+             std_intercept = np.sqrt(Sx2 * s2 / den)
+         else:
+             std_intercept = 0
+     elif (_method_type_2 == "Pearson's major axis" or
+           _method_type_2 == "major axis"):
+         if not _need_intercept:
+             raise ValueError("Invalid value for _need_intercept: " + str(_need_intercept))
+         xm = np.mean(_x)
+         ym = np.mean(_y)
+         xp = _x - xm
+         yp = _y - ym
+         sumx2 = np.sum(np.multiply(xp, xp))
+         sumy2 = np.sum(np.multiply(yp, yp))
+         sumxy = np.sum(np.multiply(xp, yp))
+         slope = ((sumy2 - sumx2 + np.sqrt((sumy2 - sumx2)**2 + 4 * sumxy**2)) /
+                  (2 * sumxy))
+         intercept = ym - slope * xm
+         # Compute r
+         r = sumxy / np.sqrt(sumx2 * sumy2)
+         # Compute standard deviation of the slope and the intercept
+         n = len(_x)
+         std_slope = (slope / r) * np.sqrt((1 - r ** 2) / n)
+         sigx = np.sqrt(sumx2 / (n - 1))
+         sigy = np.sqrt(sumy2 / (n - 1))
+         std_i1 = (sigy - sigx * slope) ** 2
+         std_i2 = (2 * sigx * sigy) + ((xm ** 2 * slope * (1 + r)) / r ** 2)
+         std_intercept = np.sqrt((std_i1 + ((1 - r) * slope * std_i2)) / n)
+         # Compute predicted values
+         predict = slope * _x + intercept
+     elif _method_type_2 == "arithmetic mean":
+         if not _need_intercept:
+             raise ValueError("Invalid value for _need_intercept: " + str(_need_intercept))
+         n = len(_x)
+         sg = np.floor(n / 2)
+         # Sort x and y in order of x
+         sorted_index = sorted(range(len(_x)), key=lambda i: _x[i])
+         x_w = np.array([_x[i] for i in sorted_index])
+         y_w = np.array([_y[i] for i in sorted_index])
+         x1 = x_w[1:sg + 1]
+         x2 = x_w[sg:n]
+         y1 = y_w[1:sg + 1]
+         y2 = y_w[sg:n]
+         x1m = np.mean(x1)
+         x2m = np.mean(x2)
+         y1m = np.mean(y1)
+         y2m = np.mean(y2)
+         xm = (x1m + x2m) / 2
+         ym = (y1m + y2m) / 2
+         slope = (x2m - x1m) / (y2m - y1m)
+         intercept = ym - xm * slope
+         # r (to verify)
+         r = []
+         # Compute predicted values
+         predict = slope * _x + intercept
+         # Compute standard deviation of the slope and the intercept
+         std_slope = []
+         std_intercept = []
+
+     # Return all that
+     return {"slope": float(slope), "intercept": intercept, "r": r,
+             "std_slope": std_slope, "std_intercept": std_intercept,
+             "predict": predict}
+
+
+ # if __name__ == '__main__':
+ #     x = np.linspace(0, 10, 100)
+ #     # Add random error on y
+ #     e = np.random.normal(size=len(x))
+ #     y = x + e
+ #     results = regress2(x, y, _method_type_2="reduced major axis",
+ #                        _need_intercept=False)
+ #     # print(results)
sciml-0.0.10.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+ sciml/__init__.py,sha256=BqRVu5DbfbnxksBXhe4gH_uulPdqTjSaSO1LvGkc37Q,79
+ sciml/ccc.py,sha256=AE1l46hvh18_Q9_BQufMjsGF9-JfsTw2hrT1CbgBHE8,1210
+ sciml/metrics.py,sha256=ICEeH6jwmpdx9jxwYSzB_YTvbyBq9AEUYqkZiVS1ZGs,3577
+ sciml/models.py,sha256=qc2LgdpSkq9kGMnLKZTnyuwzytCu6R8hyU5i6PaI7Qw,10345
+ sciml/pipelines.py,sha256=NGBwl5vA0Uq5GO-VtIow_k42K7HoVwxPQrkW-jINflY,8381
+ sciml/regress2.py,sha256=GSZ4IqmyF9u3PGOhHIKV0Rb_C2pI8eJ3jGJBa1IrEXM,8978
+ sciml-0.0.10.dist-info/LICENSE,sha256=dX4jBmkgQPWc_TfYkXtKQzVIgZQWFuHZ8vQjV4sEeV4,1060
+ sciml-0.0.10.dist-info/METADATA,sha256=iMcI6kpM6IX2oBhx9JwmI77JiX2bZPWI93dHta_jkCM,314
+ sciml-0.0.10.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ sciml-0.0.10.dist-info/top_level.txt,sha256=dS_7aBCZFKQE3myPy5sh4USjQZCZyGg382-YxUUYcdw,6
+ sciml-0.0.10.dist-info/RECORD,,
sciml/utils.py
DELETED
@@ -1,46 +0,0 @@
- import numpy as np
- import pandas as pd
- from sklearn.model_selection import ShuffleSplit
- from sklearn.model_selection import train_test_split
-
- # randomly select sites
- def random_select(ds, count, num, random_state = 0):
-     np.random.seed(random_state)
-     idxs = np.random.choice(np.delete(np.arange(len(ds)), count), num, replace = False)
-     return np.sort(idxs)
-
- def split(Xs, ys, return_index = False, test_size = 0.33, random_state = 42):
-     if return_index:
-         sss = ShuffleSplit(n_splits=1, test_size = test_size, random_state = random_state)
-         sss.get_n_splits(Xs, ys)
-         train_index, test_index = next(sss.split(Xs, ys))
-         return (train_index, test_index)
-     else:
-         X_train, X_test, y_train, y_test = train_test_split(
-             Xs, ys,
-             test_size = test_size,
-             random_state = random_state
-         )
-         return (X_train, X_test, y_train, y_test)
-
- def split_cut(Xs, ys, test_ratio = 0.33):
-     assert ys.ndim == 2, 'ys must be 2D!'
-     assert len(Xs) == len(ys), 'Xs and ys should be equally long!'
-     assert type(Xs) == type(ys), 'Xs and ys should be the same data type!'
-     if not type(Xs) in [pd.core.frame.DataFrame, np.ndarray]: raise Exception('Only accept numpy ndarray or pandas dataframe')
-     anchor = int(np.floor(len(ys) * (1 - test_ratio)))
-
-     if type(Xs) == pd.core.frame.DataFrame:
-         X_train = Xs.iloc[0: anchor, :]
-         X_test = Xs.iloc[anchor::, :]
-         y_train = ys.iloc[0: anchor, :]
-         y_test = ys.iloc[anchor::, :]
-     else:
-         X_train = Xs[0: anchor, :]
-         X_test = Xs[anchor::, :]
-         y_train = ys[0: anchor, :]
-         y_test = ys[anchor::, :]
-
-     assert len(X_train) + len(X_test) == len(Xs), 'The sum of train and test lengths must equal to Xs/ys!'
-
-     return (X_train, X_test, y_train, y_test)
sciml-0.0.8.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
- sciml/__init__.py,sha256=6iQAGgCEMuw4yoLBzZDax46a45LZgzEeNSHQMdmcBSQ,58
- sciml/models.py,sha256=p6cw3SxTQaOtFhJx8KdW0Z2QtxBlSBlVPHETTNCjJ2w,9880
- sciml/pipelines.py,sha256=CJolleJakoEQc-EV-v6NovP3bDb1hif7SvObXdaLXdY,15268
- sciml/utils.py,sha256=u5DzQJV4aCZ-p7sY56Fxzj8WDGYOgn1rOTeGzAw0vwY,1831
- sciml-0.0.8.dist-info/LICENSE,sha256=dX4jBmkgQPWc_TfYkXtKQzVIgZQWFuHZ8vQjV4sEeV4,1060
- sciml-0.0.8.dist-info/METADATA,sha256=uMCtigVwS2e0abqbvfbLZca6iZnkdDTBXtbjdg34yIA,313
- sciml-0.0.8.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- sciml-0.0.8.dist-info/top_level.txt,sha256=dS_7aBCZFKQE3myPy5sh4USjQZCZyGg382-YxUUYcdw,6
- sciml-0.0.8.dist-info/RECORD,,
File without changes
|
File without changes
|
File without changes
|