sciml 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sciml/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- # coding: utf-8
2
- __all__ = ["utils", "pipelines", "models"]
1
# coding: utf-8
# Names pulled in by `from sciml import *`.
# NOTE(review): entries are presumably submodules of this package
# (e.g. sciml/metrics.py, sciml/ccc.py) — confirm the others exist.
__all__ = ["pipelines", "models", "metrics", "regress2", "ccc"]
sciml/ccc.py ADDED
@@ -0,0 +1,36 @@
1
+ # https://rowannicholls.github.io/python/statistics/agreement/correlation_coefficients.html#lins-concordance-correlation-coefficient-ccc
2
+ # Lin LIK (1989). “A concordance correlation coefficient to evaluate reproducibility”. Biometrics. 45 (1):255-268.
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
def concordance_correlation_coefficient(y_true, y_pred):
    """Return Lin's concordance correlation coefficient (CCC).

    Pairs in which either value is NaN are dropped before any statistic is
    computed. Means, variances and the covariance all use the population
    (``ddof=0``) form.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of values to compare, same length as ``y_true``.

    Returns:
        float: ``2 * cov / (var_true + var_pred + (mean_true - mean_pred)**2)``.
        ``nan`` is returned when no complete pair remains or when the
        denominator is zero (both inputs constant and equal).
    """
    # Drop every pair that has a NaN on either side.
    pairs = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}).dropna()
    if pairs.empty:
        return np.nan
    truth = pairs['y_true'].to_numpy(dtype=float)
    pred = pairs['y_pred'].to_numpy(dtype=float)

    mean_true = truth.mean()
    mean_pred = pred.mean()
    # The population covariance equals cor * sd_true * sd_pred, but it stays
    # finite (zero) when one input is constant, whereas the Pearson
    # correlation is NaN in that case and would poison the result.
    covariance = np.mean((truth - mean_true) * (pred - mean_pred))
    denominator = truth.var() + pred.var() + (mean_true - mean_pred) ** 2
    if denominator == 0:
        return np.nan
    return 2 * covariance / denominator
31
+
32
+
33
+ # y_true = [3, -0.5, 2, 7, np.nan]
34
+ # y_pred = [2.5, 0.0, 2, 8, 3]
35
+ # ccc = concordance_correlation_coefficient(y_true, y_pred)
36
+ # print(ccc)
sciml/metrics.py ADDED
@@ -0,0 +1,123 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from scipy import stats
4
+ from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance
5
+
6
def stats_summary(df):
    """Build a six-row summary table for a DataFrame.

    Args:
        df: pandas DataFrame whose ``min``, ``quantile``, ``mean`` and
            ``max`` reductions are taken.

    Returns:
        pandas.DataFrame: one row per statistic, indexed
        ``Min, Q1, Median, Mean, Q3, Max``, with one column per entry of
        the reductions.
    """
    # Each reduction yields a Series; keep it paired with its row label.
    labelled_rows = [
        ("Min", df.min()),
        ("Q1", df.quantile(0.25)),
        ("Median", df.quantile(0.5)),
        ("Mean", df.mean()),
        ("Q3", df.quantile(0.75)),
        ("Max", df.max()),
    ]
    # Turn every Series into a one-row frame and stack them in order.
    summary = pd.concat([row.to_frame().T for _, row in labelled_rows])
    summary.index = [label for label, _ in labelled_rows]
    return summary
16
+
17
def stats_measures(x, y, return_dict=False):
    """Compute R2, regression slope, RMSE and mean bias error of y against x.

    Args:
        x: Reference values (array-like).
        y: Values compared against ``x`` (array-like, same length).
        return_dict: When true, return a dict keyed ``R2``, ``SLOPE``,
            ``RMSE``, ``MBE``; otherwise return a list in that order.

    Returns:
        dict or list: squared correlation coefficient of the linear fit of
        ``y`` on ``x``, the fit's slope, the root-mean-square error, and the
        mean of ``y - x``.
    """
    # Plain lists/tuples have no elementwise subtraction, so the bias term
    # below would raise TypeError even though linregress and
    # mean_squared_error accept them. Other inputs pass through untouched.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    slope, _intercept, rvalue, _pvalue, _stderr = stats.linregress(x, y)
    r2 = rvalue ** 2
    rmse = np.sqrt(mean_squared_error(x, y))
    mbe = (y - x).mean()
    if return_dict:
        return {
            "R2": r2,
            "SLOPE": slope,
            "RMSE": rmse,
            "MBE": mbe,
        }
    return [r2, slope, rmse, mbe]
32
+
33
def stats_measures_full(x, y):
    """Compute an extended set of agreement and error metrics of y against x.

    ``x`` is passed first (the ``y_true`` position of the scikit-learn
    metrics) and ``y`` second (the ``y_pred`` position).

    Args:
        x: Reference values (array-like).
        y: Values compared against ``x`` (array-like, same length).

    Returns:
        dict: keys ``R2``, ``SLOPE``, ``RMSE``, ``MBE``, ``INTERCEPT``,
        ``PVALUE``, ``STDERR`` (from the linear fit of ``y`` on ``x`` and the
        raw differences), ``PEARSON`` (the full result object returned by
        ``scipy.stats.pearsonr``, not a bare scalar), and the scikit-learn
        scores ``EXPLAINED_VARIANCE``, ``MAXERR``, ``MAE``, ``MSLE``,
        ``MEDIAN_AE``, ``R2_SCORE``, ``MPD``, ``MGD``, ``MTD``.

    NOTE(review): several of the scikit-learn metrics restrict their input
    domain (see their documentation); any error they raise propagates
    unchanged.
    """
    # Plain lists/tuples have no elementwise subtraction, so the bias term
    # would raise TypeError; other inputs pass through untouched.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
    r2 = rvalue ** 2
    rmse = np.sqrt(mean_squared_error(x, y))
    mbe = (y - x).mean()
    # ----------------------------------------------------------------
    pearson = stats.pearsonr(x, y)
    evs = explained_variance_score(x, y)
    me = max_error(x, y)
    mae = mean_absolute_error(x, y)
    msle = mean_squared_log_error(x, y)
    meae = median_absolute_error(x, y)
    # Stored under a new name: assigning to `r2_score` inside the function
    # would make it a local and the call itself would raise
    # UnboundLocalError.
    r2_determination = r2_score(x, y)
    mpd = mean_poisson_deviance(x, y)
    mgd = mean_gamma_deviance(x, y)
    mtd = mean_tweedie_deviance(x, y)
    return {
        "R2": r2,
        "SLOPE": slope,
        "RMSE": rmse,
        "MBE": mbe,
        "INTERCEPT": intercept,
        "PVALUE": pvalue,
        "STDERR": stderr,
        "PEARSON": pearson,
        "EXPLAINED_VARIANCE": evs,
        "MAXERR": me,
        "MAE": mae,
        "MSLE": msle,
        "MEDIAN_AE": meae,
        "R2_SCORE": r2_determination,
        "MPD": mpd,
        "MGD": mgd,
        "MTD": mtd,
    }
70
+
71
def stats_measures_df(df, name1, name2, return_dict = False):
    """Compute R2, slope, RMSE and mean bias error between two columns.

    Args:
        df: Object indexable by ``name1`` and ``name2`` (e.g. a DataFrame).
        name1: Key of the reference column.
        name2: Key of the column compared against ``name1``.
        return_dict: When true, return a dict keyed ``R2``, ``SLOPE``,
            ``RMSE``, ``MBE``; otherwise return a list in that order.

    Returns:
        dict or list: squared correlation coefficient of the linear fit of
        ``df[name2]`` on ``df[name1]``, the fit's slope, the
        root-mean-square error, and the mean of ``df[name2] - df[name1]``.
    """
    reference = df[name1]
    compared = df[name2]

    slope, _intercept, rvalue, _pvalue, _stderr = stats.linregress(reference, compared)
    rmse = np.sqrt(mean_squared_error(reference, compared))
    r2 = rvalue ** 2
    mbe = (compared - reference).mean()

    if not return_dict:
        return [r2, slope, rmse, mbe]
    return {"R2": r2, "SLOPE": slope, "RMSE": rmse, "MBE": mbe}
86
+
87
+
88
+
89
def get_r2(x, y):
    """Return the coefficient of determination of y against reference x.

    Computes ``1 - sum((x - y)**2) / sum((x - mean(x))**2)``.

    Args:
        x: Reference values (array-like).
        y: Values compared against ``x`` (array-like, same length).

    Returns:
        The R^2 value; no guard is applied when ``x`` is constant (the
        denominator is then zero).
    """
    # Plain lists/tuples support neither `.mean()` nor elementwise
    # subtraction; convert them so they work like arrays. Other inputs
    # (ndarray, Series, ...) pass through untouched.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    # Prefer the object's own mean; fall back to numpy for objects without one.
    x_bar = x.mean() if hasattr(x, "mean") else np.mean(x)

    r2 = 1 - np.sum((x - y) ** 2) / np.sum((x - x_bar) ** 2)
    return r2
97
+
98
def get_rmse(observations, estimates):
    """Return the root-mean-square error between observations and estimates.

    Args:
        observations: Reference values (array-like).
        estimates: Values compared against ``observations`` (array-like,
            same length).

    Returns:
        Square root of the mean squared difference ``estimates - observations``.
    """
    # Plain lists/tuples have no elementwise subtraction; convert them so
    # they work like arrays. Other inputs pass through untouched.
    if isinstance(observations, (list, tuple)):
        observations = np.asarray(observations)
    if isinstance(estimates, (list, tuple)):
        estimates = np.asarray(estimates)
    return np.sqrt(((estimates - observations) ** 2).mean())
100
+
101
def calculate_R2(y_true, y_pred):
    """
    Compute the coefficient of determination (R^2) of predictions.

    Args:
        y_true (array-like): Observed values of the dependent variable.
        y_pred (array-like): Predicted values of the dependent variable.

    Returns:
        float: ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of
        squared residuals and ``SS_tot`` the sum of squared deviations of
        ``y_true`` from its mean.
    """
    observed = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    # Squared prediction errors and squared spread around the observed mean.
    residuals = observed - predicted
    deviations = observed - np.mean(observed)

    return 1 - (np.sum(residuals ** 2) / np.sum(deviations ** 2))