sciml-0.0.9-py3-none-any.whl → sciml-0.0.10-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- sciml/__init__.py +2 -2
- sciml/ccc.py +36 -0
- sciml/metrics.py +123 -0
- sciml/models.py +275 -276
- sciml/pipelines.py +226 -435
- sciml/regress2.py +217 -0
- {sciml-0.0.9.dist-info → sciml-0.0.10.dist-info}/LICENSE +21 -21
- {sciml-0.0.9.dist-info → sciml-0.0.10.dist-info}/METADATA +13 -13
- sciml-0.0.10.dist-info/RECORD +11 -0
- {sciml-0.0.9.dist-info → sciml-0.0.10.dist-info}/WHEEL +1 -1
- sciml/utils.py +0 -46
- sciml-0.0.9.dist-info/RECORD +0 -9
- {sciml-0.0.9.dist-info → sciml-0.0.10.dist-info}/top_level.txt +0 -0
sciml/__init__.py
CHANGED
@@ -1,2 +1,2 @@
-# coding: utf-8
-__all__ = ["
+# coding: utf-8
+__all__ = ["pipelines", "models", "metrics", "regress2", "ccc"]
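With the expanded __all__, a star-import of the package now binds all five submodules. A minimal sketch, assuming sciml 0.0.10 is installed:

    # A star-import resolves the submodule names listed in the new __all__.
    from sciml import *  # noqa: F403
    print(metrics, ccc)  # both are bound as modules by the line above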
sciml/ccc.py
ADDED
@@ -0,0 +1,36 @@
+# https://rowannicholls.github.io/python/statistics/agreement/correlation_coefficients.html#lins-concordance-correlation-coefficient-ccc
+# Lin LIK (1989). “A concordance correlation coefficient to evaluate reproducibility”. Biometrics. 45 (1):255-268.
+import numpy as np
+import pandas as pd
+
+def concordance_correlation_coefficient(y_true, y_pred):
+    """Concordance correlation coefficient."""
+    # Remove NaNs
+    df = pd.DataFrame({
+        'y_true': y_true,
+        'y_pred': y_pred
+    })
+    df = df.dropna()
+    y_true = df['y_true']
+    y_pred = df['y_pred']
+    # Pearson product-moment correlation coefficients
+    cor = np.corrcoef(y_true, y_pred)[0][1]
+    # Mean
+    mean_true = np.mean(y_true)
+    mean_pred = np.mean(y_pred)
+    # Variance
+    var_true = np.var(y_true)
+    var_pred = np.var(y_pred)
+    # Standard deviation
+    sd_true = np.std(y_true)
+    sd_pred = np.std(y_pred)
+    # Calculate CCC
+    numerator = 2 * cor * sd_true * sd_pred
+    denominator = var_true + var_pred + (mean_true - mean_pred)**2
+    return numerator / denominator
+
+
+# y_true = [3, -0.5, 2, 7, np.NaN]
+# y_pred = [2.5, 0.0, 2, 8, 3]
+# ccc = concordance_correlation_coefficient(y_true, y_pred)
+# print(ccc)
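For reference, the function above computes Lin's coefficient

    \rho_c = \frac{2 \rho \sigma_{\mathrm{true}} \sigma_{\mathrm{pred}}}
                  {\sigma_{\mathrm{true}}^2 + \sigma_{\mathrm{pred}}^2 + (\mu_{\mathrm{true}} - \mu_{\mathrm{pred}})^2}

where ρ is the Pearson correlation between the two series. Note that np.var and np.std default to the population (ddof=0) estimators, which matches the moment estimators used in Lin's sample CCC.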
sciml/metrics.py
ADDED
@@ -0,0 +1,123 @@
+import numpy as np
+import pandas as pd
+from scipy import stats
+from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance
+
+def stats_summary(df):
+    min_ = df.min().to_frame().T
+    Q1 = df.quantile(0.25).to_frame().T
+    median_ = df.quantile(0.5).to_frame().T
+    mean_ = df.mean().to_frame().T
+    Q3 = df.quantile(0.75).to_frame().T
+    max_ = df.max().to_frame().T
+    df_stats = pd.concat([min_, Q1, median_, mean_, Q3, max_])
+    df_stats.index = ["Min", "Q1", "Median", "Mean", "Q3", "Max"]
+    return df_stats
+
+def stats_measures(x, y, return_dict=False):
+    slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+    mse = mean_squared_error(x, y)
+    r2 = rvalue ** 2
+    rmse = np.sqrt(mse)
+    mbe = (y - x).mean()
+    if return_dict:
+        return {
+            "R2": r2,
+            "SLOPE": slope,
+            "RMSE": rmse,
+            "MBE": mbe
+        }
+    else:
+        return [r2, slope, rmse, mbe]
+
+def stats_measures_full(x, y):
+    # from sklearn.metrics import mean_absolute_percentage_error
+    slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
+    mse = mean_squared_error(x, y)
+    r2 = rvalue ** 2
+    rmse = np.sqrt(mse)
+    mbe = (y - x).mean()
+    # ----------------------------------------------------------------
+    pearsonr = stats.pearsonr(x, y)
+    evs = explained_variance_score(x, y)
+    me = max_error(x, y)
+    mae = mean_absolute_error(x, y)
+    msle = mean_squared_log_error(x, y)
+    meae = median_absolute_error(x, y)
+    r2s = r2_score(x, y)  # local name differs from the import; rebinding r2_score here would shadow it and raise UnboundLocalError
+    mpd = mean_poisson_deviance(x, y)
+    mgd = mean_gamma_deviance(x, y)
+    mtd = mean_tweedie_deviance(x, y)
+    return {
+        "R2": r2,
+        "SLOPE": slope,
+        "RMSE": rmse,
+        "MBE": mbe,
+        "INTERCEPT": intercept,
+        "PVALUE": pvalue,
+        "STDERR": stderr,
+        "PEARSON": pearsonr,
+        "EXPLAINED_VARIANCE": evs,
+        "MAXERR": me,
+        "MAE": mae,
+        "MSLE": msle,
+        "MEDIAN_AE": meae,
+        "R2_SCORE": r2s,
+        "MPD": mpd,
+        "MGD": mgd,
+        "MTD": mtd
+    }
+
+def stats_measures_df(df, name1, name2, return_dict=False):
+    slope, intercept, rvalue, pvalue, stderr = stats.linregress(df[name1], df[name2])
+    mse = mean_squared_error(df[name1], df[name2])
+    r2 = rvalue ** 2
+    rmse = np.sqrt(mse)
+    mbe = (df[name2] - df[name1]).mean()
+    if return_dict:
+        return {
+            "R2": r2,
+            "SLOPE": slope,
+            "RMSE": rmse,
+            "MBE": mbe
+        }
+    else:
+        return [r2, slope, rmse, mbe]
+
+
+
+def get_r2(x, y):
+    try:
+        x_bar = x.mean()
+    except AttributeError:  # x is a plain sequence without a .mean() method
+        x_bar = np.mean(x)
+
+    r2 = 1 - np.sum((x - y)**2) / np.sum((x - x_bar)**2)
+    return r2
+
+def get_rmse(observations, estimates):
+    return np.sqrt(((estimates - observations) ** 2).mean())
+
+def calculate_R2(y_true, y_pred):
+    """
+    Calculate the R^2 (coefficient of determination).
+
+    Args:
+        y_true (array-like): Actual values of the dependent variable.
+        y_pred (array-like): Predicted values of the dependent variable.
+
+    Returns:
+        float: The R^2 value.
+    """
+    y_true = np.array(y_true)
+    y_pred = np.array(y_pred)
+
+    # Residual sum of squares
+    ss_res = np.sum((y_true - y_pred) ** 2)
+
+    # Total sum of squares
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+    # R^2 calculation
+    R2 = 1 - (ss_res / ss_tot)
+    return R2
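A minimal usage sketch of the new module, assuming sciml 0.0.10 is installed (the array values are made up for illustration):

    import numpy as np
    import pandas as pd
    from sciml import metrics

    obs = np.array([1.0, 2.0, 3.0, 4.0])
    est = np.array([1.1, 1.9, 3.2, 3.8])

    # [R2, SLOPE, RMSE, MBE] as a list, or a dict with return_dict=True
    print(metrics.stats_measures(obs, est))
    print(metrics.stats_measures(obs, est, return_dict=True))

    # Min/Q1/Median/Mean/Q3/Max summary of each DataFrame column
    print(metrics.stats_summary(pd.DataFrame({"obs": obs, "est": est})))

Note that stats_measures_full additionally calls mean_squared_log_error and the Poisson/gamma/Tweedie deviances, so it only accepts non-negative (and, for the gamma deviance, strictly positive) inputs.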