machinegnostics-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,167 @@
+'''
+ManGo - Machine Gnostics Library
+Copyright (C) 2025 ManGo Team
+
+This work is licensed under the terms of the GNU General Public License version 3.0.
+
+Author: Nirmal Parmar
+
+
+'''
+
+from machinegnostics.magcal.util.logging import get_logger
+import logging
+import numpy as np
+
+class EvaluationMetrics:
+    """
+    Class to calculate evaluation metrics for robust regression models.
+
+    This class provides methods to calculate various (gnostic) evaluation metrics for robust regression models, including:
+    - RobR²: Weighted R-squared, a robust version of the coefficient of determination.
+    - GMMFE: Geometric Mean of Multiplicative Fitting Errors, a measure of the geometric mean of fitting errors.
+    - DivI: Divergence of Information, a metric to quantify the divergence between true and predicted values.
+    - EvalMet: An overall evaluation metric combining RobR², GMMFE, and DivI.
+
+    Attributes:
+        y_true (np.ndarray): True target values.
+        y_pred (np.ndarray): Predicted target values.
+        weights (np.ndarray): Weights for each observation. Defaults to an array of ones if not provided.
+        N (int): Number of observations.
+        logger (logging.Logger): Logger instance for logging messages.
+
+    Methods:
+        calculate_rob_r2():
+            Calculate the Weighted R-squared (RobR²).
+        calculate_gmmfe():
+            Calculate the Geometric Mean of Multiplicative Fitting Errors (GMMFE).
+        calculate_divi():
+            Calculate the Divergence of Information (DivI).
+        calculate_evalmet():
+            Calculate the overall evaluation metric (EvalMet).
+        generate_report():
+            Generate a complete evaluation report containing all metrics.
+
+    Example:
+
+        >>> from machinegnostics.metrics import EvaluationMetrics
+        >>> y_true = np.array([3, -0.5, 2, 7])
+        >>> y_pred = np.array([2.5, 0.0, 2, 8])
+        >>> evaluator = EvaluationMetrics(y_true, y_pred, verbose=True)
+        >>> report = evaluator.generate_report()
+        >>> print(report)
+    """
+
+    def __init__(self,
+                 y_true: np.ndarray,
+                 y_pred: np.ndarray,
+                 weights=None,
+                 verbose: bool = False):
+        """
+        Initialize the evaluation metrics calculator.
+
+        Args:
+            y_true (np.ndarray): True target values.
+            y_pred (np.ndarray): Predicted target values.
+            weights (np.ndarray, optional): Weights for each observation. Defaults to None.
+            verbose (bool, optional): If True, enables detailed logging. Defaults to False.
+        """
+        self.logger = get_logger('EvaluationMetrics', level=logging.WARNING if not verbose else logging.INFO)
+        self.logger.info("Initializing EvaluationMetrics...")
+        self.y_true = np.asarray(y_true).ravel()
+        self.y_pred = np.asarray(y_pred).ravel()
+        # Build default weights from the flattened array so the shapes always agree
+        self.weights = np.ones_like(self.y_true, dtype=float) if weights is None else np.asarray(weights)
+        self.N = len(self.y_true)
+
+    def calculate_rob_r2(self):
+        """Calculate the Weighted R-squared (RobR²).
+
+        This metric measures the proportion of variance in the dependent variable that is predictable from the independent variables, using weighted observations.
+
+        Returns:
+            float: The RobR² value, where 1 indicates perfect prediction and values closer to 0 indicate poor prediction.
+        """
+        self.logger.info("Calculating RobR²...")
+        errors = self.y_true - self.y_pred
+        weighted_errors_squared = np.sum(self.weights * (errors ** 2))
+        weighted_total_variance = np.sum(self.weights * (self.y_true - np.mean(self.y_true)) ** 2)
+
+        rob_r2 = 1 - (weighted_errors_squared / weighted_total_variance)
+        self.logger.info(f"RobR² calculated: {rob_r2}")
+        return rob_r2
+
+    def calculate_gmmfe(self):
+        """Calculate the Geometric Mean of Multiplicative Fitting Errors (GMMFE).
+
+        This metric calculates the geometric mean of the multiplicative fitting errors between true and predicted values.
+
+        Returns:
+            float: The GMMFE value, where values closer to 1 indicate better fitting.
+        """
+        self.logger.info("Calculating GMMFE...")
+        ratio = self.y_true / (self.y_pred + 1e-10)
+        # Avoid invalid values for the logarithm
+        ratio = np.where(ratio <= 0, 1e-10, ratio)
+        log_sum = np.sum(np.abs(np.log(ratio))) / self.N
+        gmmfe = np.exp(log_sum)
+        self.logger.info(f"GMMFE calculated: {gmmfe}")
+        return gmmfe
+
+    def calculate_divi(self):
+        """Calculate the Divergence of Information (DivI).
+
+        This metric quantifies the divergence between the information content of true and predicted values.
+
+        Returns:
+            float: The DivI value, where lower values indicate less divergence and better predictions.
+        """
+        self.logger.info("Calculating DivI...")
+        I_true = self._calculate_information(self.y_true)
+        I_pred = self._calculate_information(self.y_pred)
+        divi = np.sum(I_true / I_pred) / self.N
+        self.logger.info(f"DivI calculated: {divi}")
+        return divi
+
+    def _calculate_information(self, y):
+        """Helper method to calculate information content."""  # NOTE: placeholder
+        self.logger.info("Calculating information content...")
+        # Simplified version; a full gnostic information measure can be substituted here
+        return np.abs(y) + 1e-10  # Adding a small constant to avoid division by zero
+
+    def calculate_evalmet(self):
+        """Calculate the overall evaluation metric (EvalMet).
+
+        This metric combines RobR², GMMFE, and DivI to provide an overall evaluation of the model's performance.
+
+        Returns:
+            float: The EvalMet value, where higher values indicate better overall performance.
+        """
+        self.logger.info("Calculating EvalMet...")
+        rob_r2 = self.calculate_rob_r2()
+        gmmfe = self.calculate_gmmfe()
+        divi = self.calculate_divi()
+
+        evalmet = rob_r2 / (gmmfe * divi)
+        self.logger.info(f"EvalMet calculated: {evalmet}")
+        return evalmet
+
+    def generate_report(self) -> dict:
+        """Generate a complete evaluation report.
+
+        This method calculates all evaluation metrics (RobR², GMMFE, DivI, EvalMet) and returns them in a dictionary format.
+
+        Returns:
+            dict: A dictionary containing all evaluation metrics with their respective values.
+        """
+        self.logger.info("Generating evaluation report...")
+        rob_r2 = self.calculate_rob_r2()
+        gmmfe = self.calculate_gmmfe()
+        divi = self.calculate_divi()
+        evalmet = self.calculate_evalmet()
+        self.logger.info("Evaluation report generated.")
+        return {
+            'RobR²': rob_r2,
+            'GMMFE': gmmfe,
+            'DivI': divi,
+            'EvalMet': evalmet
+        }
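
The combination rule in calculate_evalmet reduces to EvalMet = RobR² / (GMMFE * DivI). A minimal usage sketch of the class above, assuming the machinegnostics.metrics re-export shown in its docstring; positive-valued data is used because calculate_gmmfe clips non-positive ratios to 1e-10, which would otherwise inflate the score:

import numpy as np
from machinegnostics.metrics import EvaluationMetrics  # re-export per the class docstring

# Positive-valued targets keep the GMMFE log-ratio well defined
y_true = np.array([3.0, 0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.6, 2.0, 8.0])

evaluator = EvaluationMetrics(y_true, y_pred)
report = evaluator.generate_report()
print(report)  # {'RobR²': ..., 'GMMFE': ..., 'DivI': ..., 'EvalMet': ...}

# The report is internally consistent with the combination rule above
assert np.isclose(report['EvalMet'], report['RobR²'] / (report['GMMFE'] * report['DivI']))
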
machinegnostics/metrics/mse.py
@@ -0,0 +1,78 @@
+import numpy as np
+from machinegnostics.magcal.util.logging import get_logger
+import logging
+from machinegnostics.metrics.mean import mean
+
+def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+    """
+    Computes the Gnostic mean squared error (MSE).
+
+    The Gnostic MSE metric is based on the principles of gnostic theory, which
+    provides robust estimates of data relationships. It leverages the concepts of
+    estimating and quantifying irrelevances and fidelities, robust measures of
+    data uncertainty, which are aggregated differently than in the classical MSE.
+
+    Parameters
+    ----------
+    y_true : array-like
+        True values (targets).
+    y_pred : array-like
+        Predicted values.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    float
+        Average of squared differences between actual and predicted values.
+
+    Raises
+    ------
+    TypeError
+        If y_true or y_pred are not array-like.
+    ValueError
+        If inputs have mismatched shapes or are empty.
+    """
+    logger = get_logger('MSE', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Mean Squared Error...")
+
+    # Validate input types
+    if not isinstance(y_true, (list, tuple, np.ndarray)):
+        logger.error("y_true must be array-like (list, tuple, or numpy array).")
+        raise TypeError("y_true must be array-like (list, tuple, or numpy array).")
+    if not isinstance(y_pred, (list, tuple, np.ndarray)):
+        logger.error("y_pred must be array-like (list, tuple, or numpy array).")
+        raise TypeError("y_pred must be array-like (list, tuple, or numpy array).")
+    # Validate input dimensions
+    if np.ndim(y_true) > 1:
+        logger.error("y_true must be a 1D array.")
+        raise ValueError("y_true must be a 1D array.")
+    if np.ndim(y_pred) > 1:
+        logger.error("y_pred must be a 1D array.")
+        raise ValueError("y_pred must be a 1D array.")
+    # Check for shape mismatch
+    if np.shape(y_true) != np.shape(y_pred):
+        logger.error("y_true and y_pred must have the same shape.")
+        raise ValueError("y_true and y_pred must have the same shape.")
+    # Check for empty arrays
+    if len(y_true) == 0:
+        logger.error("y_true and y_pred must not be empty.")
+        raise ValueError("y_true and y_pred must not be empty.")
+    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+        logger.error("y_true and y_pred must not contain NaN values.")
+        raise ValueError("y_true and y_pred must not contain NaN values.")
+    if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+        logger.error("y_true and y_pred must not contain Inf values.")
+        raise ValueError("y_true and y_pred must not contain Inf values.")
+
+    # Convert to numpy arrays and flatten
+    y_true = np.asarray(y_true).flatten()
+    y_pred = np.asarray(y_pred).flatten()
+
+    # Compute MSE using the gnostic mean
+    mse = float(mean((y_true - y_pred) ** 2))
+    logger.info("Gnostic MSE calculated.")
+    return mse
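
The squared errors are reduced with the package's own gnostic mean (machinegnostics.metrics.mean) rather than np.mean, so any robustness comes from that aggregation. A short comparison sketch against the classical arithmetic reduction; the module path follows the file list above, and the two values need not coincide since mean is the gnostic aggregation:

import numpy as np
from machinegnostics.metrics.mse import mean_squared_error  # module path per the file list above

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

gnostic_mse = mean_squared_error(y_true, y_pred, verbose=True)
classical_mse = float(np.mean((y_true - y_pred) ** 2))  # arithmetic baseline: 0.375
print(gnostic_mse, classical_mse)
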
machinegnostics/metrics/precision.py
@@ -0,0 +1,119 @@
+import numpy as np
+import pandas as pd
+from machinegnostics.magcal.util.logging import get_logger
+import logging
+
+def precision_score(y_true: np.ndarray,
+                    y_pred: np.ndarray,
+                    average='binary',
+                    labels=None,
+                    verbose: bool = False) -> float:
+    """
+    Computes the precision classification score.
+
+    Precision is the ratio of true positives to the sum of true and false positives.
+    Supports binary and multiclass classification.
+
+    Parameters
+    ----------
+    y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+        Ground truth (correct) target values.
+
+    y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+        Estimated targets as returned by a classifier.
+
+    average : {'binary', 'micro', 'macro', 'weighted', None}, default='binary'
+        - 'binary': Only report results for the positive class; by convention, the second of the two sorted labels is treated as positive.
+        - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
+        - 'macro': Calculate metrics for each label, and find their unweighted mean.
+        - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label).
+        - None: Return the precision for each class.
+
+    labels : array-like, default=None
+        List of labels to include. If None, uses sorted unique labels from y_true and y_pred.
+
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    precision : float or array of floats
+        Precision score(s). Float if average is not None, array otherwise.
+
+    Examples
+    --------
+    >>> y_true = [0, 1, 2, 2, 0]
+    >>> y_pred = [0, 0, 2, 2, 0]
+    >>> precision_score(y_true, y_pred, average='macro')
+    0.5555555555555555
+
+    >>> import pandas as pd
+    >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+    >>> precision_score(df['true'], df['pred'], average='binary')
+    0.6666666666666666
+    """
+    logger = get_logger('precision_score', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Precision Score...")
+
+    # If input is a DataFrame, raise error (must select column)
+    if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+        logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+        raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+    # Convert pandas Series to numpy array
+    if isinstance(y_true, pd.Series):
+        y_true = y_true.values
+    if isinstance(y_pred, pd.Series):
+        y_pred = y_pred.values
+
+    # Convert to numpy arrays and flatten
+    y_true = np.asarray(y_true).flatten()
+    y_pred = np.asarray(y_pred).flatten()
+
+    if y_true.shape != y_pred.shape:
+        logger.error("Shape of y_true and y_pred must be the same.")
+        raise ValueError("Shape of y_true and y_pred must be the same.")
+    if y_true.ndim != 1 or y_pred.ndim != 1:
+        logger.error("y_true and y_pred must be 1D arrays.")
+        raise ValueError("y_true and y_pred must be 1D arrays.")
+    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+        logger.error("y_true and y_pred must not contain NaN values.")
+        raise ValueError("y_true and y_pred must not contain NaN values.")
+    if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+        logger.error("y_true and y_pred must not contain Inf values.")
+        raise ValueError("y_true and y_pred must not contain Inf values.")
+
+    # Get unique labels
+    if labels is None:
+        labels = np.unique(np.concatenate([y_true, y_pred]))
+    else:
+        labels = np.asarray(labels)
+
+    # Per-class precision: TP / (TP + FP), with 0.0 when the class is never predicted
+    precisions = []
+    for label in labels:
+        tp = np.sum((y_pred == label) & (y_true == label))
+        fp = np.sum((y_pred == label) & (y_true != label))
+        if tp + fp == 0:
+            precision = 0.0
+        else:
+            precision = tp / (tp + fp)
+        precisions.append(precision)
+
+    precisions = np.array(precisions)
+
+    logger.info("Precision Score calculated.")
+
+    if average == 'binary':
+        if len(labels) != 2:
+            logger.error("Binary average is only supported for binary classification with 2 classes.")
+            raise ValueError("Binary average is only supported for binary classification with 2 classes.")
+        # By convention, use the second label as positive class
+        return precisions[1]
+    elif average == 'micro':
+        tp = sum(np.sum((y_pred == label) & (y_true == label)) for label in labels)
+        fp = sum(np.sum((y_pred == label) & (y_true != label)) for label in labels)
+        return tp / (tp + fp) if (tp + fp) > 0 else 0.0
+    elif average == 'macro':
+        return np.mean(precisions)
+    elif average == 'weighted':
+        support = np.array([np.sum(y_true == label) for label in labels])
+        return np.average(precisions, weights=support)
+    elif average is None:
+        return precisions
+    else:
+        raise ValueError(f"Unknown average type: {average}")
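
Because the per-class loop is plain TP / (TP + FP) counting, the averaging modes are easy to verify by hand. A usage sketch with hand-counted expectations (module path per the file list above):

from machinegnostics.metrics.precision import precision_score  # module path per the file list above

y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 2, 0]

print(precision_score(y_true, y_pred, average=None))        # per class: [2/3, 0.0, 1.0]
print(precision_score(y_true, y_pred, average='macro'))     # (2/3 + 0 + 1) / 3 ≈ 0.5556
print(precision_score(y_true, y_pred, average='micro'))     # 4 TP / (4 TP + 1 FP) = 0.8
print(precision_score(y_true, y_pred, average='weighted'))  # supports (2, 1, 2) ≈ 0.6667
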
machinegnostics/metrics/r2.py
@@ -0,0 +1,122 @@
+import numpy as np
+from machinegnostics.magcal.util.logging import get_logger
+import logging
+
+def r2_score(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+    """
+    Computes the coefficient of determination (R² score).
+
+    Parameters
+    ----------
+    y_true : array-like
+        True values (targets).
+    y_pred : array-like
+        Predicted values.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    float
+        Proportion of variance explained (1 is perfect prediction).
+
+    Raises
+    ------
+    TypeError
+        If inputs are not array-like.
+    ValueError
+        If shapes do not match or inputs are empty.
+    """
+    logger = get_logger('R2', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating R2 Score...")
+
+    if not isinstance(y_true, (list, tuple, np.ndarray)):
+        logger.error("y_true must be array-like.")
+        raise TypeError("y_true must be array-like.")
+    if not isinstance(y_pred, (list, tuple, np.ndarray)):
+        logger.error("y_pred must be array-like.")
+        raise TypeError("y_pred must be array-like.")
+
+    y_true = np.asarray(y_true).flatten()
+    y_pred = np.asarray(y_pred).flatten()
+
+    if y_true.shape != y_pred.shape:
+        logger.error(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+        raise ValueError(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+
+    if y_true.size == 0:
+        logger.error("y_true and y_pred must not be empty.")
+        raise ValueError("y_true and y_pred must not be empty.")
+
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+    if ss_tot == 0:
+        # All values in y_true are identical; R² is not defined, so return 0.0 by convention
+        return 0.0
+    logger.info("R2 Score calculated.")
+
+    return float(1 - ss_res / ss_tot)
+
+def adjusted_r2_score(y_true: np.ndarray, y_pred: np.ndarray, n_features: int, verbose: bool = False) -> float:
+    """
+    Computes the adjusted R² score.
+
+    Parameters
+    ----------
+    y_true : array-like
+        True values (targets).
+    y_pred : array-like
+        Predicted values.
+    n_features : int
+        Number of features (independent variables) in the model.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    float
+        Adjusted R² accounting for the number of predictors.
+
+    Raises
+    ------
+    TypeError
+        If inputs are not array-like.
+    ValueError
+        If n_features is invalid (e.g., greater than or equal to number of samples).
+    """
+    logger = get_logger('adjusted_R2', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Adjusted R2 Score...")
+
+    if not isinstance(n_features, int) or n_features < 0:
+        logger.error("n_features must be a non-negative integer.")
+        raise ValueError("n_features must be a non-negative integer.")
+
+    # Convert to numpy arrays and flatten
+    if not isinstance(y_true, (list, tuple, np.ndarray)):
+        logger.error("y_true must be array-like.")
+        raise TypeError("y_true must be array-like.")
+    if not isinstance(y_pred, (list, tuple, np.ndarray)):
+        logger.error("y_pred must be array-like.")
+        raise TypeError("y_pred must be array-like.")
+    y_pred = np.asarray(y_pred).flatten()
+    y_true = np.asarray(y_true).flatten()
+    if y_true.shape != y_pred.shape:
+        logger.error(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+        raise ValueError(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+    if y_true.size == 0:
+        logger.error("y_true and y_pred must not be empty.")
+        raise ValueError("y_true and y_pred must not be empty.")
+
+    n = y_true.shape[0]
+
+    if n <= n_features + 1:
+        raise ValueError(
+            f"Adjusted R² is undefined for n = {n} and n_features = {n_features} "
+            "(must have n > n_features + 1)."
+        )
+
+    r2 = r2_score(y_true, y_pred)
+    r2_adj = float(1 - (1 - r2) * (n - 1) / (n - n_features - 1))
+
+    logger.info("Adjusted R2 Score calculated.")
+    return r2_adj
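
A usage sketch for both scores, with expectations worked out from the definitions above (for this data ss_res = 1.5 and ss_tot = 29.1875; module path per the file list above):

import numpy as np
from machinegnostics.metrics.r2 import r2_score, adjusted_r2_score  # module path per the file list above

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

r2 = r2_score(y_true, y_pred)                             # 1 - 1.5/29.1875 ≈ 0.9486
r2_adj = adjusted_r2_score(y_true, y_pred, n_features=1)  # 1 - (1 - r2)*(4-1)/(4-1-1) ≈ 0.9229
print(r2, r2_adj)
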
machinegnostics/metrics/recall.py
@@ -0,0 +1,108 @@
+import numpy as np
+import pandas as pd
+from machinegnostics.magcal.util.logging import get_logger
+import logging
+
+def recall_score(y_true: np.ndarray | pd.Series,
+                 y_pred: np.ndarray | pd.Series,
+                 average='binary',
+                 labels=None,
+                 verbose: bool = False) -> float | np.ndarray:
+    """
+    Computes the recall classification score.
+
+    Recall is the ratio of true positives to the sum of true positives and false negatives.
+    Supports binary and multiclass classification.
+
+    Parameters
+    ----------
+    y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+        Ground truth (correct) target values.
+
+    y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+        Estimated targets as returned by a classifier.
+
+    average : {'binary', 'micro', 'macro', 'weighted', None}, default='binary'
+        - 'binary': Only report results for the positive class; by convention, the second of the two sorted labels is treated as positive.
+        - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
+        - 'macro': Calculate metrics for each label, and find their unweighted mean.
+        - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label).
+        - None: Return the recall for each class.
+
+    labels : array-like, default=None
+        List of labels to include. If None, uses sorted unique labels from y_true and y_pred.
+
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    recall : float or array of floats
+        Recall score(s). Float if average is not None, array otherwise.
+
+    Examples
+    --------
+    >>> y_true = [0, 1, 2, 2, 0]
+    >>> y_pred = [0, 0, 2, 2, 0]
+    >>> recall_score(y_true, y_pred, average='macro')
+    0.6666666666666666
+
+    >>> import pandas as pd
+    >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+    >>> recall_score(df['true'], df['pred'], average='binary')
+    1.0
+    """
+    logger = get_logger('recall_score', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Recall Score...")
+    # If input is a DataFrame, raise error (must select column)
+    if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+        logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+        raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+    # Convert pandas Series to numpy array
+    if isinstance(y_true, pd.Series):
+        y_true = y_true.values
+    if isinstance(y_pred, pd.Series):
+        y_pred = y_pred.values
+
+    # Convert to numpy arrays and flatten
+    y_true = np.asarray(y_true).flatten()
+    y_pred = np.asarray(y_pred).flatten()
+
+    if y_true.shape != y_pred.shape:
+        raise ValueError("Shape of y_true and y_pred must be the same.")
+
+    # Get unique labels
+    if labels is None:
+        labels = np.unique(np.concatenate([y_true, y_pred]))
+    else:
+        labels = np.asarray(labels)
+
+    # Per-class recall: TP / (TP + FN), with 0.0 when the class never occurs in y_true
+    recalls = []
+    for label in labels:
+        tp = np.sum((y_pred == label) & (y_true == label))
+        fn = np.sum((y_pred != label) & (y_true == label))
+        if tp + fn == 0:
+            recall = 0.0
+        else:
+            recall = tp / (tp + fn)
+        recalls.append(recall)
+
+    recalls = np.array(recalls)
+
+    logger.info("Recall Score calculated.")
+    if average == 'binary':
+        if len(labels) != 2:
+            raise ValueError("Binary average is only supported for binary classification with 2 classes.")
+        # By convention, use the second label as positive class
+        return recalls[1]
+    elif average == 'micro':
+        tp = sum(np.sum((y_pred == label) & (y_true == label)) for label in labels)
+        fn = sum(np.sum((y_pred != label) & (y_true == label)) for label in labels)
+        return tp / (tp + fn) if (tp + fn) > 0 else 0.0
+    elif average == 'macro':
+        return np.mean(recalls)
+    elif average == 'weighted':
+        support = np.array([np.sum(y_true == label) for label in labels])
+        return np.average(recalls, weights=support)
+    elif average is None:
+        return recalls
+    else:
+        raise ValueError(f"Unknown average type: {average}")
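
The same hand-verification works for recall, where the loop counts TP / (TP + FN) per class. A usage sketch (module path per the file list above):

from machinegnostics.metrics.recall import recall_score  # module path per the file list above

y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 2, 0]

print(recall_score(y_true, y_pred, average=None))       # per class: [1.0, 0.0, 1.0]
print(recall_score(y_true, y_pred, average='macro'))    # (1 + 0 + 1) / 3 ≈ 0.6667
print(recall_score(y_true, y_pred, average='micro'))    # 4 TP / (4 TP + 1 FN) = 0.8
print(recall_score(y_true, y_pred, average='weighted')) # supports (2, 1, 2) = 0.8
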
machinegnostics/metrics/rmse.py
@@ -0,0 +1,77 @@
+import numpy as np
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+from machinegnostics.metrics.mean import mean
+
+def root_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+    """
+    Computes the Gnostic Root Mean Squared Error (RMSE).
+
+    The Gnostic RMSE metric is based on the principles of gnostic theory, which
+    provides robust estimates of data relationships. It leverages the concepts of
+    estimating and quantifying irrelevances and fidelities, robust measures of
+    data uncertainty, which are aggregated differently than in the classical RMSE.
+
+    Parameters
+    ----------
+    y_true : array-like
+        True values (targets).
+    y_pred : array-like
+        Predicted values.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    float
+        Square root of the average of squared errors.
+
+    Examples
+    --------
+    Example 1: Basic usage with simple arrays
+    >>> import numpy as np
+    >>> from machinegnostics.metrics import root_mean_squared_error
+    >>> y_true = np.array([3, -0.5, 2, 7])
+    >>> y_pred = np.array([2.5, 0.0, 2, 8])
+    >>> rmse = root_mean_squared_error(y_true, y_pred, verbose=True)
+    >>> print(f"RMSE: {rmse}")
+
+    Raises
+    ------
+    TypeError
+        If y_true or y_pred are not array-like.
+    ValueError
+        If inputs have mismatched shapes or are empty.
+    """
+    logger = get_logger('RMSE', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Root Mean Squared Error...")
+    # Convert to numpy arrays
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
+
+    # Ensure 1D arrays (one column)
+    if y_true.ndim != 1:
+        logger.error("y_true must be a 1D array (single column).")
+        raise ValueError("y_true must be a 1D array (single column).")
+    if y_pred.ndim != 1:
+        logger.error("y_pred must be a 1D array (single column).")
+        raise ValueError("y_pred must be a 1D array (single column).")
+
+    # Validate shapes
+    if len(y_true) != len(y_pred):
+        logger.error("y_true and y_pred must have the same shape.")
+        raise ValueError("y_true and y_pred must have the same shape.")
+
+    if len(y_true) == 0:
+        logger.error("y_true and y_pred must not be empty.")
+        raise ValueError("y_true and y_pred must not be empty.")
+    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+        logger.error("y_true and y_pred must not contain NaN values.")
+        raise ValueError("y_true and y_pred must not contain NaN values.")
+    if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+        logger.error("y_true and y_pred must not contain Inf values.")
+        raise ValueError("y_true and y_pred must not contain Inf values.")
+
+    # Compute RMSE as the square root of the gnostic mean of squared errors
+    rmse = float(np.sqrt(mean((y_true - y_pred) ** 2)))
+    logger.info("Gnostic RMSE calculated.")
+    return rmse
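
Since root_mean_squared_error delegates to the same gnostic mean as mean_squared_error, it is simply the square root of that value. A closing sketch tying the two together (module paths per the file list above; the equality holds because both reduce the identical squared-error array with the same mean function):

import numpy as np
from machinegnostics.metrics.mse import mean_squared_error
from machinegnostics.metrics.rmse import root_mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

rmse = root_mean_squared_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
assert np.isclose(rmse, np.sqrt(mse))  # RMSE is the square root of the gnostic MSE
print(rmse)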