machinegnostics-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,167 @@
+ '''
+ ManGo - Machine Gnostics Library
+ Copyright (C) 2025 ManGo Team
+
+ This work is licensed under the terms of the GNU General Public License version 3.0.
+
+ Author: Nirmal Parmar
+
+
+ '''
+
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+ import numpy as np
+
+ class EvaluationMetrics:
+     """
+     Class to calculate evaluation metrics for robust regression models.
+
+     This class provides methods to calculate various (gnostic) evaluation metrics for robust regression models, including:
+     - RobR²: Weighted R-squared, a robust version of the coefficient of determination.
+     - GMMFE: Geometric Mean of Multiplicative Fitting Errors, the geometric mean of the multiplicative fitting errors.
+     - DivI: Divergence of Information, a metric quantifying the divergence between true and predicted values.
+     - EvalMet: An overall evaluation metric combining RobR², GMMFE, and DivI.
+
+     Attributes:
+         y_true (np.ndarray): True target values.
+         y_pred (np.ndarray): Predicted target values.
+         weights (np.ndarray): Weights for each observation. Defaults to an array of ones if not provided.
+         N (int): Number of observations.
+         logger (logging.Logger): Logger instance for logging messages.
+
+     Methods:
+         calculate_rob_r2():
+             Calculate the Weighted R-squared (RobR²).
+         calculate_gmmfe():
+             Calculate the Geometric Mean of Multiplicative Fitting Errors (GMMFE).
+         calculate_divi():
+             Calculate the Divergence of Information (DivI).
+         calculate_evalmet():
+             Calculate the overall evaluation metric (EvalMet).
+         generate_report():
+             Generate a complete evaluation report containing all metrics.
+
+     Example:
+
+     >>> from machinegnostics.metrics import EvaluationMetrics
+     >>> y_true = np.array([3, -0.5, 2, 7])
+     >>> y_pred = np.array([2.5, 0.0, 2, 8])
+     >>> evaluator = EvaluationMetrics(y_true, y_pred, verbose=True)
+     >>> report = evaluator.generate_report()
+     >>> print(report)
+     """
+
+     def __init__(self,
+                  y_true: np.ndarray,
+                  y_pred: np.ndarray,
+                  weights=None,
+                  verbose: bool = False):
+         """
+         Initialize the evaluation metrics calculator.
+
+         Args:
+             y_true (np.ndarray): True target values.
+             y_pred (np.ndarray): Predicted target values.
+             weights (np.ndarray, optional): Weights for each observation. Defaults to None.
+             verbose (bool, optional): If True, enables detailed logging. Defaults to False.
+         """
+         self.logger = get_logger('EvaluationMetrics', level=logging.WARNING if not verbose else logging.INFO)
+         self.logger.info("Initializing EvaluationMetrics...")
+         self.y_true = np.asarray(y_true).ravel()
+         self.y_pred = np.asarray(y_pred).ravel()
+         self.weights = np.ones_like(self.y_true, dtype=float) if weights is None else np.asarray(weights)
+         self.N = len(self.y_true)
+
+     def calculate_rob_r2(self):
+         """Calculate the Weighted R-squared (RobR²).
+
+         This metric measures the proportion of variance in the dependent variable that is predictable from the independent variables, using weighted observations.
+
+         Returns:
+             float: The RobR² value, where 1 indicates perfect prediction and values closer to 0 indicate poor prediction.
+         """
+         self.logger.info("Calculating RobR²...")
+         errors = self.y_true - self.y_pred
+         weighted_errors_squared = np.sum(self.weights * (errors ** 2))
+         weighted_total_variance = np.sum(self.weights * (self.y_true - np.mean(self.y_true)) ** 2)
+
+         rob_r2 = 1 - (weighted_errors_squared / weighted_total_variance)
+         self.logger.info(f"RobR² calculated: {rob_r2}")
+         return rob_r2
+
+     def calculate_gmmfe(self):
+         """Calculate the Geometric Mean of Multiplicative Fitting Errors (GMMFE).
+
+         This metric calculates the geometric mean of the multiplicative fitting errors between true and predicted values.
+
+         Returns:
+             float: The GMMFE value, where values closer to 1 indicate better fitting.
+         """
+         self.logger.info("Calculating GMMFE...")
+         ratio = self.y_true / (self.y_pred + 1e-10)
+         # Avoid invalid values inside the logarithm
+         ratio = np.where(ratio <= 0, 1e-10, ratio)
+         log_sum = np.sum(np.abs(np.log(ratio))) / self.N
+         gmmfe = np.exp(log_sum)
+         self.logger.info(f"GMMFE calculated: {gmmfe}")
+         return gmmfe
+
+     def calculate_divi(self):
+         """Calculate the Divergence of Information (DivI).
+
+         This metric quantifies the divergence between the information content of true and predicted values.
+
+         Returns:
+             float: The DivI value, where lower values indicate less divergence and better predictions.
+         """
+         self.logger.info("Calculating DivI...")
+         I_true = self._calculate_information(self.y_true)
+         I_pred = self._calculate_information(self.y_pred)
+         divi = np.sum(I_true / I_pred) / self.N
+         self.logger.info(f"DivI calculated: {divi}")
+         return divi
+
+     def _calculate_information(self, y):
+         """Helper method to calculate information content."""  # NOTE: placeholder
+         self.logger.info("Calculating information content...")
+         # Simplified placeholder; a full gnostic information measure could replace it.
+         return np.abs(y) + 1e-10  # small constant added to avoid division by zero
+
+     def calculate_evalmet(self):
+         """Calculate the overall evaluation metric (EvalMet).
+
+         This metric combines RobR², GMMFE, and DivI to provide an overall evaluation of the model's performance.
+
+         Returns:
+             float: The EvalMet value, where higher values indicate better overall performance.
+         """
+         self.logger.info("Calculating EvalMet...")
+         rob_r2 = self.calculate_rob_r2()
+         gmmfe = self.calculate_gmmfe()
+         divi = self.calculate_divi()
+
+         evalmet = rob_r2 / (gmmfe * divi)
+         self.logger.info(f"EvalMet calculated: {evalmet}")
+         return evalmet
+
+     def generate_report(self) -> dict:
+         """Generate a complete evaluation report.
+
+         This method calculates all evaluation metrics (RobR², GMMFE, DivI, EvalMet) and returns them in a dictionary.
+
+         Returns:
+             dict: A dictionary containing all evaluation metrics with their respective values.
+         """
+         self.logger.info("Generating evaluation report...")
+         rob_r2 = self.calculate_rob_r2()
+         gmmfe = self.calculate_gmmfe()
+         divi = self.calculate_divi()
+         evalmet = self.calculate_evalmet()
+         self.logger.info("Evaluation report generated.")
+         return {
+             'RobR²': rob_r2,
+             'GMMFE': gmmfe,
+             'DivI': divi,
+             'EvalMet': evalmet
+         }
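The EvalMet score above divides RobR² by the product of GMMFE and DivI. A standalone numeric sketch of that combination, re-implementing the same formulas as the class methods with unit weights (illustrative only; the sample arrays and variable names are not part of the package):

import numpy as np

y_true = np.array([3.0, 0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.6, 2.0, 8.0])
w = np.ones_like(y_true)
N = len(y_true)

# RobR²: weighted coefficient of determination
rob_r2 = 1 - np.sum(w * (y_true - y_pred) ** 2) / np.sum(w * (y_true - y_true.mean()) ** 2)

# GMMFE: geometric mean of multiplicative fitting errors
ratio = y_true / (y_pred + 1e-10)
ratio = np.where(ratio <= 0, 1e-10, ratio)
gmmfe = np.exp(np.sum(np.abs(np.log(ratio))) / N)

# DivI: mean ratio of the (placeholder) information contents
divi = np.sum((np.abs(y_true) + 1e-10) / (np.abs(y_pred) + 1e-10)) / N

# EvalMet: higher is better; RobR² is rewarded, GMMFE and DivI are penalized
evalmet = rob_r2 / (gmmfe * divi)
print(rob_r2, gmmfe, divi, evalmet)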
@@ -0,0 +1,78 @@
+ import numpy as np
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+ from machinegnostics.metrics.mean import mean
+
+ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+     """
+     Computes the Gnostic mean squared error (MSE).
+
+     The Gnostic MSE metric is based on the principles of gnostic theory, which
+     provides robust estimates of data relationships. It leverages estimating and
+     quantifying irrelevances and fidelities, robust measures of data uncertainty, which are aggregated differently from the classical squared-error mean.
+
+     Parameters
+     ----------
+     y_true : array-like
+         True values (targets).
+     y_pred : array-like
+         Predicted values.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         Average of squared differences between actual and predicted values.
+
+     Raises
+     ------
+     TypeError
+         If y_true or y_pred are not array-like.
+     ValueError
+         If inputs have mismatched shapes or are empty.
+     """
+     logger = get_logger('MSE', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Mean Squared Error...")
+
+     # Validate input types
+     if not isinstance(y_true, (list, tuple, np.ndarray)):
+         logger.error("y_true must be array-like (list, tuple, or numpy array).")
+         raise TypeError("y_true must be array-like (list, tuple, or numpy array).")
+     if not isinstance(y_pred, (list, tuple, np.ndarray)):
+         logger.error("y_pred must be array-like (list, tuple, or numpy array).")
+         raise TypeError("y_pred must be array-like (list, tuple, or numpy array).")
+     # Validate input dimensions
+     if np.ndim(y_true) > 1:
+         logger.error("y_true must be a 1D array.")
+         raise ValueError("y_true must be a 1D array.")
+     if np.ndim(y_pred) > 1:
+         logger.error("y_pred must be a 1D array.")
+         raise ValueError("y_pred must be a 1D array.")
+     # Check for shape mismatch
+     if np.shape(y_true) != np.shape(y_pred):
+         logger.error("y_true and y_pred must have the same shape.")
+         raise ValueError("y_true and y_pred must have the same shape.")
+     # Check for empty arrays
+     if len(y_true) == 0:
+         logger.error("y_true and y_pred must not be empty.")
+         raise ValueError("y_true and y_pred must not be empty.")
+     if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+         logger.error("y_true and y_pred must not contain NaN values.")
+         raise ValueError("y_true and y_pred must not contain NaN values.")
+     if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+         logger.error("y_true and y_pred must not contain Inf values.")
+         raise ValueError("y_true and y_pred must not contain Inf values.")
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     # Defensive re-check after conversion
+     if y_true.size == 0:
+         raise ValueError("y_true and y_pred must not be empty.")
+
+     # Compute MSE as the gnostic mean of the squared residuals
+     mse = float(mean((y_true - y_pred) ** 2))
+     logger.info("Gnostic MSE calculated.")
+     return mse
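A brief usage sketch for the function above, assuming the wheel is installed and importing the function from the module path listed in this diff (machinegnostics/metrics/mse.py); the gnostic `mean` it delegates to ships in machinegnostics/metrics/mean.py:

import numpy as np
from machinegnostics.metrics.mse import mean_squared_error  # module path taken from this diff

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

# Gnostic MSE: the gnostic mean of the squared residuals
mse = mean_squared_error(y_true, y_pred, verbose=False)
print(mse)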
@@ -0,0 +1,119 @@
+ import numpy as np
+ import pandas as pd
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def precision_score(y_true: np.ndarray,
+                     y_pred: np.ndarray,
+                     average='binary',
+                     labels=None,
+                     verbose: bool = False) -> float | np.ndarray:
+     """
+     Computes the precision classification score.
+
+     Precision is the ratio of true positives to the sum of true and false positives.
+     Supports binary and multiclass classification.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+
+     average : {'binary', 'micro', 'macro', 'weighted', None}, default='binary'
+         - 'binary': Only report the result for the positive class (by convention, the second of the two labels); valid only for binary classification.
+         - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
+         - 'macro': Calculate metrics for each label, and find their unweighted mean.
+         - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label).
+         - None: Return the precision for each class.
+
+     labels : array-like, default=None
+         List of labels to include. If None, uses sorted unique labels from y_true and y_pred.
+
+     Returns
+     -------
+     precision : float or array of floats
+         Precision score(s). Float if average is not None, array otherwise.
+
+     Examples
+     --------
+     >>> y_true = [0, 1, 2, 2, 0]
+     >>> y_pred = [0, 0, 2, 2, 0]
+     >>> precision_score(y_true, y_pred, average='macro')
+
+     >>> import pandas as pd
+     >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+     >>> precision_score(df['true'], df['pred'], average='binary')
+     """
+     logger = get_logger('precision_score', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Precision Score...")
+
+     # If input is a DataFrame, raise error (must select column)
+     if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+         logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+         raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+     # Convert pandas Series to numpy array
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error("Shape of y_true and y_pred must be the same.")
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+     if y_true.ndim != 1 or y_pred.ndim != 1:
+         logger.error("y_true and y_pred must be 1D arrays.")
+         raise ValueError("y_true and y_pred must be 1D arrays.")
+     if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+         logger.error("y_true and y_pred must not contain NaN values.")
+         raise ValueError("y_true and y_pred must not contain NaN values.")
+     if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+         logger.error("y_true and y_pred must not contain Inf values.")
+         raise ValueError("y_true and y_pred must not contain Inf values.")
+
+     # Get unique labels
+     if labels is None:
+         labels = np.unique(np.concatenate([y_true, y_pred]))
+     else:
+         labels = np.asarray(labels)
+
+     precisions = []
+     for label in labels:
+         tp = np.sum((y_pred == label) & (y_true == label))
+         fp = np.sum((y_pred == label) & (y_true != label))
+         if tp + fp == 0:
+             precision = 0.0
+         else:
+             precision = tp / (tp + fp)
+         precisions.append(precision)
+
+     precisions = np.array(precisions)
+
+     logger.info("Precision Score calculated.")
+
+     if average == 'binary':
+         if len(labels) != 2:
+             logger.error("Binary average is only supported for binary classification with 2 classes.")
+             raise ValueError("Binary average is only supported for binary classification with 2 classes.")
+         # By convention, use the second label as positive class
+         return precisions[1]
+     elif average == 'micro':
+         tp = sum(np.sum((y_pred == label) & (y_true == label)) for label in labels)
+         fp = sum(np.sum((y_pred == label) & (y_true != label)) for label in labels)
+         return tp / (tp + fp) if (tp + fp) > 0 else 0.0
+     elif average == 'macro':
+         return np.mean(precisions)
+     elif average == 'weighted':
+         support = np.array([np.sum(y_true == label) for label in labels])
+         return np.average(precisions, weights=support)
+     elif average is None:
+         return precisions
+     else:
+         raise ValueError(f"Unknown average type: {average}")
@@ -0,0 +1,122 @@
+ import numpy as np
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def r2_score(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+     """
+     Computes the coefficient of determination (R² score).
+
+     Parameters
+     ----------
+     y_true : array-like
+         True values (targets).
+     y_pred : array-like
+         Predicted values.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         Proportion of variance explained (1 is perfect prediction).
+
+     Raises
+     ------
+     TypeError
+         If inputs are not array-like.
+     ValueError
+         If shapes do not match or inputs are empty.
+     """
+     logger = get_logger('R2', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating R2 Score...")
+
+     if not isinstance(y_true, (list, tuple, np.ndarray)):
+         logger.error("y_true must be array-like.")
+         raise TypeError("y_true must be array-like.")
+     if not isinstance(y_pred, (list, tuple, np.ndarray)):
+         logger.error("y_pred must be array-like.")
+         raise TypeError("y_pred must be array-like.")
+
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+         raise ValueError(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+
+     if y_true.size == 0:
+         logger.error("y_true and y_pred must not be empty.")
+         raise ValueError("y_true and y_pred must not be empty.")
+
+     ss_res = np.sum((y_true - y_pred) ** 2)
+     ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+     if ss_tot == 0:
+         # All values in y_true are identical; R² is undefined, so return 0.0 by convention
+         return 0.0
+     logger.info("R2 Score calculated.")
+
+     return float(1 - ss_res / ss_tot)
+
+ def adjusted_r2_score(y_true: np.ndarray, y_pred: np.ndarray, n_features: int, verbose: bool = False) -> float:
+     """
+     Computes the adjusted R² score.
+
+     Parameters
+     ----------
+     y_true : array-like
+         True values (targets).
+     y_pred : array-like
+         Predicted values.
+     n_features : int
+         Number of features (independent variables) in the model.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         Adjusted R² accounting for the number of predictors.
+
+     Raises
+     ------
+     ValueError
+         If n_features is invalid (e.g., greater than or equal to the number of samples).
+     """
+     logger = get_logger('adjusted_R2', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Adjusted R2 Score...")
+
+     if not isinstance(n_features, int) or n_features < 0:
+         logger.error("n_features must be a non-negative integer.")
+         raise ValueError("n_features must be a non-negative integer.")
+
+     # Validate inputs, then convert to numpy arrays and flatten
+     if not isinstance(y_true, (list, tuple, np.ndarray)):
+         logger.error("y_true must be array-like.")
+         raise TypeError("y_true must be array-like.")
+     if not isinstance(y_pred, (list, tuple, np.ndarray)):
+         logger.error("y_pred must be array-like.")
+         raise TypeError("y_pred must be array-like.")
+     y_pred = np.asarray(y_pred).flatten()
+     y_true = np.asarray(y_true).flatten()
+     if y_true.shape != y_pred.shape:
+         logger.error(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+         raise ValueError(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
+     if y_true.size == 0:
+         logger.error("y_true and y_pred must not be empty.")
+         raise ValueError("y_true and y_pred must not be empty.")
+
+     n = y_true.shape[0]
+
+     if n <= n_features + 1:
+         raise ValueError(
+             f"Adjusted R² is undefined for n = {n} and n_features = {n_features} "
+             "(must have n > n_features + 1)."
+         )
+
+     r2 = r2_score(y_true, y_pred, verbose=verbose)
+     r2_adj = float(1 - (1 - r2) * (n - 1) / (n - n_features - 1))
+
+     logger.info("Adjusted R2 Score calculated.")
+     return r2_adj
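A short sketch relating the two functions above; the assertion restates the adjustment formula used in adjusted_r2_score (module path assumed from this diff; the sample arrays are illustrative):

import numpy as np
from machinegnostics.metrics.r2 import r2_score, adjusted_r2_score  # module path from this diff

y_true = np.array([3.0, -0.5, 2.0, 7.0, 4.2, 1.1])
y_pred = np.array([2.5, 0.0, 2.0, 8.0, 3.9, 1.4])

n, p = len(y_true), 2
r2 = r2_score(y_true, y_pred)
r2_adj = adjusted_r2_score(y_true, y_pred, n_features=p)

# Adjusted R² shrinks R² as predictors are added: 1 - (1 - R²) * (n - 1) / (n - p - 1)
assert abs(r2_adj - (1 - (1 - r2) * (n - 1) / (n - p - 1))) < 1e-12
print(r2, r2_adj)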
@@ -0,0 +1,108 @@
+ import numpy as np
+ import pandas as pd
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def recall_score(y_true: np.ndarray | pd.Series,
+                  y_pred: np.ndarray | pd.Series,
+                  average='binary',
+                  labels=None,
+                  verbose: bool = False) -> float | np.ndarray:
+     """
+     Computes the recall classification score.
+
+     Recall is the ratio of true positives to the sum of true positives and false negatives.
+     Supports binary and multiclass classification.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+
+     average : {'binary', 'micro', 'macro', 'weighted', None}, default='binary'
+         - 'binary': Only report the result for the positive class (by convention, the second of the two labels); valid only for binary classification.
+         - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
+         - 'macro': Calculate metrics for each label, and find their unweighted mean.
+         - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label).
+         - None: Return the recall for each class.
+
+     labels : array-like, default=None
+         List of labels to include. If None, uses sorted unique labels from y_true and y_pred.
+
+     Returns
+     -------
+     recall : float or array of floats
+         Recall score(s). Float if average is not None, array otherwise.
+
+     Examples
+     --------
+     >>> y_true = [0, 1, 2, 2, 0]
+     >>> y_pred = [0, 0, 2, 2, 0]
+     >>> recall_score(y_true, y_pred, average='macro')
+     0.6666666666666666
+
+     >>> import pandas as pd
+     >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+     >>> recall_score(df['true'], df['pred'], average='binary')
+     1.0
+     """
+     logger = get_logger('recall_score', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Recall Score...")
+     # If input is a DataFrame, raise error (must select column)
+     if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+         logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+         raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+     # Convert pandas Series to numpy array
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+
+     # Get unique labels
+     if labels is None:
+         labels = np.unique(np.concatenate([y_true, y_pred]))
+     else:
+         labels = np.asarray(labels)
+
+     recalls = []
+     for label in labels:
+         tp = np.sum((y_pred == label) & (y_true == label))
+         fn = np.sum((y_pred != label) & (y_true == label))
+         if tp + fn == 0:
+             recall = 0.0
+         else:
+             recall = tp / (tp + fn)
+         recalls.append(recall)
+
+     recalls = np.array(recalls)
+
+     logger.info("Recall Score calculated.")
+     if average == 'binary':
+         if len(labels) != 2:
+             raise ValueError("Binary average is only supported for binary classification with 2 classes.")
+         # By convention, use the second label as positive class
+         return recalls[1]
+     elif average == 'micro':
+         tp = sum(np.sum((y_pred == label) & (y_true == label)) for label in labels)
+         fn = sum(np.sum((y_pred != label) & (y_true == label)) for label in labels)
+         return tp / (tp + fn) if (tp + fn) > 0 else 0.0
+     elif average == 'macro':
+         return np.mean(recalls)
+     elif average == 'weighted':
+         support = np.array([np.sum(y_true == label) for label in labels])
+         return np.average(recalls, weights=support)
+     elif average is None:
+         return recalls
+     else:
+         raise ValueError(f"Unknown average type: {average}")
@@ -0,0 +1,77 @@
+ import numpy as np
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+ from machinegnostics.metrics.mean import mean
+
+ def root_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+     """
+     Computes the Gnostic Root Mean Squared Error (RMSE).
+
+     The Gnostic RMSE metric is based on the principles of gnostic theory, which
+     provides robust estimates of data relationships. It leverages estimating and
+     quantifying irrelevances and fidelities, robust measures of data uncertainty, which are aggregated differently from the classical squared-error mean.
+
+     Parameters
+     ----------
+     y_true : array-like
+         True values (targets).
+     y_pred : array-like
+         Predicted values.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         Square root of the average of squared errors.
+
+     Examples
+     --------
+     Example 1: Basic usage with simple arrays
+     >>> import numpy as np
+     >>> from machinegnostics.metrics import root_mean_squared_error
+     >>> y_true = np.array([3, -0.5, 2, 7])
+     >>> y_pred = np.array([2.5, 0.0, 2, 8])
+     >>> rmse = root_mean_squared_error(y_true, y_pred, verbose=True)
+     >>> print(f"RMSE: {rmse}")
+
+     Raises
+     ------
+     TypeError
+         If y_true or y_pred are not array-like.
+     ValueError
+         If inputs have mismatched shapes or are empty.
+     """
+     logger = get_logger('RMSE', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Root Mean Squared Error...")
+     # Convert to numpy arrays
+     y_true = np.asarray(y_true)
+     y_pred = np.asarray(y_pred)
+
+     # Ensure 1D arrays (one column)
+     if y_true.ndim != 1:
+         logger.error("y_true must be a 1D array (single column).")
+         raise ValueError("y_true must be a 1D array (single column).")
+     if y_pred.ndim != 1:
+         logger.error("y_pred must be a 1D array (single column).")
+         raise ValueError("y_pred must be a 1D array (single column).")
+
+     # Validate shapes
+     if len(y_true) != len(y_pred):
+         logger.error("y_true and y_pred must have the same shape.")
+         raise ValueError("y_true and y_pred must have the same shape.")
+
+     if len(y_true) == 0:
+         logger.error("y_true and y_pred must not be empty.")
+         raise ValueError("y_true and y_pred must not be empty.")
+     if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+         logger.error("y_true and y_pred must not contain NaN values.")
+         raise ValueError("y_true and y_pred must not contain NaN values.")
+     if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+         logger.error("y_true and y_pred must not contain Inf values.")
+         raise ValueError("y_true and y_pred must not contain Inf values.")
+
+     # Compute RMSE as the square root of the gnostic mean of the squared residuals
+     rmse = float(np.sqrt(mean((y_true - y_pred) ** 2)))
+     logger.info("Gnostic RMSE calculated.")
+     return rmse
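A closing sketch tying RMSE back to the MSE above: both pass the same squared residuals to the gnostic `mean`, so for identical inputs RMSE equals the square root of MSE (module paths assumed from this diff, with the package installed):

import numpy as np
from machinegnostics.metrics.mse import mean_squared_error      # module path from this diff
from machinegnostics.metrics.rmse import root_mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

rmse = root_mean_squared_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
# The two metrics agree up to the square root for the same inputs
assert abs(rmse - np.sqrt(mse)) < 1e-12
print(rmse)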