machinegnostics-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,178 @@
+ '''
+ Gnostic Correlation Metric
+
+ This module provides a function to compute the Gnostic correlation between two data samples.
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ '''
+
+ import numpy as np
+ from machinegnostics.magcal import EGDF, QGDF, DataHomogeneity
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+
+ def correlation(X: np.ndarray, y: np.ndarray, case: str = 'i', verbose: bool = False) -> float:
+     """
+     Calculate the Gnostic correlation coefficient between a feature array X and a target array y.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         The feature data sample. Must be a numpy array without NaN or Inf values.
+         If X has more than one column, pass each column one by one to this function.
+     y : np.ndarray
+         The target data sample. Must be a 1D numpy array without NaN or Inf values.
+     case : str, optional, default='i'
+         Specifies the type of geometry to use:
+         - 'i': Estimation geometry (EGDF).
+         - 'j': Quantifying geometry (QGDF).
+     verbose : bool, optional, default=False
+         If True, enables detailed logging for debugging purposes.
+
+     Returns
+     -------
+     float
+         The Gnostic correlation coefficient between the two data samples.
+
+     Examples
+     --------
+     Example 1: Compute correlation for two simple datasets
+     >>> import numpy as np
+     >>> from machinegnostics.metrics import correlation
+     >>> X = np.array([1, 2, 3, 4, 5])
+     >>> y = np.array([5, 4, 3, 2, 1])
+     >>> corr = correlation(X, y, case='i', verbose=False)
+     >>> print(f"Correlation (case='i'): {corr}")
+
+     Example 2: For multi-column X
+     >>> X = np.array([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]])
+     >>> y = np.array([5, 4, 3, 2, 1])
+     >>> for i in range(X.shape[1]):
+     ...     corr = correlation(X[:, i], y)
+     ...     print(f"Correlation for column {i}: {corr}")
+
+     Raises
+     ------
+     ValueError
+         If the input arrays are not of the same length, are empty, contain NaN/Inf values,
+         or are not numpy arrays. Also raised if `case` is not 'i' or 'j'.
+
+     Notes
+     -----
+     - If X has more than one column, pass each column separately (e.g., X[:, i]).
+     - y must be a 1D array.
+     - This metric is robust to data uncertainty and provides meaningful estimates even
+       in the presence of noise or outliers.
+     - Ensure that the input data is preprocessed and cleaned for optimal results.
+     - If a data sample is not homogeneous, a warning is logged and the distribution
+       function is refitted with the scale parameter S=1 to improve the result.
+     """
+     logger = get_logger('correlation', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Starting correlation computation.")
+
+     # Validate inputs
+     if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
+         logger.error("Inputs must be numpy arrays.")
+         raise ValueError("Inputs must be numpy arrays.")
+
+     # Flatten X and y to 1D
+     X = X.flatten()
+     y = y.flatten()
+
+     if len(X) != len(y):
+         logger.error("Input arrays must have the same length.")
+         raise ValueError("Input arrays must have the same length.")
+     if len(X) == 0 or len(y) == 0:
+         logger.error("Input arrays must not be empty.")
+         raise ValueError("Input arrays must not be empty.")
+     if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+         logger.error("Input arrays must not contain NaN values.")
+         raise ValueError("Input arrays must not contain NaN values.")
+     if np.any(np.isinf(X)) or np.any(np.isinf(y)):
+         logger.error("Input arrays must not contain Inf values.")
+         raise ValueError("Input arrays must not contain Inf values.")
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+         raise ValueError("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+
+     # Defaults for the internal distribution-function fits
+     FLUSH = False
+     VERBOSE = False
+
+     if case == 'i':
+         logger.info("Using Estimation Global Distribution Function (EGDF) for correlation computation.")
+         egdf_X = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_X.fit(X)
+
+         egdf_y = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_y.fit(y)
+
+         logger.info("Performing data homogeneity check.")
+         dh_X = DataHomogeneity(gdf=egdf_X, verbose=VERBOSE, flush=FLUSH)
+         is_homo_X = dh_X.fit()
+
+         dh_y = DataHomogeneity(gdf=egdf_y, verbose=VERBOSE, flush=FLUSH)
+         is_homo_y = dh_y.fit()
+
+         if not is_homo_X:
+             logger.warning("X is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_X = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_X.fit(X)
+
+         if not is_homo_y:
+             logger.warning("y is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_y = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_y.fit(y)
+
+         hc_X = np.mean(egdf_X.hi, axis=0)
+         hc_y = np.mean(egdf_y.hi, axis=0)
+
+     elif case == 'j':
+         logger.info("Fitting Estimation Global Distribution Function (EGDF) for the homogeneity check.")
+         egdf_X = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_X.fit(X)
+
+         egdf_y = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_y.fit(y)
+
+         logger.info("Checking data homogeneity.")
+         dh_X = DataHomogeneity(gdf=egdf_X, verbose=VERBOSE, flush=FLUSH)
+         is_homo_X = dh_X.fit()
+
+         dh_y = DataHomogeneity(gdf=egdf_y, verbose=VERBOSE, flush=FLUSH)
+         is_homo_y = dh_y.fit()
+
+         if not is_homo_X:
+             logger.warning("X is not homogeneous. Switching to S=1 for better results.")
+         if not is_homo_y:
+             logger.warning("y is not homogeneous. Switching to S=1 for better results.")
+
+         logger.info("Using Quantification Global Distribution Function (QGDF) for correlation computation.")
+         # Use S=1 only for a sample that failed the homogeneity check.
+         qgdf_X = QGDF(flush=FLUSH, verbose=VERBOSE, S=1) if not is_homo_X else QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_X.fit(X)
+
+         qgdf_y = QGDF(flush=FLUSH, verbose=VERBOSE, S=1) if not is_homo_y else QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_y.fit(y)
+
+         hc_X = np.mean(qgdf_X.hj, axis=0)
+         hc_y = np.mean(qgdf_y.hj, axis=0)
+
+     hc_X = np.clip(hc_X, 1, 1e12)
+     hc_y = np.clip(hc_y, 1, 1e12)
+
+     def compute_correlation(hc_X: np.ndarray, hc_y: np.ndarray) -> float:
+         logger.info("Computing correlation.")
+         numerator = np.sum(hc_X * hc_y)
+         denominator = np.sqrt(np.sum(hc_X**2)) * np.sqrt(np.sum(hc_y**2))
+         # Guard against division by zero before computing the ratio.
+         if denominator == 0:
+             return np.nan
+         return numerator / denominator
+
+     corr = compute_correlation(hc_X, hc_y)
+     logger.info("Correlation computed successfully.")
+     return corr
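Editor's note: the closing `compute_correlation` step above is a normalized inner product, i.e., the cosine of the angle between the two characteristic vectors. A minimal standalone sketch of just that step in plain numpy, independent of the EGDF/QGDF fitting; `hx` and `hy` are illustrative stand-ins for the clipped characteristic vectors, not variables from the package:

import numpy as np

def cosine_of_characteristics(hx: np.ndarray, hy: np.ndarray) -> float:
    # Guard against a zero-length vector before dividing.
    denom = np.sqrt(np.sum(hx**2)) * np.sqrt(np.sum(hy**2))
    if denom == 0:
        return float('nan')
    return float(np.sum(hx * hy) / denom)

hx = np.array([1.0, 1.2, 1.1])
hy = np.array([1.0, 1.3, 1.05])
print(cosine_of_characteristics(hx, hy))  # close to 1 for near-parallel vectors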
@@ -0,0 +1,167 @@
+ '''
+ Gnostic Cross-Variance
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ '''
+
+ import numpy as np
+ from machinegnostics.magcal import EGDF, QGDF, DataHomogeneity
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+
+ def cross_covariance(X: np.ndarray, y: np.ndarray, case: str = 'i', verbose: bool = False) -> float:
+     """
+     Calculate the Gnostic cross-covariance between a feature array X and a target array y.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         The feature data sample. Must be a 1D numpy array (single feature/column).
+         If X has more than one column, pass each column separately (e.g., X[:, i]).
+     y : np.ndarray
+         The target data sample. Must be a 1D numpy array without NaN or Inf values.
+     case : str, optional, default='i'
+         Specifies the type of geometry to use:
+         - 'i': Estimation geometry.
+         - 'j': Quantifying geometry.
+     verbose : bool, optional, default=False
+         If True, enables detailed logging for debugging purposes.
+
+     Returns
+     -------
+     float
+         The Gnostic cross-covariance between the two data samples.
+
+     Examples
+     --------
+     Example 1: Compute cross-covariance for two simple datasets
+     >>> import numpy as np
+     >>> from machinegnostics.metrics import cross_covariance
+     >>> X = np.array([1, 2, 3, 4, 5])
+     >>> y = np.array([5, 4, 3, 2, 1])
+     >>> covar = cross_covariance(X, y, case='i', verbose=False)
+     >>> print(f"Cross-Covariance (case='i'): {covar}")
+
+     Example 2: For multi-column X
+     >>> X = np.array([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]])
+     >>> y = np.array([5, 4, 3, 2, 1])
+     >>> for i in range(X.shape[1]):
+     ...     covar = cross_covariance(X[:, i], y)
+     ...     print(f"Cross-Covariance for column {i}: {covar}")
+
+     Raises
+     ------
+     ValueError
+         If the input arrays are not of the same length, are empty, contain NaN/Inf values,
+         or are not numpy arrays. Also raised if `case` is not 'i' or 'j'.
+
+     Notes
+     -----
+     - X must be a 1D numpy array (single column). For multi-column X, pass each column separately.
+     - y must be a 1D numpy array.
+     - This metric is robust to data uncertainty and provides meaningful estimates even
+       in the presence of noise or outliers.
+     - Ensure that the input data is preprocessed and cleaned for optimal results.
+     - If a data sample is not homogeneous, a warning is logged and the scale parameter
+       is set to S=1 to improve the result.
+     """
+     logger = get_logger('cross_covariance', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Starting cross-covariance computation.")
+
+     # Validate inputs
+     if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
+         logger.error("Inputs must be numpy arrays.")
+         raise ValueError("Inputs must be numpy arrays.")
+
+     # Flatten X and y to 1D
+     X = X.flatten()
+     y = y.flatten()
+
+     if len(X) != len(y):
+         logger.error("Input arrays must have the same length.")
+         raise ValueError("Input arrays must have the same length.")
+     if len(X) == 0 or len(y) == 0:
+         logger.error("Input arrays must not be empty.")
+         raise ValueError("Input arrays must not be empty.")
+     # Reject NaN and Inf values
+     if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+         logger.error("Input arrays must not contain NaN values.")
+         raise ValueError("Input arrays must not contain NaN values.")
+     if np.any(np.isinf(X)) or np.any(np.isinf(y)):
+         logger.error("Input arrays must not contain Inf values.")
+         raise ValueError("Input arrays must not contain Inf values.")
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+         raise ValueError("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+
+     # Defaults for the internal distribution-function fits
+     FLUSH = False
+     VERBOSE = False
+
+     if case == 'i':
+         logger.info("Using Estimation Global Distribution Function (EGDF) for cross-covariance computation.")
+         egdf_data_1 = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_1.fit(X)
+
+         egdf_data_2 = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_2.fit(y)
+
+         logger.info("Performing data homogeneity check.")
+         dh_data_1 = DataHomogeneity(gdf=egdf_data_1, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_1 = dh_data_1.fit()
+
+         dh_data_2 = DataHomogeneity(gdf=egdf_data_2, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_2 = dh_data_2.fit()
+
+         if not is_homo_data_1:
+             logger.warning("X is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data_1 = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data_1.fit(X)
+
+         if not is_homo_data_2:
+             logger.warning("y is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data_2 = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data_2.fit(y)
+
+         hc_data_1 = np.mean(egdf_data_1.hi, axis=0)
+         hc_data_2 = np.mean(egdf_data_2.hi, axis=0)
+
+     elif case == 'j':
+         logger.info("Fitting Estimation Global Distribution Function (EGDF) for the homogeneity check.")
+         egdf_data_1 = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_1.fit(X)
+
+         egdf_data_2 = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_2.fit(y)
+
+         logger.info("Checking data homogeneity.")
+         dh_data_1 = DataHomogeneity(gdf=egdf_data_1, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_1 = dh_data_1.fit()
+
+         dh_data_2 = DataHomogeneity(gdf=egdf_data_2, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_2 = dh_data_2.fit()
+
+         if not is_homo_data_1:
+             logger.warning("X is not homogeneous. Switching to S=1 for better results.")
+         if not is_homo_data_2:
+             logger.warning("y is not homogeneous. Switching to S=1 for better results.")
+
+         logger.info("Using Quantification Global Distribution Function (QGDF) for cross-covariance computation.")
+         # Use S=1 only for a sample that failed the homogeneity check.
+         qgdf_data_1 = QGDF(flush=FLUSH, verbose=VERBOSE, S=1) if not is_homo_data_1 else QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data_1.fit(X)
+
+         qgdf_data_2 = QGDF(flush=FLUSH, verbose=VERBOSE, S=1) if not is_homo_data_2 else QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data_2.fit(y)
+
+         hc_data_1 = np.mean(qgdf_data_1.hj, axis=0)
+         hc_data_2 = np.mean(qgdf_data_2.hj, axis=0)
+
+     hc_data_1 = np.clip(hc_data_1, 1, 1e12)
+     hc_data_2 = np.clip(hc_data_2, 1, 1e12)
+
+     cross_covar = np.mean(hc_data_1 * hc_data_2)
+     logger.info("Cross-covariance calculated successfully.")
+     return cross_covar
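Editor's note: in contrast with the correlation metric above, after the same fitting and clipping pipeline the cross-covariance reduces to the mean elementwise product of the two characteristic vectors. A minimal sketch of just that final step; `hc1` and `hc2` are illustrative stand-ins for the clipped characteristics, not the package's variables:

import numpy as np

hc1 = np.array([1.0, 1.2, 1.1])   # stand-in for np.clip(np.mean(egdf.hi, axis=0), 1, 1e12)
hc2 = np.array([1.0, 1.3, 1.05])
cross_covar = np.mean(hc1 * hc2)  # the quantity cross_covariance returns
print(cross_covar)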
@@ -0,0 +1,82 @@
+ '''
+ ManGo - Machine Gnostics Library
+ Copyright (C) 2025 ManGo Team
+
+ Author: Nirmal Parmar
+ '''
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+ import numpy as np
+ from machinegnostics.magcal.criteria_eval import CriteriaEvaluator
+
+ def divI(y: np.ndarray, y_fit: np.ndarray, verbose: bool = False) -> float:
+     """
+     Compute the Divergence Information (DivI) for evaluating the fit between observed data and model predictions.
+
+     DivI is a statistical metric that measures the divergence between the distributions of the observed
+     and fitted values using gnostic characteristics. It is particularly useful for assessing the quality
+     of model fits in various applications.
+
+     Parameters
+     ----------
+     y : np.ndarray
+         The observed data (ground truth). Must be a 1D array of numerical values.
+     y_fit : np.ndarray
+         The fitted data (model predictions). Must be a 1D array of the same shape as `y`.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         The computed Divergence Information (DivI) value.
+
+     Raises
+     ------
+     ValueError
+         If `y` and `y_fit` do not have the same shape.
+     ValueError
+         If `y` or `y_fit` are not 1D arrays.
+
+     Notes
+     -----
+     - DivI is calculated using gnostic characteristics, which provide a robust way to measure
+       divergence between distributions.
+
+     References
+     ----------
+     - Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical Gnostics for Data Analysis, Chapter 19.3.4
+
+     Example
+     -------
+     >>> import numpy as np
+     >>> from machinegnostics.metrics.divi import divI
+     >>> y = np.array([1.0, 2.0, 3.0, 4.0])
+     >>> y_fit = np.array([1.1, 1.9, 3.2, 3.8])
+     >>> divI(y, y_fit)
+     """
+     logger = get_logger('DivI', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Starting DivI calculation.")
+
+     # Convert inputs to numpy arrays before inspecting their dimensions
+     y = np.asarray(y)
+     y_fit = np.asarray(y_fit)
+
+     # Ensure y and y_fit are 1D arrays
+     if y.ndim != 1 or y_fit.ndim != 1:
+         logger.error("Both y and y_fit must be 1D arrays.")
+         raise ValueError("Both y and y_fit must be 1D arrays.")
+
+     # Ensure y and y_fit have the same shape
+     if y.shape != y_fit.shape:
+         logger.error("y and y_fit must have the same shape.")
+         raise ValueError("y and y_fit must have the same shape.")
+
+     # Compute the Divergence Information (DivI)
+     evaluator = CriteriaEvaluator(y, y_fit, verbose=verbose)
+     divI_value = evaluator._divI()
+     logger.info("Divergence Information (DivI) calculation completed.")
+     return divI_value
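Editor's note: a short usage sketch of `divI` as published, assuming machinegnostics 0.0.1 is installed. The returned value depends on CriteriaEvaluator's internals, so no output is asserted here:

import numpy as np
from machinegnostics.metrics.divi import divI

y = np.array([1.0, 2.0, 3.0, 4.0])      # observed values
y_fit = np.array([1.1, 1.9, 3.2, 3.8])  # model predictions
print(divI(y, y_fit, verbose=False))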
@@ -0,0 +1,109 @@
+ '''
+ ManGo - Machine Gnostics Library
+ Copyright (C) 2025 ManGo Team
+
+ Author: Nirmal Parmar
+ '''
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+ import numpy as np
+ from machinegnostics.magcal.criteria_eval import CriteriaEvaluator
+
+ def evalMet(y: np.ndarray, y_fit: np.ndarray, w: np.ndarray = None, verbose: bool = False) -> float:
+     """
+     Compute the Evaluation Metric (EvalMet) for evaluating the fit between observed data and model predictions.
+
+     EvalMet is a composite metric that combines Robust R-squared (RobR2), the Geometric Mean of Model
+     Fit Error (GMMFE), and Divergence Information (DivI) to provide a comprehensive assessment of
+     model performance.
+
+     Parameters
+     ----------
+     y : np.ndarray
+         The observed data (ground truth). Must be a 1D array of numerical values.
+     y_fit : np.ndarray
+         The fitted data (model predictions). Must be a 1D array of the same shape as `y`.
+     w : np.ndarray, optional
+         Weights for the data points. If not provided, an array of ones is used.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         The computed Evaluation Metric (EvalMet) value.
+
+     Raises
+     ------
+     ValueError
+         If `y` and `y_fit` do not have the same shape.
+     ValueError
+         If `w` is provided and does not have the same shape as `y`.
+     ValueError
+         If `y` or `y_fit` are not 1D arrays.
+
+     Notes
+     -----
+     - EvalMet is calculated as:
+       EvalMet = RobR2 / (GMMFE * DivI)
+       where:
+       - RobR2 = Robust R-squared value
+       - GMMFE = Geometric Mean of Model Fit Error
+       - DivI = Divergence Information
+
+     References
+     ----------
+     - Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical Gnostics for Data Analysis, Chapter 19.3.4
+
+     Example
+     -------
+     >>> import numpy as np
+     >>> from machinegnostics.metrics.evalmet import evalMet
+     >>> y = np.array([1.0, 2.0, 3.0, 4.0])
+     >>> y_fit = np.array([1.1, 1.9, 3.2, 3.8])
+     >>> evalMet(y, y_fit)
+     """
+     logger = get_logger('EvalMet', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Starting EvalMet calculation.")
+
+     # Convert inputs to numpy arrays before inspecting their dimensions
+     y = np.asarray(y)
+     y_fit = np.asarray(y_fit)
+
+     # Ensure y and y_fit are 1D arrays
+     if y.ndim != 1 or y_fit.ndim != 1:
+         logger.error("Both y and y_fit must be 1D arrays.")
+         raise ValueError("Both y and y_fit must be 1D arrays.")
+
+     # Ensure y and y_fit have the same shape
+     if y.shape != y_fit.shape:
+         logger.error("y and y_fit must have the same shape.")
+         raise ValueError("y and y_fit must have the same shape.")
+
+     # Reject empty input and non-finite values
+     if y.size == 0 or y_fit.size == 0:
+         logger.error("y and y_fit must not be empty.")
+         raise ValueError("y and y_fit must not be empty.")
+     if np.any(np.isnan(y)) or np.any(np.isnan(y_fit)):
+         logger.error("y and y_fit must not contain NaN values.")
+         raise ValueError("y and y_fit must not contain NaN values.")
+     if np.any(np.isinf(y)) or np.any(np.isinf(y_fit)):
+         logger.error("y and y_fit must not contain Inf values.")
+         raise ValueError("y and y_fit must not contain Inf values.")
+
+     # If weights are not provided, use an array of ones
+     if w is None:
+         w = np.ones_like(y)
+     else:
+         w = np.asarray(w)
+
+     # Ensure weights have the same shape as y
+     if w.shape != y.shape:
+         logger.error("Weights must have the same shape as y.")
+         raise ValueError("Weights must have the same shape as y.")
+
+     # Compute the Evaluation Metric (EvalMet)
+     evaluator = CriteriaEvaluator(y, y_fit, w, verbose=verbose)
+     evalmet = evaluator._evalmet()
+     logger.info("EvalMet calculation completed.")
+     return evalmet
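Editor's note: a usage sketch of `evalMet` as published, assuming machinegnostics 0.0.1 is installed. The explicit weights shown are optional and equivalent to the default:

import numpy as np
from machinegnostics.metrics.evalmet import evalMet

y = np.array([1.0, 2.0, 3.0, 4.0])
y_fit = np.array([1.1, 1.9, 3.2, 3.8])
w = np.ones_like(y)            # uniform weights, same as passing w=None
score = evalMet(y, y_fit, w)   # RobR2 / (GMMFE * DivI) per the docstring
print(score)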
@@ -0,0 +1,128 @@
+ import numpy as np
+ import pandas as pd
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def f1_score(y_true: np.ndarray | pd.Series | list,
+              y_pred: np.ndarray | pd.Series | list,
+              average='binary',
+              labels=None,
+              verbose: bool = False) -> float | np.ndarray:
+     """
+     Compute the F1 score for classification tasks.
+
+     The F1 score is the harmonic mean of precision and recall.
+     Supports binary and multiclass classification.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+
+     average : {'binary', 'micro', 'macro', 'weighted', None}, default='binary'
+         - 'binary': Only report the result for the positive class (the second of the
+           two sorted labels). Requires exactly two classes.
+         - 'micro': Calculate metrics globally by counting the total true positives,
+           false negatives and false positives.
+         - 'macro': Calculate metrics for each label, and find their unweighted mean.
+         - 'weighted': Calculate metrics for each label, and find their average weighted
+           by support (the number of true instances for each label).
+         - None: Return the F1 score for each class.
+
+     labels : array-like, default=None
+         List of labels to include. If None, uses sorted unique labels from y_true and y_pred.
+
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     f1 : float or array of floats
+         F1 score(s). Float if average is not None, array otherwise.
+
+     Examples
+     --------
+     >>> y_true = [0, 1, 2, 2, 0]
+     >>> y_pred = [0, 0, 2, 2, 0]
+     >>> f1_score(y_true, y_pred, average='macro')
+     0.6
+
+     >>> import pandas as pd
+     >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+     >>> f1_score(df['true'], df['pred'], average='binary')
+     0.8
+     """
+     logger = get_logger('f1_score', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Calculating F1 Score...")
+
+     # If input is a DataFrame, raise an error (a single column must be selected)
+     if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+         logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+         raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+     # Convert pandas Series to numpy arrays
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error("Shape mismatch between y_true and y_pred.")
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+     if y_true.size == 0:
+         logger.error("Empty input arrays.")
+         raise ValueError("y_true and y_pred must not be empty.")
+     # Reject NaN and Inf values
+     if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+         logger.error("Input contains NaN values.")
+         raise ValueError("y_true and y_pred must not contain NaN values.")
+     if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+         logger.error("Input contains Inf values.")
+         raise ValueError("y_true and y_pred must not contain Inf values.")
+
+     # Get unique labels
+     if labels is None:
+         labels = np.unique(np.concatenate([y_true, y_pred]))
+     else:
+         labels = np.asarray(labels)
+
+     # Per-class precision and recall from true/false positives and false negatives
+     precisions = []
+     recalls = []
+     for label in labels:
+         tp = np.sum((y_pred == label) & (y_true == label))
+         fp = np.sum((y_pred == label) & (y_true != label))
+         fn = np.sum((y_pred != label) & (y_true == label))
+         precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+         recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+         precisions.append(precision)
+         recalls.append(recall)
+
+     precisions = np.array(precisions)
+     recalls = np.array(recalls)
+     f1s = np.where((precisions + recalls) > 0, 2 * precisions * recalls / (precisions + recalls), 0.0)
+
+     logger.info("F1 Score calculation completed.")
+     if average == 'binary':
+         if len(labels) != 2:
+             logger.error("Binary average is only supported for binary classification with 2 classes.")
+             raise ValueError("Binary average is only supported for binary classification with 2 classes.")
+         return f1s[1]
+     elif average == 'micro':
+         tp = sum(np.sum((y_pred == label) & (y_true == label)) for label in labels)
+         fp = sum(np.sum((y_pred == label) & (y_true != label)) for label in labels)
+         fn = sum(np.sum((y_pred != label) & (y_true == label)) for label in labels)
+         precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+         recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+         return 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+     elif average == 'macro':
+         return np.mean(f1s)
+     elif average == 'weighted':
+         support = np.array([np.sum(y_true == label) for label in labels])
+         return np.average(f1s, weights=support)
+     elif average is None:
+         return f1s
+     else:
+         logger.error(f"Unknown average type: {average}")
+         raise ValueError(f"Unknown average type: {average}")