machinegnostics-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +201 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,67 @@
+ import numpy as np
+ import pandas as pd
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+
+ def accuracy_score(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
+     """
+     Computes the accuracy classification score.
+
+     Supports input as numpy arrays, lists, or pandas Series/DataFrame columns.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     accuracy : float
+         The accuracy score as a float in the range [0, 1].
+
+     Examples
+     --------
+     >>> y_true = [0, 1, 2, 2, 0]
+     >>> y_pred = [0, 0, 2, 2, 0]
+     >>> accuracy_score(y_true, y_pred)
+     0.8
+
+     >>> import pandas as pd
+     >>> df = pd.DataFrame({'true': [1, 0, 1], 'pred': [1, 1, 1]})
+     >>> accuracy_score(df['true'], df['pred'])
+     """
+     logger = get_logger('accuracy_score', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Accuracy Score...")
+     # Check for empty input
+     if y_true is None or y_pred is None:
+         logger.error("y_true and y_pred must not be None.")
+         raise ValueError("y_true and y_pred must not be None.")
+     # If input is a DataFrame, raise error (must select column)
+     if isinstance(y_true, pd.DataFrame) or isinstance(y_pred, pd.DataFrame):
+         logger.error("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+         raise ValueError("y_true and y_pred must be 1D array-like or pandas Series, not DataFrame. Select a column.")
+
+     # Convert pandas Series to numpy array
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error("Shape of y_true and y_pred must be the same.")
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+
+     correct = np.sum(y_true == y_pred)
+     total = y_true.size
+     accuracy = correct / total
+     logger.info("Accuracy score calculation complete.")
+     return float(accuracy)
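
Usage sketch (editorial note, not part of the wheel): accuracy_score reduces to correct / total over the aligned label arrays. This assumes the installed package re-exports the function from machinegnostics.metrics, as the package's other modules' imports suggest.

    import numpy as np
    from machinegnostics.metrics import accuracy_score

    y_true = np.array([0, 1, 2, 2, 0])
    y_pred = np.array([0, 0, 2, 2, 0])

    # 4 of the 5 labels match, so the score is 4/5
    print(accuracy_score(y_true, y_pred))  # 0.8
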
@@ -0,0 +1,183 @@
+ """
+ Auto-Correlation Metric
+
+ This module provides a function to compute the auto-correlation of a data sample.
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ """
+
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+ import numpy as np
+ from machinegnostics.magcal import EGDF, QGDF, DataHomogeneity
+
+ def auto_correlation(data: np.ndarray, lag: int = 0, case: str = 'i', verbose: bool = False) -> float:
+     """
+     Calculate the Gnostic auto-correlation of a data sample.
+
+     Auto-correlation measures the similarity between a data sample and a lagged version of itself.
+     This function uses the principles of Gnostic theory to compute robust estimates of auto-correlation.
+
+     Parameters
+     ----------
+     data : np.ndarray
+         The data sample. Must be a 1D numpy array without NaN or Inf values.
+     lag : int, optional, default=0
+         The lag value for which the auto-correlation is computed. Must be non-negative and less than the length of the data.
+     case : str, optional, default='i'
+         Specifies the type of geometry to use:
+         - 'i': Estimation geometry (EGDF).
+         - 'j': Quantifying geometry (QGDF).
+     verbose : bool, optional, default=False
+         If True, enables detailed logging for debugging purposes.
+
+     Returns
+     -------
+     float
+         The Gnostic auto-correlation coefficient for the given lag.
+
+     Raises
+     ------
+     ValueError
+         If the input array is empty, contains NaN/Inf values, is not 1D, or if the lag is invalid.
+
+     Examples
+     --------
+     Example 1: Compute auto-correlation for a simple dataset
+     >>> import numpy as np
+     >>> from machinegnostics.metrics import auto_correlation
+     >>> data = np.array([1, 2, 3, 4, 5])
+     >>> lag = 1
+     >>> auto_corr = auto_correlation(data, lag=lag, case='i', verbose=False)
+     >>> print(f"Auto-Correlation (lag={lag}, case='i'): {auto_corr}")
+
+     Notes
+     -----
+     - This metric is robust to data uncertainty and provides meaningful estimates even in the presence of noise or outliers.
+     - Ensure that the input data is preprocessed and cleaned for optimal results.
+     """
+     logger = get_logger('auto_correlation', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Starting auto-correlation computation.")
+
+     # Validate inputs
+     if not isinstance(data, np.ndarray):
+         logger.error("Input must be a numpy array.")
+         raise ValueError("Input must be a numpy array.")
+     # flatten data
+     data = data.flatten()
+     if data.ndim != 1:
+         logger.error("Input array must be 1D.")
+         raise ValueError("Input array must be 1D.")
+     if len(data) == 0:
+         logger.error("Input array must not be empty.")
+         raise ValueError("Input array must not be empty.")
+     if np.any(np.isnan(data)):
+         logger.error("Input array must not contain NaN values.")
+         raise ValueError("Input array must not contain NaN values.")
+     if np.any(np.isinf(data)):
+         logger.error("Input array must not contain Inf values.")
+         raise ValueError("Input array must not contain Inf values.")
+     if lag < 0 or lag >= len(data):
+         logger.error("Lag must be non-negative and less than the length of the data.")
+         raise ValueError("Lag must be non-negative and less than the length of the data.")
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+         raise ValueError("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+
+     # Shift data by lag
+     data_lagged = np.roll(data, -lag)
+     data_lagged = data_lagged[:-lag] if lag > 0 else data_lagged
+     data = data[:len(data_lagged)]
+
+     # Default arguments for gnostic functions
+     FLUSH = False
+     VERBOSE = False
+
+     if case == 'i':
+         logger.info("Using Estimation Global Distribution Function (EGDF) for irrelevance computation.")
+         # EGDF
+         egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data.fit(data)
+
+         egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_lagged.fit(data_lagged)
+
+         # Data Homogeneity
+         logger.info("Performing data homogeneity check.")
+         dh_data = DataHomogeneity(gdf=egdf_data, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data = dh_data.fit()
+
+         dh_data_lagged = DataHomogeneity(gdf=egdf_data_lagged, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_lagged = dh_data_lagged.fit()
+
+         # data homogeneity check
+         if not is_homo_data:
+             logger.warning("Data is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data.fit(data)
+
+         if not is_homo_data_lagged:
+             logger.warning("Lagged data is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data_lagged.fit(data_lagged)
+
+         # Get irrelevance of the data sample
+         logger.info("Getting irrelevance of the data sample.")
+         hc_data = np.mean(egdf_data.hi, axis=0)
+         hc_data_lagged = np.mean(egdf_data_lagged.hi, axis=0)
+
+     if case == 'j':
+         logger.info("Using Quantifying Global Distribution Function (QGDF) for irrelevance computation.")
+         # EGDF (preliminary fit, used only for the homogeneity check)
+         egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data.fit(data)
+
+         egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_lagged.fit(data_lagged)
+
+         # Data Homogeneity
+         logger.info("Performing data homogeneity check.")
+         dh_data = DataHomogeneity(gdf=egdf_data, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data = dh_data.fit()
+
+         dh_data_lagged = DataHomogeneity(gdf=egdf_data_lagged, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_lagged = dh_data_lagged.fit()
+
+         # data homogeneity check
+         if not is_homo_data:
+             logger.info("Data is not homogeneous.")
+         if not is_homo_data_lagged:
+             logger.info("Lagged data is not homogeneous.")
+
+         # QGDF
+         logger.info("Fitting QGDF.")
+         qgdf_data = QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data.fit(data)
+
+         qgdf_data_lagged = QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data_lagged.fit(data_lagged)
+
+         # Get irrelevance of the data sample
+         hc_data = np.mean(qgdf_data.hj, axis=0)
+         hc_data_lagged = np.mean(qgdf_data_lagged.hj, axis=0)
+
+     # Clip large irrelevance values at 1e12 to prevent overflow
+     hc_data = np.clip(hc_data, 1, 1e12)
+     hc_data_lagged = np.clip(hc_data_lagged, 1, 1e12)
+
+     # Compute correlation
+     def compute_correlation(hc_data_1: np.ndarray, hc_data_2: np.ndarray) -> float:
+         logger.debug("Computing correlation.")
+         numerator = np.sum(hc_data_1 * hc_data_2)
+         denominator = np.sqrt(np.sum(hc_data_1**2)) * np.sqrt(np.sum(hc_data_2**2))
+         if denominator == 0:
+             return np.nan
+         corr = numerator / denominator
+         return corr
+
+     auto_corr = compute_correlation(hc_data, hc_data_lagged)
+
+     return auto_corr
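
Editorial sketch: stripped of the EGDF/QGDF fitting, the lag alignment and the final coefficient above are two small NumPy operations. The hc vectors here are stand-ins for the fitted irrelevance values, so only the mechanics (not the Gnostic estimates) are reproduced.

    import numpy as np

    def lag_align(data: np.ndarray, lag: int):
        # Same alignment as auto_correlation: shift left by lag, drop the wrapped tail
        lagged = np.roll(data, -lag)
        lagged = lagged[:-lag] if lag > 0 else lagged
        return data[:len(lagged)], lagged

    def cosine(h1: np.ndarray, h2: np.ndarray) -> float:
        # The compute_correlation kernel: cosine similarity with a zero-norm guard
        denom = np.sqrt(np.sum(h1**2)) * np.sqrt(np.sum(h2**2))
        return float('nan') if denom == 0 else float(np.sum(h1 * h2) / denom)

    x, x_lag = lag_align(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), lag=1)
    print(x, x_lag)          # [1. 2. 3. 4.] [2. 3. 4. 5.]
    print(cosine(x, x_lag))  # ~0.9938
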
@@ -0,0 +1,201 @@
+ '''
+ Gnostic auto-covariance
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ '''
+
+ import numpy as np
+ from machinegnostics.magcal import EGDF, QGDF, DataHomogeneity
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def auto_covariance(data: np.ndarray, lag: int = 0, case: str = 'i', verbose: bool = False) -> float:
+     """
+     Calculate the Gnostic auto-covariance of a data sample.
+
+     Auto-covariance measures the relationship between a data sample and a lagged version of itself.
+     This function uses the principles of Gnostic theory to compute robust estimates of auto-covariance.
+
+     Parameters
+     ----------
+     data : np.ndarray
+         The data sample. Must be a 1D numpy array without NaN or Inf values.
+         The input data should represent a time series or sequential data points.
+     lag : int, optional, default=0
+         The lag value for which the auto-covariance is computed. Must be non-negative and less than the length of the data.
+         A lag of 0 computes the covariance of the data with itself.
+     case : str, optional, default='i'
+         Specifies the type of geometry to use for irrelevance computation:
+         - 'i': Estimation Geometry Distribution Function.
+         - 'j': Quantifying Geometry Distribution Function.
+     verbose : bool, optional, default=False
+         If True, detailed logging information will be printed during the computation.
+
+     Returns
+     -------
+     float
+         The Gnostic auto-covariance coefficient for the given lag.
+
+     Raises
+     ------
+     ValueError
+         If the input array is invalid (e.g., not a numpy array, contains NaN/Inf values, is not 1D, or is empty).
+         If the lag is negative or greater than or equal to the length of the data.
+         If the case is not one of ['i', 'j'].
+
+     Notes
+     -----
+     - This function uses Gnostic theory to compute irrelevance values for the data and its lagged version.
+     - Irrelevance values are clipped to avoid overflow, with a maximum value of 1e12.
+     - Homogeneity checks are performed on the data and its lagged version. If the data is not homogeneous, warnings are logged.
+
+     Warnings
+     --------
+     - If the data or its lagged version is not homogeneous, a warning is logged and, for case 'i', the EGDF is refitted with scale parameter S=1.
+
+     Examples
+     --------
+     Example 1: Compute auto-covariance for a simple dataset
+     >>> from machinegnostics.metrics import auto_covariance
+     >>> import machinegnostics as mg  # alternative import
+     >>> import numpy as np
+     >>> data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+     >>> lag = 1
+     >>> auto_covar = auto_covariance(data, lag=lag, case='i')
+     >>> auto_covar = mg.auto_covariance(data, lag=lag, case='i')  # alternative usage
+     >>> print(f"Auto-covariance with lag={lag}: {auto_covar}")
+
+     Example 2: Compute auto-covariance for a dataset with QGDF
+     >>> data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+     >>> lag = 2
+     >>> auto_covar = auto_covariance(data, lag=lag, case='j')
+     >>> print(f"Auto-covariance with lag={lag}: {auto_covar}")
+
+     Example 3: Handle invalid input
+     >>> data = np.array([1.0, np.nan, 3.0, 4.0, 5.0])
+     >>> lag = 1
+     >>> try:
+     ...     auto_covar = auto_covariance(data, lag=lag, case='i')
+     ... except ValueError as e:
+     ...     print(f"Error: {e}")
+
+     """
+     logger = get_logger('auto_covariance', level=logging.WARNING if not verbose else logging.INFO)
+     # Validate inputs
+     if not isinstance(data, np.ndarray):
+         logger.error("Input must be a numpy array.")
+         raise ValueError("Input must be a numpy array.")
+     # flatten data
+     data = data.flatten()
+     if data.ndim != 1:
+         logger.error("Input array must be 1D.")
+         raise ValueError("Input array must be 1D.")
+     if len(data) == 0:
+         logger.error("Input array must not be empty.")
+         raise ValueError("Input array must not be empty.")
+     if np.any(np.isnan(data)):
+         logger.error("Input array must not contain NaN values.")
+         raise ValueError("Input array must not contain NaN values.")
+     if np.any(np.isinf(data)):
+         logger.error("Input array must not contain Inf values.")
+         raise ValueError("Input array must not contain Inf values.")
+     if lag < 0 or lag >= len(data):
+         logger.error("Lag must be non-negative and less than the length of the data.")
+         raise ValueError("Lag must be non-negative and less than the length of the data.")
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+         raise ValueError("Case must be 'i' for estimation geometry or 'j' for quantifying geometry.")
+
+     # Shift data by lag
+     data_lagged = np.roll(data, -lag)
+     data_lagged = data_lagged[:-lag] if lag > 0 else data_lagged
+     data = data[:len(data_lagged)]
+
+     # Default arguments for gnostic functions
+     FLUSH = False
+     VERBOSE = False
+
+     if case == 'i':
+         logger.info("Using Estimation Global Distribution Function (EGDF) for irrelevance computation.")
+         # EGDF
+         egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data.fit(data)
+
+         egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_lagged.fit(data_lagged)
+
+         # Data Homogeneity
+         logger.info("Performing data homogeneity check.")
+         dh_data = DataHomogeneity(gdf=egdf_data, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data = dh_data.fit()
+
+         dh_data_lagged = DataHomogeneity(gdf=egdf_data_lagged, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_lagged = dh_data_lagged.fit()
+
+         # data homogeneity check
+         if not is_homo_data:
+             logger.warning("Data is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data.fit(data)
+
+         if not is_homo_data_lagged:
+             logger.warning("Lagged data is not homogeneous. Switching to S=1 for better results.")
+             logger.info("Fitting EGDF with S=1.")
+             egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE, S=1)
+             egdf_data_lagged.fit(data_lagged)
+
+         # Get irrelevance of the data sample
+         logger.info("Getting irrelevance of the data sample.")
+         hc_data = np.mean(egdf_data.hi, axis=0)
+         hc_data_lagged = np.mean(egdf_data_lagged.hi, axis=0)
+
+     if case == 'j':
+         logger.info("Using Quantifying Global Distribution Function (QGDF) for irrelevance computation.")
+         # EGDF (preliminary fit, used only for the homogeneity check)
+         egdf_data = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data.fit(data)
+
+         egdf_data_lagged = EGDF(flush=FLUSH, verbose=VERBOSE)
+         egdf_data_lagged.fit(data_lagged)
+
+         # Data Homogeneity
+         logger.info("Performing data homogeneity check.")
+         dh_data = DataHomogeneity(gdf=egdf_data, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data = dh_data.fit()
+
+         dh_data_lagged = DataHomogeneity(gdf=egdf_data_lagged, verbose=VERBOSE, flush=FLUSH)
+         is_homo_data_lagged = dh_data_lagged.fit()
+
+         # data homogeneity check
+         if not is_homo_data:
+             logger.info("Data is not homogeneous.")
+         if not is_homo_data_lagged:
+             logger.info("Lagged data is not homogeneous.")
+
+         # QGDF
+         logger.info("Fitting QGDF.")
+         qgdf_data = QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data.fit(data)
+
+         qgdf_data_lagged = QGDF(flush=FLUSH, verbose=VERBOSE)
+         qgdf_data_lagged.fit(data_lagged)
+
+         # Get irrelevance of the data sample
+         hc_data = np.mean(qgdf_data.hj, axis=0)
+         hc_data_lagged = np.mean(qgdf_data_lagged.hj, axis=0)
+
+     # Clip large irrelevance values at 1e12 to prevent overflow
+     hc_data = np.clip(hc_data, 1, 1e12)
+     hc_data_lagged = np.clip(hc_data_lagged, 1, 1e12)
+
+     # Compute auto-covariance (data was already truncated to n - lag samples above)
+     numerator = np.sum(hc_data * hc_data_lagged)
+     denominator = len(data)
+     if denominator == 0:
+         auto_covar = 0.0
+     else:
+         auto_covar = numerator / denominator
+
+     return auto_covar
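
Editorial sketch of the final normalization: because data is truncated to n - lag samples before the irrelevance fits, the coefficient is the mean product of the two irrelevance vectors. The hc values below are stand-ins; real ones come from the EGDF/QGDF fits.

    import numpy as np

    # Stand-in irrelevance vectors of length n - lag (the metric clips them to [1, 1e12])
    hc = np.array([1.0, 1.2, 1.1, 1.3])
    hc_lagged = np.array([1.2, 1.1, 1.3, 1.0])

    auto_covar = np.sum(hc * hc_lagged) / len(hc)
    print(auto_covar)  # 1.3125
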
@@ -0,0 +1,130 @@
+ import numpy as np
+ import pandas as pd
+ from machinegnostics.metrics import precision_score, recall_score, f1_score
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def classification_report(
+     y_true: np.ndarray | pd.Series,
+     y_pred: np.ndarray | pd.Series,
+     labels=None,
+     target_names=None,
+     digits=2,
+     output_dict=False,
+     verbose: bool = False
+ ):
+     """
+     Builds a text summary or dictionary of the precision, recall, F1 score, and support for each class.
+
+     Uses the precision_score, recall_score, and f1_score functions for consistency.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+
+     labels : array-like, default=None
+         List of labels to include in the report. If None, uses sorted unique labels from y_true and y_pred.
+
+     target_names : list of str, default=None
+         Optional display names matching the labels (same order).
+
+     digits : int, default=2
+         Number of digits for formatting output.
+
+     output_dict : bool, default=False
+         If True, return output as a dict for programmatic use. If False, return as a formatted string.
+
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     report : str or dict
+         Text summary or dictionary of the precision, recall, and F1 score for each class.
+     """
+     logger = get_logger('classification_report', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Generating Classification Report...")
+     # Convert pandas Series to numpy array
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error("Shape of y_true and y_pred must be the same.")
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+
+     # Get unique labels
+     if labels is None:
+         labels = np.unique(np.concatenate([y_true, y_pred]))
+     else:
+         labels = np.asarray(labels)
+
+     n_labels = len(labels)
+     if target_names is not None:
+         if len(target_names) != n_labels:
+             logger.error("target_names length must match number of labels")
+             raise ValueError("target_names length must match number of labels")
+     else:
+         target_names = [str(label) for label in labels]
+
+     # Compute per-class metrics with the package's own scoring functions
+     precisions = precision_score(y_true, y_pred, average=None, labels=labels)
+     recalls = recall_score(y_true, y_pred, average=None, labels=labels)
+     f1s = f1_score(y_true, y_pred, average=None, labels=labels)
+     supports = np.array([(y_true == label).sum() for label in labels])
+
+     # Weighted averages
+     total_support = supports.sum()
+     avg_precision = np.average(precisions, weights=supports) if total_support > 0 else 0.0
+     avg_recall = np.average(recalls, weights=supports) if total_support > 0 else 0.0
+     avg_f1 = np.average(f1s, weights=supports) if total_support > 0 else 0.0
+
+     if output_dict:
+         report = {}
+         for i, label in enumerate(labels):
+             report[target_names[i]] = {
+                 "precision": round(precisions[i], digits),
+                 "recall": round(recalls[i], digits),
+                 "f1-score": round(f1s[i], digits),
+                 "support": int(supports[i])
+             }
+         report["avg/total"] = {
+             "precision": round(avg_precision, digits),
+             "recall": round(avg_recall, digits),
+             "f1-score": round(avg_f1, digits),
+             "support": int(total_support)
+         }
+         return report
+
+     # Build report string
+     header = f"{'Class':<15}{'Precision':>10}{'Recall':>10}{'F1-score':>10}{'Support':>10}\n"
+     report = header
+     report += "=" * (len(header) - 1) + "\n"  # -1: do not count the trailing newline
+     for i in range(n_labels):
+         report += (
+             f"{target_names[i]:<15}"
+             f"{precisions[i]:>10.{digits}f}"
+             f"{recalls[i]:>10.{digits}f}"
+             f"{f1s[i]:>10.{digits}f}"
+             f"{supports[i]:>10}\n"
+         )
+     report += "=" * (len(header) - 1) + "\n"
+     report += (
+         f"{'Avg/Total':<15}"
+         f"{avg_precision:>10.{digits}f}"
+         f"{avg_recall:>10.{digits}f}"
+         f"{avg_f1:>10.{digits}f}"
+         f"{total_support:>10}\n"
+     )
+
+     logger.info("Classification Report generated.")
+     return report
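
Usage sketch (editorial): the same call can yield either the formatted table or a dict, keyed by target_names when given and by str(label) otherwise. Assumes classification_report is re-exported from machinegnostics.metrics like the score functions it builds on.

    import numpy as np
    from machinegnostics.metrics import classification_report

    y_true = np.array([0, 1, 2, 2, 0, 1])
    y_pred = np.array([0, 2, 2, 2, 0, 1])

    # Formatted text table with display names
    print(classification_report(y_true, y_pred, target_names=['cat', 'dog', 'bird']))

    # Dict output for programmatic use; keys default to str(label)
    report = classification_report(y_true, y_pred, output_dict=True)
    print(report['0']['precision'], report['avg/total']['support'])
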
@@ -0,0 +1,93 @@
+ import numpy as np
+ import pandas as pd
+ from machinegnostics.magcal.util.logging import get_logger
+ import logging
+
+ def confusion_matrix(y_true: np.ndarray | pd.Series,
+                      y_pred: np.ndarray | pd.Series,
+                      labels=None, verbose=False) -> np.ndarray:
+     """
+     Computes the confusion matrix to evaluate the accuracy of a classification.
+
+     By definition, entry (i, j) in the confusion matrix is the number of observations
+     actually in class i but predicted to be in class j.
+
+     Parameters
+     ----------
+     y_true : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Ground truth (correct) target values.
+
+     y_pred : array-like or pandas Series/DataFrame column of shape (n_samples,)
+         Estimated targets as returned by a classifier.
+
+     labels : array-like, default=None
+         List of labels to index the matrix. This may be used to reorder or select a subset of labels.
+         If None, labels that appear at least once in y_true or y_pred are used in sorted order.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     cm : ndarray of shape (n_classes, n_classes)
+         Confusion matrix whose i-th row and j-th column entry indicates the number of samples with
+         true label being i-th class and predicted label being j-th class.
+
+     Examples
+     --------
+     >>> y_true = [2, 0, 2, 2, 0, 1]
+     >>> y_pred = [0, 0, 2, 2, 0, 2]
+     >>> confusion_matrix(y_true, y_pred)
+     array([[2, 0, 0],
+            [0, 0, 1],
+            [1, 0, 2]])
+     """
+     logger = get_logger('confusion_matrix', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Confusion Matrix...")
+     # Convert pandas Series to numpy array
+     if isinstance(y_true, pd.Series):
+         y_true = y_true.values
+     if isinstance(y_pred, pd.Series):
+         y_pred = y_pred.values
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y_true).flatten()
+     y_pred = np.asarray(y_pred).flatten()
+
+     if y_true.shape != y_pred.shape:
+         logger.error("Shape of y_true and y_pred must be the same.")
+         raise ValueError("Shape of y_true and y_pred must be the same.")
+     if y_true.size == 0:
+         logger.error("y_true and y_pred must not be empty.")
+         raise ValueError("y_true and y_pred must not be empty.")
+     # Ensure 1D arrays
+     if y_true.ndim != 1 or y_pred.ndim != 1:
+         logger.error("y_true and y_pred must be 1D arrays.")
+         raise ValueError("y_true and y_pred must be 1D arrays.")
+     numeric = np.issubdtype(y_true.dtype, np.number) and np.issubdtype(y_pred.dtype, np.number)  # NaN/Inf checks apply to numeric labels only
+     if numeric and (np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred))):
+         logger.error("y_true and y_pred must not contain NaN values.")
+         raise ValueError("y_true and y_pred must not contain NaN values.")
+     if numeric and (np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred))):
+         logger.error("y_true and y_pred must not contain Inf values.")
+         raise ValueError("y_true and y_pred must not contain Inf values.")
+
+     # Determine labels
+     if labels is None:
+         labels = np.unique(np.concatenate([y_true, y_pred]))
+     else:
+         labels = np.asarray(labels)
+     n_labels = len(labels)
+     label_to_index = {label: idx for idx, label in enumerate(labels)}
+
+     # Initialize confusion matrix
+     cm = np.zeros((n_labels, n_labels), dtype=int)
+
+     # Populate confusion matrix
+     for true, pred in zip(y_true, y_pred):
+         if true in label_to_index and pred in label_to_index:
+             i = label_to_index[true]
+             j = label_to_index[pred]
+             cm[i, j] += 1
+
+     logger.info("Confusion Matrix calculation completed.")
+     return cm
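
Usage sketch (editorial): string labels are valid input, which is why the NaN/Inf guard above applies only to numeric dtypes. Assumes confusion_matrix is re-exported from machinegnostics.metrics.

    import numpy as np
    from machinegnostics.metrics import confusion_matrix

    y_true = np.array(['cat', 'dog', 'cat', 'bird'])
    y_pred = np.array(['cat', 'cat', 'cat', 'bird'])

    # Rows are true labels, columns are predictions, in the order given by labels
    cm = confusion_matrix(y_true, y_pred, labels=['bird', 'cat', 'dog'])
    print(cm)
    # [[1 0 0]
    #  [0 2 0]
    #  [0 1 0]]
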