machinegnostics-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/metrics/gmmfe.py
@@ -0,0 +1,108 @@
'''
ManGo - Machine Gnostics Library
Copyright (C) 2025 ManGo Team

Author: Nirmal Parmar
'''

import numpy as np
from machinegnostics.magcal.criteria_eval import CriteriaEvaluator
from machinegnostics.magcal.util.logging import get_logger
import logging

def gmmfe(y: np.ndarray, y_fit: np.ndarray, verbose: bool = False) -> float:
    """
    Compute the Geometric Mean of Model Fit Error (GMMFE) for evaluating the fit between observed data and model predictions.

    The GMMFE is a statistical metric that quantifies the average relative error between the observed and fitted values
    on a logarithmic scale. It is particularly useful for datasets with a wide range of values or when the data is
    multiplicative in nature.

    Parameters
    ----------
    y : np.ndarray
        The observed data (ground truth). Must be a 1D array of numerical values.
    y_fit : np.ndarray
        The fitted data (model predictions). Must be a 1D array of the same shape as `y`.
    verbose : bool, optional
        If True, enables detailed logging of the computation process. Default is False.

    Returns
    -------
    float
        The computed Geometric Mean of Model Fit Error (GMMFE) value.

    Raises
    ------
    ValueError
        If `y` and `y_fit` do not have the same shape.
    ValueError
        If `y` or `y_fit` are not 1D arrays.

    Notes
    -----
    - The GMMFE is calculated using the formula:
        GMMFE = exp(Σ(w_i * log(e_i)) / Σ(w_i))
      where:
        - e_i = |y_i - y_fit_i| / |y_i| (relative error)
        - w_i = weights for each data point
      This formula computes the weighted geometric mean of the relative errors.

    References
    ----------
    - Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical Gnostics for Data Analysis, Chapter 19.3.4

    Example
    -------
    >>> import numpy as np
    >>> from machinegnostics.metrics.gmmfe import gmmfe
    >>> y = np.array([1.0, 2.0, 3.0, 4.0])
    >>> y_fit = np.array([1.1, 1.9, 3.2, 3.8])
    >>> gmmfe(y, y_fit)
    0.06666666666666667
    """
    logger = get_logger('gmmfe', level=logging.WARNING if not verbose else logging.INFO)
    logger.info("Calculating GMMFE...")

    # validate input types
    if not isinstance(y, np.ndarray):
        logger.error("Invalid input type for y.")
        raise ValueError("y must be a numpy array.")
    if not isinstance(y_fit, np.ndarray):
        logger.error("Invalid input type for y_fit.")
        raise ValueError("y_fit must be a numpy array.")
    if y.ndim != 1 or y_fit.ndim != 1:
        logger.error("Invalid input dimensions.")
        raise ValueError("y and y_fit must be 1D arrays.")
    if y.shape != y_fit.shape:
        logger.error("Shape mismatch.")
        raise ValueError("y and y_fit must have the same shape.")
    if y.size == 0:
        logger.error("Empty array.")
        raise ValueError("y and y_fit must not be empty.")
    # inf and nan check
    if np.any(np.isnan(y)) or np.any(np.isnan(y_fit)):
        logger.error("Input contains NaN values.")
        raise ValueError("y and y_fit must not contain NaN values.")
    if np.any(np.isinf(y)) or np.any(np.isinf(y_fit)):
        logger.error("Input contains Inf values.")
        raise ValueError("y and y_fit must not contain Inf values.")

    # Convert to numpy arrays and flatten
    y = np.asarray(y).flatten()
    y_fit = np.asarray(y_fit).flatten()

    # generate the GMMFE value
    ce = CriteriaEvaluator(y, y_fit, verbose=verbose)
    gmmfe_value = ce._gmmfe()
    logger.info("GMMFE calculation completed.")
    return gmmfe_value
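The Notes block above defines GMMFE as a weighted geometric mean of relative errors. Below is a minimal NumPy sketch of that formula with uniform weights; `gmmfe_formula` is a hypothetical name, not the package's `CriteriaEvaluator._gmmfe()`, and the small epsilon guard is added here. The packaged evaluator may apply non-uniform weights or a different error definition, which is why its documented example value can differ from this literal reading.

import numpy as np

def gmmfe_formula(y, y_fit, w=None):
    # Weighted geometric mean of relative errors e_i = |y_i - y_fit_i| / |y_i|,
    # per the docstring's formula; illustrative only, not the packaged code.
    y = np.asarray(y, dtype=float)
    y_fit = np.asarray(y_fit, dtype=float)
    w = np.ones_like(y) if w is None else np.asarray(w, dtype=float)
    e = np.abs(y - y_fit) / np.abs(y)
    e = np.maximum(e, 1e-15)  # keep log() finite for exact fits
    return float(np.exp(np.sum(w * np.log(e)) / np.sum(w)))

y = np.array([1.0, 2.0, 3.0, 4.0])
y_fit = np.array([1.1, 1.9, 3.2, 3.8])
print(gmmfe_formula(y, y_fit))  # roughly 0.064 with uniform weights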
machinegnostics/metrics/hc.py
@@ -0,0 +1,141 @@
'''
ManGo - Machine Gnostics Library
Copyright (C) 2025 ManGo Team

Author: Nirmal Parmar
'''

from machinegnostics.magcal.util.logging import get_logger
import logging
import numpy as np
from machinegnostics.magcal.characteristics import GnosticsCharacteristics

def hc(y_true: np.ndarray, y_pred: np.ndarray, case: str = 'i', verbose: bool = False) -> float:
    """
    Calculate the gnostic characteristic (Hc) metric of the data sample.

    case 'i' - estimating gnostic irrelevance.
        Range [0, 1]; values close to one indicate less relevance.

    case 'j' - quantifying gnostic irrelevance.
        Range [0, ∞), measuring the strength of the relationship; values close to one indicate less irrelevance.

    The Hc metric evaluates the performance of a model by comparing the
    predicted values with the true values through their gnostic irrelevance.
    It is calculated as the normalized sum of the squared irrelevances. For a
    standard comparison, irrelevances are calculated with S=1.

    Parameters
    ----------
    y_true : array-like
        True values.
    y_pred : array-like
        Predicted values.
    case : str, optional
        Case to be used for calculation. Options are 'i' or 'j'. Default is 'i'.
    verbose : bool, optional
        If True, enables detailed logging for debugging purposes. Default is False.

    Returns
    -------
    float
        The calculated Hc value.

    Example
    -------
    >>> from machinegnostics.metrics import hc
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> hc_value = hc(y_true, y_pred, case='i')
    >>> print(hc_value)
    """
    logger = get_logger('Hc', level=logging.WARNING if not verbose else logging.INFO)
    logger.info("Starting HC calculation.")

    # Validate input types
    if not isinstance(y_true, (list, tuple, np.ndarray)):
        logger.error("Invalid input type for y_true.")
        raise TypeError("y_true must be array-like (list, tuple, or numpy array).")
    if not isinstance(y_pred, (list, tuple, np.ndarray)):
        logger.error("Invalid input type for y_pred.")
        raise TypeError("y_pred must be array-like (list, tuple, or numpy array).")
    if case not in ['i', 'j']:
        logger.error("Invalid case value.")
        raise ValueError("case must be either 'i' or 'j'.")
    # Validate input dimensions
    if np.ndim(y_true) > 1:
        logger.error("y_true must be a 1D array.")
        raise ValueError("y_true must be a 1D array.")
    if np.ndim(y_pred) > 1:
        logger.error("y_pred must be a 1D array.")
        raise ValueError("y_pred must be a 1D array.")
    # Check for empty arrays
    if len(y_true) == 0 or len(y_pred) == 0:
        logger.error("y_true and y_pred must not be empty.")
        raise ValueError("y_true and y_pred must not be empty.")
    # Check for NaN and Inf values
    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
        logger.error("y_true and y_pred must not contain NaN values.")
        raise ValueError("y_true and y_pred must not contain NaN values.")
    if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
        logger.error("y_true and y_pred must not contain Inf values.")
        raise ValueError("y_true and y_pred must not contain Inf values.")

    # Convert to numpy arrays and flatten
    y_true = np.asarray(y_true).flatten()
    y_pred = np.asarray(y_pred).flatten()

    # Check for shape mismatch
    if y_true.shape != y_pred.shape:
        logger.error("y_true and y_pred must have the same shape.")
        raise ValueError("y_true and y_pred must have the same shape.")

    # Calculate the ratio R = Z / Z0
    R = y_true / y_pred

    # Create an instance of GnosticsCharacteristics
    logger.info("Creating GnosticsCharacteristics instance.")
    gnostics = GnosticsCharacteristics(R=R)

    # Calculate q and q1
    logger.info("Calculating q and q1.")
    q, q1 = gnostics._get_q_q1()

    # Calculate hi or hj based on the case
    if case == 'i':
        hc_values = gnostics._hi(q, q1)
    elif case == 'j':
        hc_values = gnostics._hj(q, q1)
    else:
        logger.error("Invalid case. Use 'i' or 'j'.")
        raise ValueError("Invalid case. Use 'i' or 'j'.")

    hcsr = np.sum(hc_values**2)

    # normalize the result
    hcsr = hcsr / len(y_true)
    logger.info("Gnostic irrelevance Hc calculation completed.")
    return hcsr
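The function above reduces to three steps: form the ratio R = y_true / y_pred, convert it to a per-point gnostic irrelevance, and return the normalized sum of squares. The sketch below mirrors those steps in plain NumPy; `hc_sketch` is a hypothetical name, and the irrelevance forms h_i = tanh(w) and h_j = sinh(w) with w = (2/S)·ln R and S = 1 are assumed textbook gnostic definitions, since the package's GnosticsCharacteristics helpers are not shown in this diff.

import numpy as np

def hc_sketch(y_true, y_pred, case='i', S=1.0):
    # Assumes positive y_true and y_pred so that the ratio R is positive.
    R = np.asarray(y_true, dtype=float) / np.asarray(y_pred, dtype=float)
    w = (2.0 / S) * np.log(R)
    # Assumed textbook forms: estimating irrelevance tanh(w), quantifying sinh(w).
    h = np.tanh(w) if case == 'i' else np.sinh(w)
    # Normalized sum of squared irrelevances, as in hc() above.
    return float(np.sum(h**2) / len(h))

print(hc_sketch([1.0, 2.0, 3.0], [1.1, 1.9, 3.2], case='i'))  # near 0 for a close fit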
machinegnostics/metrics/mae.py
@@ -0,0 +1,72 @@
import numpy as np
from machinegnostics.magcal.util.logging import get_logger
import logging
from machinegnostics.metrics.mean import mean

def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray, verbose: bool = False) -> float:
    """
    Computes the mean absolute error (MAE).

    Parameters
    ----------
    y_true : array-like
        True values (targets).
    y_pred : array-like
        Predicted values.
    verbose : bool, optional
        If True, enables detailed logging. Default is False.

    Returns
    -------
    float
        Average absolute difference between actual and predicted values.

    Raises
    ------
    TypeError
        If y_true or y_pred are not array-like.
    ValueError
        If inputs have mismatched shapes or are empty.
    """
    logger = get_logger('MAE', level=logging.WARNING if not verbose else logging.INFO)
    logger.info("Calculating Mean Absolute Error...")

    # Validate input types
    if not isinstance(y_true, (list, tuple, np.ndarray)):
        logger.error("Invalid input type for y_true.")
        raise TypeError("y_true must be array-like (list, tuple, or numpy array).")
    if not isinstance(y_pred, (list, tuple, np.ndarray)):
        logger.error("Invalid input type for y_pred.")
        raise TypeError("y_pred must be array-like (list, tuple, or numpy array).")

    # Convert to numpy arrays and flatten
    y_true = np.asarray(y_true).flatten()
    y_pred = np.asarray(y_pred).flatten()

    # Check for shape mismatch
    if y_true.shape != y_pred.shape:
        logger.error(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
        raise ValueError(f"Shape mismatch: y_true shape {y_true.shape} != y_pred shape {y_pred.shape}")
    # Check for empty arrays
    if y_true.size == 0 or y_pred.size == 0:
        logger.error("y_true and y_pred must not be empty.")
        raise ValueError("y_true and y_pred must not be empty.")

    # Compute MAE using the gnostic mean of the absolute errors
    mae = float(mean(np.abs(y_true - y_pred)))

    logger.info("Mean Absolute Error (MAE) calculated.")
    return mae
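A short usage sketch follows, with illustrative input values. Note the design choice visible above: the absolute errors are aggregated with the package's own gnostic mean() rather than numpy.mean, so the result can differ slightly from the classical arithmetic MAE; no specific output value is claimed here.

import numpy as np
from machinegnostics.metrics.mae import mean_absolute_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.1, 8.0])

# Aggregated with the gnostic mean of |y_true - y_pred|, so the value may
# differ slightly from np.mean(np.abs(y_true - y_pred)).
print(mean_absolute_error(y_true, y_pred))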
machinegnostics/metrics/mean.py
@@ -0,0 +1,117 @@
'''
Calculate the gnostic mean of a given sample.

method: mean()
LEL - local estimate of location

Authors: Nirmal Parmar
Machine Gnostics
'''
import logging
from machinegnostics.magcal.util.logging import get_logger
import numpy as np
from machinegnostics.magcal import ELDF, QLDF
from typing import Union

def mean(data: np.ndarray,
         S: Union[float, str] = 1,
         case: str = 'i',
         z0_optimize: bool = True,
         data_form: str = 'a',
         tolerance: float = 1e-6,
         verbose: bool = False) -> float:
    """
    Calculate the gnostic mean of the given data.

    The gnostic mean is based on the principles of gnostic theory, which provides
    robust estimates of data relationships. It leverages estimating and quantifying
    irrelevances and fidelities, robust measures of data uncertainty, which the two
    geometries aggregate differently.

    Parameters:
    -----------
    data : np.ndarray
        Input data array.
    S : float or str, optional
        Scale parameter for ELDF/QLDF; a float or 'auto'. Default is 1.
    case : str, optional
        Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
        'i' for estimating irrelevance, 'j' for quantifying irrelevance.
    z0_optimize : bool, optional
        Whether to optimize z0 in ELDF/QLDF. Default is True.
    data_form : str, optional
        Data form. Default is 'a'. 'a' for additive, 'm' for multiplicative.
    tolerance : float, optional
        Tolerance for the distribution-function fit. Default is 1e-6.
    verbose : bool, optional
        If True, enables detailed logging for debugging purposes.

    Returns:
    --------
    float
        Gnostic mean of the data.

    Example:
    --------
    >>> import machinegnostics as mg
    >>> import numpy as np
    >>> data = np.array([1, 2, 3, 4, 5])
    >>> mg.mean(data)
    """
    logger = get_logger('mean', level=logging.WARNING if not verbose else logging.INFO)
    logger.info("Calculating gnostic mean...")

    # flatten data
    data = np.asarray(data).flatten()
    # Validate input
    if not isinstance(data, np.ndarray):
        logger.error("Input must be a numpy array.")
        raise TypeError("Input must be a numpy array.")
    if data.ndim != 1:
        logger.error("Input data must be a one-dimensional array.")
        raise ValueError("Input data must be a one-dimensional array.")
    if len(data) == 0:
        logger.error("Input data array is empty.")
        raise ValueError("Input data array is empty.")
    if np.any(np.isnan(data)):
        logger.error("Input data contains NaN values.")
        raise ValueError("Input data contains NaN values.")
    if np.any(np.isinf(data)):
        logger.error("Input data contains Inf values.")
        raise ValueError("Input data contains Inf values.")
    # Check for valid case
    if case not in ['i', 'j']:
        logger.error("case must be 'i' for estimating irrelevance or 'j' for quantifying irrelevance.")
        raise ValueError("case must be 'i' for estimating irrelevance or 'j' for quantifying irrelevance.")

    # arg validation
    if isinstance(S, str):
        if S != 'auto':
            logger.error("S must be a float or 'auto'.")
            raise ValueError("S must be a float or 'auto'.")
    elif not isinstance(S, (int, float)):
        logger.error("S must be a float or 'auto'.")
        raise TypeError("S must be a float or 'auto'.")
    # S in the range [0, 2] is suggested
    elif S < 0 or S > 2:
        logger.warning("S is outside the suggested range [0, 2].")
    # Check for valid data_form
    if data_form not in ['a', 'm']:
        logger.error("data_form must be 'a' for additive or 'm' for multiplicative.")
        raise ValueError("data_form must be 'a' for additive or 'm' for multiplicative.")

    if case == 'i':
        logger.info("Using estimating geometry for mean calculation.")
        # Compute ELDF
        eldf = ELDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False)
        eldf.fit(data, plot=False)
        mean_value = eldf.z0
    else:
        logger.info("Using quantifying geometry for mean calculation.")
        # Compute QLDF
        qldf = QLDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False)
        qldf.fit(data, plot=False)
        mean_value = qldf.z0
    logger.info("Gnostic mean calculated.")

    return float(mean_value)
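A brief usage sketch with illustrative values: the estimating characteristics behind case='i' (ELDF) are bounded and therefore limit the influence of outlying points, while the quantifying characteristics behind case='j' (QLDF) give them more weight, so case='i' is typically the robust choice on contaminated samples. No specific output values are claimed here.

import numpy as np
import machinegnostics as mg

data = np.array([9.8, 10.1, 10.0, 9.9, 10.2, 25.0])  # one gross outlier

# Compare the two gnostic geometries with the classical arithmetic mean.
print("gnostic mean, case 'i':", mg.mean(data, case='i'))
print("gnostic mean, case 'j':", mg.mean(data, case='j'))
print("arithmetic mean       :", float(np.mean(data)))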
machinegnostics/metrics/median.py
@@ -0,0 +1,122 @@
'''
To calculate the gnostic median

Author: Nirmal Parmar
Machine Gnostics
'''

import numpy as np
from machinegnostics.magcal import EGDF, QGDF
from machinegnostics.magcal.util.logging import get_logger
import logging
from typing import Union

def median(data: np.ndarray,
           case: str = 'i',
           S: Union[float, str] = 1,
           z0_optimize: bool = True,
           data_form: str = 'a',
           tolerance: float = 1e-6,
           verbose: bool = False) -> float:
    """
    Calculate the gnostic median of a dataset.

    The gnostic median is based on the principles of gnostic theory, which provides
    robust estimates of data relationships. It leverages estimating and quantifying
    irrelevances and fidelities, robust measures of data uncertainty, which the two
    geometries aggregate differently.

    Parameters:
    -----------
    data : np.ndarray
        Input data array.
    case : str, optional
        Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
        'i' for estimating irrelevance, 'j' for quantifying irrelevance.
    S : float or str, optional
        Scale parameter for EGDF/QGDF; a float or 'auto'. Default is 1.
    z0_optimize : bool, optional
        Whether to optimize z0 in EGDF/QGDF. Default is True.
    data_form : str, optional
        Data form. Default is 'a'. 'a' for additive, 'm' for multiplicative.
    tolerance : float, optional
        Tolerance for the distribution-function fit. Default is 1e-6.
    verbose : bool, optional
        If True, enables detailed logging for debugging purposes. Default is False.

    Returns:
    --------
    float
        Median of the data.

    Example:
    --------
    >>> import machinegnostics as mg
    >>> import numpy as np
    >>> data = np.array([1, 2, 3, 4, 5])
    >>> mg.median(data)
    3.0
    """
    logger = get_logger('median', level=logging.WARNING if not verbose else logging.INFO)
    logger.info("Starting median calculation.")

    # Validate input
    if not isinstance(data, np.ndarray):
        logger.error("Input must be a numpy array.")
        raise TypeError("Input must be a numpy array.")
    if data.ndim != 1:
        logger.error("Input data must be a one-dimensional array.")
        raise ValueError("Input data must be a one-dimensional array.")
    if len(data) == 0:
        logger.error("Input data array is empty.")
        raise ValueError("Input data array is empty.")
    if np.any(np.isnan(data)):
        logger.error("Input data contains NaN values.")
        raise ValueError("Input data contains NaN values.")
    if np.any(np.isinf(data)):
        logger.error("Input data contains Inf values.")
        raise ValueError("Input data contains Inf values.")
    # Check for valid case
    if case not in ['i', 'j']:
        logger.error("case must be 'i' for estimating irrelevance or 'j' for quantifying irrelevance.")
        raise ValueError("case must be 'i' for estimating irrelevance or 'j' for quantifying irrelevance.")

    # arg validation
    if isinstance(S, str):
        if S != 'auto':
            logger.error("S must be a float or 'auto'.")
            raise ValueError("S must be a float or 'auto'.")
    elif not isinstance(S, (int, float)):
        logger.error("S must be a float or 'auto'.")
        raise TypeError("S must be a float or 'auto'.")
    # S in the range [0, 2] is suggested
    elif S < 0 or S > 2:
        logger.warning("S is outside the suggested range [0, 2].")
    # Check for valid data_form
    if data_form not in ['a', 'm']:
        logger.error("data_form must be 'a' for additive or 'm' for multiplicative.")
        raise ValueError("data_form must be 'a' for additive or 'm' for multiplicative.")

    if case == 'i':
        logger.info("Using estimating geometry for median calculation.")
        # EGDF
        egdf = EGDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form)
        egdf.fit(data, plot=False)
        median_value = egdf.z0
    else:
        logger.info("Using quantifying geometry for median calculation.")
        # QGDF
        qgdf = QGDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form)
        qgdf.fit(data, plot=False)
        median_value = qgdf.z0
    logger.info("Gnostic median calculated.")
    return float(median_value)
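A short usage sketch with illustrative values, contrasting the gnostic median (the EGDF location z0 for the default case='i') with the classical sample median on a skewed sample; no specific outputs are claimed.

import numpy as np
import machinegnostics as mg

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 50.0])  # skewed by one large value

print("gnostic median (case 'i'):", mg.median(data))
print("numpy median             :", float(np.median(data)))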