machinegnostics-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0

machinegnostics/metrics/robr2.py
@@ -0,0 +1,119 @@
+'''
+ManGo - Machine Gnostics Library
+Copyright (C) 2025 ManGo Team
+
+Author: Nirmal Parmar
+'''
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+import numpy as np
+from machinegnostics.magcal.criteria_eval import CriteriaEvaluator
+
+def robr2(y: np.ndarray, y_fit: np.ndarray, w: np.ndarray = None, verbose: bool = False) -> float:
+    """
+    Compute the Robust R-squared (RobR2) value for evaluating the goodness of fit between observed data and model predictions.
+
+    The Robust R-squared (RobR2) is a statistical metric that measures the proportion of variance in the observed data
+    explained by the fitted data, with robustness to outliers. Unlike the classical R-squared metric, RobR2 incorporates
+    weights and is less sensitive to outliers, making it suitable for datasets with noise or irregularities.
+
+    Parameters
+    ----------
+    y : np.ndarray
+        The observed data (ground truth). Must be a 1D array of numerical values.
+    y_fit : np.ndarray
+        The fitted data (model predictions). Must be a 1D array of the same shape as `y`.
+    w : np.ndarray, optional
+        Weights for the data points. Must be a 1D array of the same shape as `y` if provided. Defaults to `None`, in which
+        case equal weights are assumed.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns
+    -------
+    float
+        The computed Robust R-squared (RobR2) value. The value ranges from 0 to 1, where:
+        - 1 indicates a perfect fit.
+        - 0 indicates no explanatory power of the model.
+
+    Raises
+    ------
+    ValueError
+        If `y` and `y_fit` do not have the same shape.
+    ValueError
+        If `w` is provided and does not have the same shape as `y`.
+    ValueError
+        If `y` or `y_fit` are not 1D arrays.
+
+    Notes
+    -----
+    - The Robust R-squared (RobR2) is calculated using the formula:
+        RobR2 = 1 - (Σ(w_i * (e_i - ē)²) / Σ(w_i * (y_i - ȳ)²))
+      where:
+        - e_i = y_i - y_fit_i (residuals)
+        - ē = weighted mean of residuals
+        - ȳ = weighted mean of observed data
+        - w_i = weights for each data point
+    - This metric is robust to outliers due to the use of weights and is particularly useful for noisy datasets.
+    - If weights are not provided, equal weights are assumed for all data points.
+
+    References
+    ----------
+    - Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical Gnostics for Data Analysis, Chapter 19.3.4.
+    - Robust R-squared (RobR2) is defined in Equation 19.7 of the reference.
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> from machinegnostics.metrics import robr2
+    >>> y = np.array([1.0, 2.0, 3.0, 4.0])
+    >>> y_fit = np.array([1.1, 1.9, 3.2, 3.8])
+    >>> w = np.array([1.0, 1.0, 1.0, 1.0])
+    >>> result = robr2(y, y_fit, w)
+    >>> print(result)
+
+    Comparison with Classical R-squared
+    -----------------------------------
+    The classical R-squared metric assumes equal weights and is sensitive to outliers. RobR2, on the other hand,
+    incorporates weights and is robust to outliers, making it more reliable for datasets with irregularities or noise.
+    """
+    logger = get_logger('RobR2', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating Robust R-squared...")
+
+    # Check that y and y_fit have the same shape
+    if y.shape != y_fit.shape:
+        logger.error("y and y_fit must have the same shape")
+        raise ValueError("y and y_fit must have the same shape")
+
+    # Check that w, if provided, matches y in shape
+    if w is not None and y.shape != w.shape:
+        logger.error("y and w must have the same shape")
+        raise ValueError("y and w must have the same shape")
+
+    # 1D array check
+    if y.ndim != 1 or y_fit.ndim != 1:
+        logger.error("y and y_fit must be 1D arrays")
+        raise ValueError("y and y_fit must be 1D arrays")
+
+    # Convert to numpy arrays and flatten
+    y_true = np.asarray(y).flatten()
+    y_pred = np.asarray(y_fit).flatten()
+
+    # NaN and Inf check
+    if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+        logger.error("y and y_fit must not contain NaN values")
+        raise ValueError("y and y_fit must not contain NaN values")
+    if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+        logger.error("y and y_fit must not contain Inf values")
+        raise ValueError("y and y_fit must not contain Inf values")
+
+    # Delegate the evaluation to CriteriaEvaluator
+    logger.info("Initializing CriteriaEvaluator...")
+    ce = CriteriaEvaluator(y=y, y_fit=y_fit, w=w, verbose=verbose)
+
+    # Compute the robust R-squared
+    robr2_value = ce._robr2()
+    logger.info("Gnostic Robust R-squared calculated.")
+    return robr2_value
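
Note: the docstring's Notes section spells out the RobR2 formula in closed form. As a reading aid, here is a minimal NumPy sketch of that formula alone; the helper name `robr2_formula` is illustrative and not part of the package, and the packaged implementation delegates to `CriteriaEvaluator._robr2()`, which may differ in detail:

import numpy as np

def robr2_formula(y, y_fit, w=None):
    # Equal weights when none are given, as the docstring describes
    w = np.ones_like(y, dtype=float) if w is None else w
    e = y - y_fit                          # residuals e_i
    e_bar = np.sum(w * e) / np.sum(w)      # weighted mean of residuals
    y_bar = np.sum(w * y) / np.sum(w)      # weighted mean of observed data
    # RobR2 = 1 - sum(w_i * (e_i - e_bar)**2) / sum(w_i * (y_i - y_bar)**2)
    return 1.0 - np.sum(w * (e - e_bar) ** 2) / np.sum(w * (y - y_bar) ** 2)

y = np.array([1.0, 2.0, 3.0, 4.0])
y_fit = np.array([1.1, 1.9, 3.2, 3.8])
print(robr2_formula(y, y_fit))  # 0.98 for the docstring's example data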

machinegnostics/metrics/std.py
@@ -0,0 +1,144 @@
+'''
+Gnostic standard deviation of given sample
+
+method: std()
+
+Authors: Nirmal Parmar
+Machine Gnostics
+'''
+
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+import numpy as np
+from machinegnostics.metrics.mean import mean
+from machinegnostics.metrics.variance import variance
+from machinegnostics.magcal import EGDF
+
+def std(data: np.ndarray,
+        case: str = 'i',
+        S: float | str = 'auto',
+        z0_optimize: bool = True,
+        data_form: str = 'a',
+        tolerance: float = 1e-6,
+        verbose: bool = False) -> tuple:
+    """
+    Calculate the gnostic standard deviation of the given data.
+
+    The gnostic standard deviation metric is based on the principles of gnostic theory,
+    which provides robust estimates of data relationships. It leverages estimating and
+    quantifying irrelevances and fidelities, which are robust measures of data
+    uncertainty; the two kinds of irrelevance are aggregated differently.
+
+    Parameters:
+    -----------
+    data : np.ndarray
+        Input data array.
+    case : str, optional
+        Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
+        'i' for estimating variance, 'j' for quantifying variance.
+    S : float or str, optional
+        Scaling parameter for ELDF. Default is 'auto', which optimizes S using EGDF.
+        Suggested range is [0.01, 2].
+    z0_optimize : bool, optional
+        Whether to optimize z0 in ELDF. Default is True.
+    data_form : str, optional
+        Data form for ELDF. Default is 'a'. 'a' for additive, 'm' for multiplicative.
+    tolerance : float, optional
+        Tolerance for ELDF fitting. Default is 1e-6.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns:
+    --------
+    tuple
+        Lower and upper bounds of the standard deviation.
+
+    Example:
+    --------
+    >>> import machinegnostics as mg
+    >>> import numpy as np
+    >>> data = np.array([1, 2, 3, 4, 5])
+    >>> mg.std(data)
+    (2.9403976979154143, 3.0599336862362043)
+    """
+    logger = get_logger('std', level=logging.WARNING if not verbose else logging.INFO)
+    logger.info("Calculating standard deviation...")
+
+    # Validate input
+    if not isinstance(data, np.ndarray):
+        logger.error("Input must be a numpy array.")
+        raise TypeError("Input must be a numpy array.")
+    if data.ndim != 1:
+        logger.error("Input data must be a one-dimensional array.")
+        raise ValueError("Input data must be a one-dimensional array.")
+    if len(data) == 0:
+        logger.error("Input data array is empty.")
+        raise ValueError("Input data array is empty.")
+    if np.any(np.isnan(data)):
+        logger.error("Input data contains NaN values.")
+        raise ValueError("Input data contains NaN values.")
+    if np.any(np.isinf(data)):
+        logger.error("Input data contains Inf values.")
+        raise ValueError("Input data contains Inf values.")
+    # Check for valid case
+    if case not in ['i', 'j']:
+        logger.error("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+        raise ValueError("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+    # S argument validation
+    if isinstance(S, str):
+        if S != 'auto':
+            logger.error("S must be a float or 'auto'.")
+            raise ValueError("S must be a float or 'auto'.")
+    elif not isinstance(S, (int, float)):
+        logger.error("S must be a float or 'auto'.")
+        raise TypeError("S must be a float or 'auto'.")
+    # A numeric S outside the suggested range [0, 2] only triggers a warning
+    if isinstance(S, (int, float)) and (S < 0 or S > 2):
+        logger.warning("S should be in the range [0, 2].")
+    # Check for valid data_form
+    if data_form not in ['a', 'm']:
+        logger.error("data_form must be 'a' for additive or 'm' for multiplicative.")
+        raise ValueError("data_form must be 'a' for additive or 'm' for multiplicative.")
+
+    # mean
+    logger.info("Calculating mean...")
+    m = mean(data, S=S, z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance)
+
+    # variance
+    logger.info("Calculating variance...")
+    v = np.abs(variance(data, case=case, S=S, z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance))
+
+    # Resolve S = 'auto' by optimizing it with EGDF
+    if isinstance(S, str) and S == 'auto':
+        logger.info("Optimizing S using EGDF...")
+        egdf = EGDF(z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance, verbose=verbose)
+        egdf.fit(data=data, plot=False)
+        # Clip the optimized S to the supported limits [0.01, 1e3]
+        S = np.clip(egdf.S_opt, 0.01, 1e3)
+    # std
+    if case.lower() == 'i':
+        logger.info("Calculating standard deviation in the estimating geometry...")
+        # Guard before computing: the denominator 1 - sqrt(v) must stay positive
+        if 1 - np.sqrt(v) <= 0:
+            logger.warning("Encountered non-positive denominator (1 - sqrt(v) <= 0), returning (0, 0).")
+            return 0, 0
+        std_value_ub = m * ((1 + np.sqrt(v)) / (1 - np.sqrt(v)))**(S / 2)
+        std_value_lb = m * ((1 - np.sqrt(v)) / (1 + np.sqrt(v)))**(S / 2)
+    elif case.lower() == 'j':
+        logger.info("Calculating standard deviation in the quantifying geometry...")
+        std_value_ub = m * (np.sqrt(v) + (1 + np.sqrt(v)))**(S / 2)
+        # Guard before computing the lower bound
+        if 1 - np.sqrt(v) < 0:
+            logger.warning("Encountered negative sqrt value, returning (0, upper bound). Use case 'i' for the estimating geometry.")
+            return 0, std_value_ub
+        std_value_lb = m * (np.sqrt(v) + (1 - np.sqrt(v)))**(S / 2)
+    else:
+        raise ValueError("case must be either 'i' or 'j': 'i' for estimating variance, 'j' for quantifying variance.")
+
+    logger.info("Gnostic standard deviation calculation completed.")
+
+    return float(std_value_lb), float(std_value_ub)
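
To make the bound formulas concrete: in the estimating geometry ('i') the function returns m * ((1 - sqrt(v)) / (1 + sqrt(v)))**(S/2) and m * ((1 + sqrt(v)) / (1 - sqrt(v)))**(S/2) as lower and upper bounds around the gnostic mean m. A minimal sketch under assumed values (m, v, and S below are placeholders, not outputs of the library):

import numpy as np

# Assumed illustrative values for the gnostic mean, variance, and scale parameter
m, v, S = 3.0, 0.0027, 1.0

lb = m * ((1 - np.sqrt(v)) / (1 + np.sqrt(v))) ** (S / 2)
ub = m * ((1 + np.sqrt(v)) / (1 - np.sqrt(v))) ** (S / 2)
print(lb, ub)  # the bounds straddle m and widen as v or S grows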

machinegnostics/metrics/variance.py
@@ -0,0 +1,101 @@
+'''
+Gnostic Variance of given sample data
+
+method: variance()
+
+Authors: Nirmal Parmar
+Machine Gnostics
+'''
+import logging
+import numpy as np
+from machinegnostics.magcal import ELDF, QLDF
+from machinegnostics.magcal.util.logging import get_logger
+
+def variance(data: np.ndarray,
+             case: str = 'i',
+             S: float = 1,
+             z0_optimize: bool = True,
+             data_form: str = 'a',
+             tolerance: float = 1e-6,
+             verbose: bool = False) -> float:
+    """
+    Calculate the gnostic variance of the given data.
+
+    The gnostic variance metric is based on the principles of gnostic theory, which
+    provides robust estimates of data relationships. This metric leverages the concepts
+    of estimating irrelevances and quantifying irrelevances, which are robust measures
+    of data uncertainty. The two kinds of irrelevance are aggregated differently.
+
+    Parameters:
+    -----------
+    data : np.ndarray
+        Input data array.
+    case : str, optional
+        Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
+        'i' for estimating variance, 'j' for quantifying variance.
+    S : float, optional
+        Scaling parameter for ELDF. Default is 1.
+    z0_optimize : bool, optional
+        Whether to optimize z0 in ELDF. Default is True.
+    data_form : str, optional
+        Data form for ELDF. Default is 'a'. 'a' for additive, 'm' for multiplicative.
+    tolerance : float, optional
+        Tolerance for ELDF fitting. Default is 1e-6.
+    verbose : bool, optional
+        If True, enables detailed logging for debugging purposes. Default is False.
+
+    Returns:
+    --------
+    float
+        Gnostic variance of the data.
+
+    Example:
+    --------
+    >>> import machinegnostics as mg
+    >>> import numpy as np
+    >>> data = np.array([1, 2, 3, 4, 5])
+    >>> mg.variance(data)
+    0.002685330177795109
+    """
+    logger = get_logger('variance', level=logging.WARNING if not verbose else logging.INFO)
+
+    logger.info("Calculating gnostic variance...")
+    # Validate input
+    if not isinstance(data, np.ndarray):
+        logger.error("Input must be a numpy array.")
+        raise TypeError("Input must be a numpy array.")
+    if data.ndim != 1:
+        logger.error("Input data must be a one-dimensional array.")
+        raise ValueError("Input data must be a one-dimensional array.")
+    if len(data) == 0:
+        logger.error("Input data array is empty.")
+        raise ValueError("Input data array is empty.")
+    if np.any(np.isnan(data)):
+        logger.error("Input data contains NaN values.")
+        raise ValueError("Input data contains NaN values.")
+    if np.any(np.isinf(data)):
+        logger.error("Input data contains Inf values.")
+        raise ValueError("Input data contains Inf values.")
+    # Check for valid case
+    if case not in ['i', 'j']:
+        logger.error("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+        raise ValueError("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+
+    if case == 'i':
+        logger.info("Using ELDF for variance calculation...")
+        # Fit ELDF and aggregate the estimating irrelevances
+        eldf = ELDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False, flush=False)
+        eldf.fit(data, plot=False)
+        hi = eldf.hi
+        hc = np.mean(hi**2)
+
+    if case == 'j':
+        logger.info("Using QLDF for variance calculation...")
+        # Fit QLDF and aggregate the quantifying irrelevances
+        qldf = QLDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False, flush=False)
+        qldf.fit(data)
+        hj = qldf.hj
+        hc = np.mean(hj**2)
+
+    logger.info("Gnostic variance calculated.")
+
+    return float(hc)
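
The aggregation step itself is simply the mean of squared irrelevances. A minimal standalone sketch, where `h` stands in for the irrelevance vector a fitted ELDF (`.hi`) or QLDF (`.hj`) exposes, and the sample values are made up for illustration:

import numpy as np

def aggregate_variance(h: np.ndarray) -> float:
    # Gnostic variance as the mean of squared irrelevances, mirroring
    # hc = np.mean(hi**2) (case 'i') and hc = np.mean(hj**2) (case 'j') above
    return float(np.mean(h ** 2))

h = np.array([-0.08, 0.02, 0.00, -0.02, 0.08])  # made-up irrelevance values
print(aggregate_variance(h))  # 0.00272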

machinegnostics/models/classification/__init__.py
@@ -0,0 +1 @@
+from machinegnostics.models.classification.mg_log_reg import LogisticRegressor

machinegnostics/models/classification/layer_history_log_reg.py
@@ -0,0 +1,121 @@
+import numpy as np
+from machinegnostics.models.classification.layer_param_log_reg import ParamLogisticRegressorBase
+
+class HistoryRobustRegressor(ParamLogisticRegressorBase):
+    """
+    History class for the Logistic Regressor model.
+
+    This class extends ParamLogisticRegressorBase to maintain a history
+    of model parameters and gnostic loss values during training iterations.
+
+    Parameters recorded in the history:
+    - loss: Gnostic loss value at each iteration
+    - iteration: The iteration number
+    - weights: Model weights at each iteration
+    - coefficients: Model coefficients at each iteration
+    - degree: Degree of polynomial features used in the model
+    - rentropy: Entropy of the model at each iteration
+    - fi, hi, fj, hj, infoi, infoj, pi, pj, ei, ej: Additional gnostic information if calculated
+    """
+
+    def __init__(self,
+                 degree: int = 1,
+                 max_iter: int = 100,
+                 tol: float = 1e-3,
+                 early_stopping: bool = True,
+                 verbose: bool = False,
+                 scale: str | int | float = 'auto',
+                 data_form: str = 'a',
+                 gnostic_characteristics: bool = True,
+                 history: bool = True,
+                 proba: str = 'gnostic'):
+        super().__init__(
+            degree=degree,
+            max_iter=max_iter,
+            tol=tol,
+            early_stopping=early_stopping,
+            verbose=verbose,
+            scale=scale,
+            data_form=data_form,
+            gnostic_characteristics=gnostic_characteristics,
+            proba=proba
+        )
+
+        self.degree = degree
+        self.max_iter = max_iter
+        self.tol = tol
+        self.early_stopping = early_stopping
+        self.verbose = verbose
+        self.scale = scale
+        self.data_form = data_form
+        self.gnostic_characteristics = gnostic_characteristics
+        self.history = history
+        self.proba = proba
+        self.params = [
+            {
+                'iteration': 0,
+                'loss': None,
+                'weights': None,
+                'coefficients': None,
+                'degree': self.degree,
+                'rentropy': None,
+                'fi': None,
+                'hi': None,
+                'fj': None,
+                'hj': None,
+                'infoi': None,
+                'infoj': None,
+                'pi': None,
+                'pj': None,
+                'ei': None,
+                'ej': None
+            }
+        ]
+
+        # logger
+        self.logger.info("HistoryRobustRegressor initialized.")
+
+    def _fit(self, X: np.ndarray, y: np.ndarray):
+        """
+        Fit the model to the data and record history.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input features.
+        y : np.ndarray
+            Target values.
+        """
+        self.logger.info("Starting fit process for HistoryRobustRegressor.")
+        # Call the parent fit method to perform fitting
+        super()._fit(X, y)
+
+        # Record the current state in history as a dict
+        params_dict = {}
+
+        if self.gnostic_characteristics:
+            params_dict['iteration'] = self._iter + 1
+            params_dict['loss'] = self.loss
+            params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+            params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+            params_dict['degree'] = self.degree
+            params_dict['rentropy'] = self.re
+            params_dict['fi'] = self.fi
+            params_dict['hi'] = self.hi
+            params_dict['fj'] = self.fj
+            params_dict['hj'] = self.hj
+            params_dict['infoi'] = self.infoi
+            params_dict['infoj'] = self.infoj
+            params_dict['pi'] = self.pi
+            params_dict['pj'] = self.pj
+            params_dict['ei'] = self.ei
+            params_dict['ej'] = self.ej
+        else:
+            params_dict['iteration'] = 0
+            params_dict['loss'] = None
+            params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+            params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+            params_dict['degree'] = self.degree
+
+        self.params.append(params_dict)
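
Because each `_fit` call appends a plain dict to `self.params`, the recorded history can be inspected with ordinary list and dict operations. A hedged sketch with a toy stand-in object (the `_Fitted` class and its values are illustrative, not package output):

# Toy stand-in for a fitted estimator exposing the `params` list built above
class _Fitted:
    params = [
        {'iteration': 0, 'loss': None, 'degree': 2},
        {'iteration': 1, 'loss': 0.62, 'degree': 2},
        {'iteration': 2, 'loss': 0.41, 'degree': 2},
    ]

model = _Fitted()
losses = [p['loss'] for p in model.params if p['loss'] is not None]
print(f"iterations recorded: {len(model.params)}")  # 3
print(f"final loss: {model.params[-1]['loss']}")    # 0.41
print(f"loss trajectory: {losses}")                 # [0.62, 0.41]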

machinegnostics/models/classification/layer_io_process_log_reg.py
@@ -0,0 +1,98 @@
+import numpy as np
+from machinegnostics.magcal.layer_io_process_base import DataProcessLayerBase
+from machinegnostics.models.classification.layer_mlflow_log_reg import InterfaceLogisticRegressor
+from machinegnostics.magcal import disable_parent_docstring
+
+@disable_parent_docstring
+class DataProcessLogisticRegressor(DataProcessLayerBase, InterfaceLogisticRegressor):
+    """
+    Data processing layer for the Logistic Regressor model.
+    Handles data preprocessing specific to the Logistic Regressor model.
+    """
+    @disable_parent_docstring
+    def __init__(self,
+                 degree: int = 1,
+                 max_iter: int = 100,
+                 tol: float = 1e-3,
+                 early_stopping: bool = True,
+                 verbose: bool = False,
+                 scale: str | int | float = 'auto',
+                 data_form: str = 'a',
+                 gnostic_characteristics: bool = True,
+                 history: bool = True,
+                 proba: str = 'gnostic',
+                 **kwargs):
+        super().__init__(
+            degree=degree,
+            max_iter=max_iter,
+            tol=tol,
+            early_stopping=early_stopping,
+            verbose=verbose,
+            scale=scale,
+            data_form=data_form,
+            gnostic_characteristics=gnostic_characteristics,
+            history=history,
+            proba=proba,
+            **kwargs
+        )
+
+        # logger
+        self.logger.info("DataProcessLogisticRegressor initialized.")
+
+        # --- argument checks ---
+        if not isinstance(degree, int) or degree < 1:
+            raise ValueError("Degree must be a positive integer.")
+        if not isinstance(max_iter, int) or max_iter < 1:
+            raise ValueError("max_iter must be a positive integer.")
+        if not isinstance(tol, (float, int)) or tol <= 0:
+            raise ValueError("tol must be a positive float or int.")
+        if not isinstance(scale, (str, int, float)):
+            raise ValueError("scale must be a string, int, or float.")
+        if isinstance(scale, (int, float)) and (scale < 0 or scale > 2):
+            raise ValueError("scale must be between 0 and 2 if it is a number.")
+        if data_form not in ['a', 'm']:
+            raise ValueError("data_form must be either 'a' (additive) or 'm' (multiplicative).")
+        if proba not in ['gnostic', 'sigmoid']:
+            raise ValueError("proba must be either 'gnostic' or 'sigmoid'.")
+        self.degree = degree
+        self.max_iter = max_iter
+        self.tol = tol
+        self.early_stopping = early_stopping
+        self.verbose = verbose
+        self.scale = scale
+        self.data_form = data_form
+        self.gnostic_characteristics = gnostic_characteristics
+        self.history = history
+        self.params = []
+
+    @disable_parent_docstring
+    def _fit(self, X, y):
+        """
+        Fit the model to the data and preprocess it.
+        """
+        self.logger.info("Starting fit process for DataProcessLogisticRegressor.")
+        X, y = self._fit_io(X, y)
+        # Call the fit method from the next class in the MRO
+        return super()._fit(X, y)
+
+    @disable_parent_docstring
+    def _predict(self, X) -> np.ndarray:
+        """
+        Predict using the model after preprocessing the input data.
+        """
+        self.logger.info("Making predictions with DataProcessLogisticRegressor.")
+        X = self._predict_io(X)
+        y_pred = super()._predict(X)
+        # y_pred = self._convert_output(y_pred, self.data_form)
+        return y_pred
+
+    @disable_parent_docstring
+    def _predict_proba(self, X) -> np.ndarray:
+        """
+        Predict probabilities using the model after preprocessing the input data.
+        """
+        self.logger.info("Calculating predicted probabilities with DataProcessLogisticRegressor.")
+        X = self._predict_io(X)
+        y_proba = super()._predict_proba(X)
+        # y_proba = self._convert_output(y_proba, self.data_form)
+        return y_proba
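
The layering here relies on Python's cooperative multiple inheritance: each layer does its own step and hands off via `super()`, so `DataProcessLogisticRegressor._fit` preprocesses the inputs and the next class in the MRO performs the actual fit. A self-contained toy sketch of the same pattern (class names are illustrative, not the package's classes):

import numpy as np

class ParamLayer:
    def _fit(self, X, y):
        print("ParamLayer: fitting on", X.shape)
        return self

class IOLayer(ParamLayer):
    def _fit(self, X, y):
        # Validate/convert inputs, then hand off to the next class in the
        # MRO, mirroring DataProcessLogisticRegressor._fit above
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        print("IOLayer: validated inputs")
        return super()._fit(X, y)

IOLayer()._fit([[1.0], [2.0]], [0, 1])
# IOLayer: validated inputs
# ParamLayer: fitting on (2, 1)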