machinegnostics-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/metrics/robr2.py
@@ -0,0 +1,119 @@
+ '''
+ ManGo - Machine Gnostics Library
+ Copyright (C) 2025 ManGo Team
+
+ Author: Nirmal Parmar
+ '''
+ import logging
+ import numpy as np
+ from machinegnostics.magcal.util.logging import get_logger
+ from machinegnostics.magcal.criteria_eval import CriteriaEvaluator
+
+ def robr2(y: np.ndarray, y_fit: np.ndarray, w: np.ndarray = None, verbose: bool = False) -> float:
+     """
+     Compute the Robust R-squared (RobR2) value for evaluating the goodness of fit between observed data and model predictions.
+
+     The Robust R-squared (RobR2) is a statistical metric that measures the proportion of variance in the observed data
+     explained by the fitted data, with robustness to outliers. Unlike the classical R-squared metric, RobR2 incorporates
+     weights and is less sensitive to outliers, making it suitable for datasets with noise or irregularities.
+
+     Parameters
+     ----------
+     y : np.ndarray
+         The observed data (ground truth). Must be a 1D array of numerical values.
+     y_fit : np.ndarray
+         The fitted data (model predictions). Must be a 1D array of the same shape as `y`.
+     w : np.ndarray, optional
+         Weights for the data points. Must be a 1D array of the same shape as `y` if provided. Defaults to `None`,
+         in which case equal weights are assumed.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns
+     -------
+     float
+         The computed Robust R-squared (RobR2) value. The value ranges from 0 to 1, where:
+         - 1 indicates a perfect fit.
+         - 0 indicates no explanatory power of the model.
+
+     Raises
+     ------
+     ValueError
+         If `y` and `y_fit` do not have the same shape.
+     ValueError
+         If `w` is provided and does not have the same shape as `y`.
+     ValueError
+         If `y` or `y_fit` are not 1D arrays.
+
+     Notes
+     -----
+     - The Robust R-squared (RobR2) is calculated using the formula:
+         RobR2 = 1 - (Σ(w_i * (e_i - ē)²) / Σ(w_i * (y_i - ȳ)²))
+       where:
+         - e_i = y_i - y_fit_i (residuals)
+         - ē = weighted mean of residuals
+         - ȳ = weighted mean of observed data
+         - w_i = weights for each data point
+     - This metric is robust to outliers due to the use of weights and is particularly useful for noisy datasets.
+     - If weights are not provided, equal weights are assumed for all data points.
+
+     References
+     ----------
+     - Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical Gnostics for Data Analysis, Chapter 19.3.4.
+     - Robust R-squared (RobR2) is defined in Equation 19.7 of the reference.
+
+     Example
+     -------
+     >>> import numpy as np
+     >>> from machinegnostics.metrics import robr2
+     >>> y = np.array([1.0, 2.0, 3.0, 4.0])
+     >>> y_fit = np.array([1.1, 1.9, 3.2, 3.8])
+     >>> w = np.array([1.0, 1.0, 1.0, 1.0])
+     >>> result = robr2(y, y_fit, w)
+     >>> print(result)
+
+     Comparison with Classical R-squared
+     -----------------------------------
+     The classical R-squared metric assumes equal weights and is sensitive to outliers. RobR2, on the other hand,
+     incorporates weights and is robust to outliers, making it more reliable for datasets with irregularities or noise.
+     """
+     logger = get_logger('RobR2', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating Robust R-squared...")
+
+     # Check that y and y_fit have the same shape
+     if y.shape != y_fit.shape:
+         logger.error("y and y_fit must have the same shape")
+         raise ValueError("y and y_fit must have the same shape")
+
+     # Check w shape
+     if w is not None and y.shape != w.shape:
+         logger.error("y and w must have the same shape")
+         raise ValueError("y and w must have the same shape")
+
+     # 1D array check
+     if y.ndim != 1 or y_fit.ndim != 1:
+         logger.error("y and y_fit must be 1D arrays")
+         raise ValueError("y and y_fit must be 1D arrays")
+
+     # Convert to numpy arrays and flatten
+     y_true = np.asarray(y).flatten()
+     y_pred = np.asarray(y_fit).flatten()
+
+     # inf and NaN check
+     if np.any(np.isnan(y_true)) or np.any(np.isnan(y_pred)):
+         logger.error("y and y_fit must not contain NaN values")
+         raise ValueError("y and y_fit must not contain NaN values")
+     if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
+         logger.error("y and y_fit must not contain Inf values")
+         raise ValueError("y and y_fit must not contain Inf values")
+
+     # Delegate the computation to the gnostic criteria evaluator
+     logger.info("Initializing CriteriaEvaluator...")
+     ce = CriteriaEvaluator(y=y, y_fit=y_fit, w=w, verbose=verbose)
+
+     # Compute the robust R-squared
+     robr2_value = ce._robr2()
+     logger.info("Gnostic Robust R-squared calculated.")
+     return robr2_value
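Editor's note: the docstring's formula can be checked independently of the package. Below is a minimal NumPy sketch of the documented expression only; the actual computation is delegated to CriteriaEvaluator._robr2, whose body is not shown in this diff, so this mirrors the documented formula rather than the internal implementation.

```python
import numpy as np

def robr2_reference(y, y_fit, w=None):
    """Reference computation of the documented formula:
    RobR2 = 1 - sum(w_i*(e_i - e_bar)^2) / sum(w_i*(y_i - y_bar)^2)."""
    y, y_fit = np.asarray(y, float), np.asarray(y_fit, float)
    w = np.ones_like(y) if w is None else np.asarray(w, float)
    e = y - y_fit                      # residuals
    e_bar = np.average(e, weights=w)   # weighted mean of residuals
    y_bar = np.average(y, weights=w)   # weighted mean of observations
    return 1 - np.sum(w * (e - e_bar)**2) / np.sum(w * (y - y_bar)**2)

y = np.array([1.0, 2.0, 3.0, 4.0])
y_fit = np.array([1.1, 1.9, 3.2, 3.8])
print(robr2_reference(y, y_fit))  # 0.98 -- close to 1 for a good fit
```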
machinegnostics/metrics/std.py
@@ -0,0 +1,144 @@
+ '''
+ Gnostic standard deviation of a given sample
+
+ method: std()
+
+ Authors: Nirmal Parmar
+ Machine Gnostics
+ '''
+
+ import logging
+ import numpy as np
+ from machinegnostics.magcal.util.logging import get_logger
+ from machinegnostics.metrics.mean import mean
+ from machinegnostics.metrics.variance import variance
+ from machinegnostics.magcal import EGDF
+
+ def std(data: np.ndarray,
+         case: str = 'i',
+         S: float | str = 'auto',
+         z0_optimize: bool = True,
+         data_form: str = 'a',
+         tolerance: float = 1e-6,
+         verbose: bool = False) -> tuple:
+     """
+     Calculate the gnostic standard deviation of the given data.
+
+     The gnostic standard deviation metric is based on the principles of gnostic theory, which
+     provides robust estimates of data relationships. It leverages estimating and quantifying
+     irrelevances and fidelities, which are robust measures of data uncertainty; the two cases
+     aggregate these irrelevances differently.
+
+     Parameters:
+     -----------
+     data : np.ndarray
+         Input data array.
+     case : str, optional
+         Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
+         'i' for estimating variance, 'j' for quantifying variance.
+     S : float or str, optional
+         Scaling parameter for ELDF. Default is 'auto', which optimizes S using EGDF.
+         Suggested range is [0.01, 2].
+     z0_optimize : bool, optional
+         Whether to optimize z0 in ELDF. Default is True.
+     data_form : str, optional
+         Data form for ELDF. Default is 'a'. 'a' for additive, 'm' for multiplicative.
+     tolerance : float, optional
+         Tolerance for ELDF fitting. Default is 1e-6.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns:
+     --------
+     tuple
+         Lower and upper bounds of the standard deviation.
+
+     Example:
+     --------
+     >>> import machinegnostics as mg
+     >>> import numpy as np
+     >>> data = np.array([1, 2, 3, 4, 5])
+     >>> mg.std(data)
+     (2.9403976979154143, 3.0599336862362043)
+     """
+     logger = get_logger('std', level=logging.WARNING if not verbose else logging.INFO)
+     logger.info("Calculating standard deviation...")
+
+     # Validate input
+     if not isinstance(data, np.ndarray):
+         logger.error("Input must be a numpy array.")
+         raise TypeError("Input must be a numpy array.")
+     if data.ndim != 1:
+         logger.error("Input data must be a one-dimensional array.")
+         raise ValueError("Input data must be a one-dimensional array.")
+     if len(data) == 0:
+         logger.error("Input data array is empty.")
+         raise ValueError("Input data array is empty.")
+     if np.any(np.isnan(data)):
+         logger.error("Input data contains NaN values.")
+         raise ValueError("Input data contains NaN values.")
+     if np.any(np.isinf(data)):
+         logger.error("Input data contains Inf values.")
+         raise ValueError("Input data contains Inf values.")
+     # Check for valid case
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+         raise ValueError("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+     # Argument validation for S
+     if isinstance(S, str):
+         if S != 'auto':
+             logger.error("S must be a float or 'auto'.")
+             raise ValueError("S must be a float or 'auto'.")
+     elif not isinstance(S, (int, float)):
+         logger.error("S must be a float or 'auto'.")
+         raise TypeError("S must be a float or 'auto'.")
+     # Suggested range for a numeric S is [0.01, 2]
+     if isinstance(S, (int, float)):
+         if S < 0.01 or S > 2:
+             logger.warning("S is outside the suggested range [0.01, 2].")
+     # Check for valid data_form
+     if data_form not in ['a', 'm']:
+         logger.error("data_form must be 'a' for additive or 'm' for multiplicative.")
+         raise ValueError("data_form must be 'a' for additive or 'm' for multiplicative.")
+
+     # mean
+     logger.info("Calculating mean...")
+     m = mean(data, S=S, z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance)
+
+     # variance
+     logger.info("Calculating variance...")
+     v = np.abs(variance(data, case=case, S=S, z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance))
+
+     # If S is 'auto', optimize it using EGDF
+     if isinstance(S, str) and S == 'auto':
+         logger.info("Optimizing S using EGDF...")
+         egdf = EGDF(z0_optimize=z0_optimize, data_form=data_form, tolerance=tolerance, verbose=verbose)
+         egdf.fit(data=data, plot=False)
+         S = egdf.S_opt
+         # Clip S to the limits [0.01, 1e3]
+         S = np.clip(S, 0.01, 1e3)
+     # std
+     if case.lower() == 'i':
+         logger.info("Calculating standard deviation using the estimating geometry...")
+         # Guard against a non-positive denominator before taking the power
+         if 1 - np.sqrt(v) <= 0:
+             logger.warning("Encountered negative sqrt value!")
+             return 0, 0
+         std_value_ub = m * ((1 + np.sqrt(v)) / (1 - np.sqrt(v)))**(S/2)
+         std_value_lb = m * ((1 - np.sqrt(v)) / (1 + np.sqrt(v)))**(S/2)
+
+     elif case.lower() == 'j':
+         logger.info("Calculating standard deviation using the quantifying geometry...")
+
+         std_value_ub = m * ((np.sqrt(v)) + (1 + np.sqrt(v)))**(S/2)
+         # Guard against a negative sqrt term in the lower bound
+         if 1 - np.sqrt(v) < 0:
+             logger.warning("Encountered negative sqrt value, returning 0 for the lower bound. Use case 'i' for estimating geometry.")
+             return 0, std_value_ub
+
+         std_value_lb = m * ((np.sqrt(v)) + (1 - np.sqrt(v)))**(S/2)
+
+     else:
+         raise ValueError("case must be either 'i' or 'j'. 'i' for estimating variance, 'j' for quantifying variance.")
+
+     logger.info("Gnostic standard deviation calculation completed.")
+
+     return float(std_value_lb), float(std_value_ub)
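Editor's note: in the estimating case the interval reduces to a closed form in the gnostic mean m and variance v. A standalone sketch of just that arithmetic follows; the m, v, and S here are toy inputs, not the gnostic estimates the real function derives from the data.

```python
import numpy as np

def std_bounds_estimating(m, v, S):
    """Closed-form interval used in the 'i' (estimating) branch of std():
    bounds = m * ((1 +/- sqrt(v)) / (1 -/+ sqrt(v)))**(S/2)."""
    r = np.sqrt(v)
    if 1 - r <= 0:                       # same guard as the library code
        return 0.0, 0.0
    lb = m * ((1 - r) / (1 + r))**(S / 2)
    ub = m * ((1 + r) / (1 - r))**(S / 2)
    return lb, ub

# Toy numbers: a small variance yields a tight interval around m.
print(std_bounds_estimating(m=3.0, v=0.0027, S=1.0))
```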
machinegnostics/metrics/variance.py
@@ -0,0 +1,101 @@
+ '''
+ Gnostic variance of a given data sample
+
+ method: variance()
+
+ Authors: Nirmal Parmar
+ Machine Gnostics
+ '''
+ import logging
+ import numpy as np
+ from machinegnostics.metrics.mean import mean
+ from machinegnostics.magcal import ELDF, QLDF
+ from machinegnostics.magcal.util.logging import get_logger
+
+ def variance(data: np.ndarray,
+              case: str = 'i',
+              S: float = 1,
+              z0_optimize: bool = True,
+              data_form: str = 'a',
+              tolerance: float = 1e-6,
+              verbose: bool = False) -> float:
+     """
+     Calculate the gnostic variance of the given data.
+
+     The gnostic variance metric is based on the principles of gnostic theory, which
+     provides robust estimates of data relationships. This metric leverages the concepts
+     of estimating irrelevances and quantifying irrelevances, which are robust measures
+     of data uncertainty; the two cases aggregate these irrelevances differently.
+
+     Parameters:
+     -----------
+     data : np.ndarray
+         Input data array.
+     case : str, optional
+         Case for irrelevance calculation ('i' or 'j'). Default is 'i'.
+         'i' for estimating variance, 'j' for quantifying variance.
+     S : float, optional
+         Scaling parameter for ELDF. Default is 1.
+     z0_optimize : bool, optional
+         Whether to optimize z0 in ELDF. Default is True.
+     data_form : str, optional
+         Data form for ELDF. Default is 'a'. 'a' for additive, 'm' for multiplicative.
+     tolerance : float, optional
+         Tolerance for ELDF fitting. Default is 1e-6.
+     verbose : bool, optional
+         If True, enables detailed logging for debugging purposes. Default is False.
+
+     Returns:
+     --------
+     float
+         Gnostic variance of the data.
+
+     Example:
+     --------
+     >>> import machinegnostics as mg
+     >>> import numpy as np
+     >>> data = np.array([1, 2, 3, 4, 5])
+     >>> mg.variance(data)
+     0.002685330177795109
+     """
+     logger = get_logger('variance', level=logging.WARNING if not verbose else logging.INFO)
+
+     logger.info("Calculating gnostic variance...")
+     # Validate input
+     if not isinstance(data, np.ndarray):
+         logger.error("Input must be a numpy array.")
+         raise TypeError("Input must be a numpy array.")
+     if data.ndim != 1:
+         logger.error("Input data must be a one-dimensional array.")
+         raise ValueError("Input data must be a one-dimensional array.")
+     if len(data) == 0:
+         logger.error("Input data array is empty.")
+         raise ValueError("Input data array is empty.")
+     if np.any(np.isnan(data)):
+         logger.error("Input data contains NaN values.")
+         raise ValueError("Input data contains NaN values.")
+     if np.any(np.isinf(data)):
+         logger.error("Input data contains Inf values.")
+         raise ValueError("Input data contains Inf values.")
+     # Check for valid case
+     if case not in ['i', 'j']:
+         logger.error("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+         raise ValueError("Case must be 'i' for estimating variance or 'j' for quantifying variance.")
+
+     if case == 'i':
+         logger.info("Using ELDF for variance calculation...")
+         # Estimating case: mean squared estimating irrelevance from the ELDF fit
+         eldf = ELDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False, flush=False)
+         eldf.fit(data, plot=False)
+         hi = eldf.hi
+         hc = np.mean(hi**2)
+
+     elif case == 'j':
+         logger.info("Using QLDF for variance calculation...")
+         # Quantifying case: mean squared quantifying irrelevance from the QLDF fit
+         qldf = QLDF(homogeneous=True, S=S, z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form, wedf=False, flush=False)
+         qldf.fit(data)
+         hj = qldf.hj
+         hc = np.mean(hj**2)
+
+     logger.info("Gnostic variance calculated.")
+
+     return float(hc)
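Editor's note: the aggregation step is simply the mean of squared irrelevances taken from the fitted distribution function (eldf.hi or qldf.hj). A short usage sketch following the module's own doctest; the `case` argument switches between the two geometries.

```python
import numpy as np
import machinegnostics as mg

data = np.array([1, 2, 3, 4, 5])

# case='i' aggregates estimating irrelevances from an ELDF fit,
# case='j' aggregates quantifying irrelevances from a QLDF fit.
v_i = mg.variance(data, case='i')   # the doctest above reports ~0.002685
v_j = mg.variance(data, case='j')
print(v_i, v_j)
```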
machinegnostics/models/__init__.py
@@ -0,0 +1,2 @@
+ from machinegnostics.models.cross_validation import CrossValidator
+ from machinegnostics.models.data_split import train_test_split
machinegnostics/models/classification/__init__.py
@@ -0,0 +1 @@
+ from machinegnostics.models.classification.mg_log_reg import LogisticRegressor
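Editor's note: the classifier re-exported here is defined in mg_log_reg.py, whose body is not included in the hunks shown below. A hypothetical usage sketch follows, assuming a conventional fit/predict/predict_proba surface (suggested by the `_fit`/`_predict`/`_predict_proba` layer methods in the files below, but not confirmed by this diff); the constructor arguments are taken from the layer signatures.

```python
import numpy as np
from machinegnostics.models.classification import LogisticRegressor

X = np.array([[0.1], [0.4], [0.6], [0.9]])
y = np.array([0, 0, 1, 1])

# Assumed public API; argument names mirror the layer __init__ signatures below.
model = LogisticRegressor(degree=1, max_iter=100, proba='gnostic')
model.fit(X, y)
print(model.predict(X))
print(model.predict_proba(X))
```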
machinegnostics/models/classification/layer_history_log_reg.py
@@ -0,0 +1,121 @@
+ import numpy as np
+ from machinegnostics.models.classification.layer_param_log_reg import ParamLogisticRegressorBase
+
+ class HistoryRobustRegressor(ParamLogisticRegressorBase):
+     """
+     History class for the Logistic Regressor model.
+
+     This class extends ParamLogisticRegressorBase to maintain a history
+     of model parameters and gnostic loss values during training iterations.
+
+     Parameters recorded in the history:
+     - loss: Gnostic loss value at each iteration
+     - iteration: The iteration number
+     - weights: Model weights at each iteration
+     - coefficients: Model coefficients at each iteration
+     - degree: Degree of polynomial features used in the model
+     - rentropy: Entropy of the model at each iteration
+     - fi, hi, fj, hj, infoi, infoj, pi, pj, ei, ej: Additional gnostic information if calculated
+     """
+
+     def __init__(self,
+                  degree: int = 1,
+                  max_iter: int = 100,
+                  tol: float = 1e-3,
+                  early_stopping: bool = True,
+                  verbose: bool = False,
+                  scale: str | int | float = 'auto',
+                  data_form: str = 'a',
+                  gnostic_characteristics: bool = True,
+                  history: bool = True,
+                  proba: str = 'gnostic'):
+         super().__init__(
+             degree=degree,
+             max_iter=max_iter,
+             tol=tol,
+             early_stopping=early_stopping,
+             verbose=verbose,
+             scale=scale,
+             data_form=data_form,
+             gnostic_characteristics=gnostic_characteristics,
+             proba=proba
+         )
+
+         self.degree = degree
+         self.max_iter = max_iter
+         self.tol = tol
+         self.early_stopping = early_stopping
+         self.verbose = verbose
+         self.scale = scale
+         self.data_form = data_form
+         self.gnostic_characteristics = gnostic_characteristics
+         self.history = history
+         self.proba = proba
+         self.params = [
+             {
+                 'iteration': 0,
+                 'loss': None,
+                 'weights': None,
+                 'coefficients': None,
+                 'degree': self.degree,
+                 'rentropy': None,
+                 'fi': None,
+                 'hi': None,
+                 'fj': None,
+                 'hj': None,
+                 'infoi': None,
+                 'infoj': None,
+                 'pi': None,
+                 'pj': None,
+                 'ei': None,
+                 'ej': None
+             }
+         ]
+
+         # logger
+         self.logger.info("HistoryRobustRegressor initialized.")
+
+     def _fit(self, X: np.ndarray, y: np.ndarray):
+         """
+         Fit the model to the data and record history.
+
+         Parameters
+         ----------
+         X : np.ndarray
+             Input features.
+         y : np.ndarray
+             Target values.
+         """
+         self.logger.info("Starting fit process for HistoryRobustRegressor.")
+         # Call the parent fit method to perform fitting
+         super()._fit(X, y)
+
+         # Record the state of this fit in the history as a dict
+         params_dict = {}
+
+         if self.gnostic_characteristics:
+             params_dict['iteration'] = self._iter + 1
+             params_dict['loss'] = self.loss
+             params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+             params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+             params_dict['degree'] = self.degree
+             params_dict['rentropy'] = self.re
+             params_dict['fi'] = self.fi
+             params_dict['hi'] = self.hi
+             params_dict['fj'] = self.fj
+             params_dict['hj'] = self.hj
+             params_dict['infoi'] = self.infoi
+             params_dict['infoj'] = self.infoj
+             params_dict['pi'] = self.pi
+             params_dict['pj'] = self.pj
+             params_dict['ei'] = self.ei
+             params_dict['ej'] = self.ej
+         else:
+             params_dict['iteration'] = 0
+             params_dict['loss'] = None
+             params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+             params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+             params_dict['degree'] = self.degree
+
+         self.params.append(params_dict)
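Editor's note: the history is a plain list of dicts, one entry per fit call plus a placeholder first entry. A small sketch of how such a record can be inspected; the attribute name `params` is taken from this file, and the example dicts below are fabricated stand-ins for a fitted model's history.

```python
# Extract (iteration, loss) pairs, skipping the placeholder first entry.
def loss_history(params):
    return [(p['iteration'], p['loss']) for p in params if p['loss'] is not None]

# Example with the structure this class records:
params = [
    {'iteration': 0, 'loss': None},   # initial placeholder entry
    {'iteration': 1, 'loss': 0.42},
    {'iteration': 2, 'loss': 0.31},
]
print(loss_history(params))  # [(1, 0.42), (2, 0.31)]
```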
machinegnostics/models/classification/layer_io_process_log_reg.py
@@ -0,0 +1,98 @@
+ import numpy as np
+ from machinegnostics.magcal.layer_io_process_base import DataProcessLayerBase
+ from machinegnostics.models.classification.layer_mlflow_log_reg import InterfaceLogisticRegressor
+ from machinegnostics.magcal import disable_parent_docstring
+
+ @disable_parent_docstring
+ class DataProcessLogisticRegressor(DataProcessLayerBase, InterfaceLogisticRegressor):
+     """
+     Data processing layer for the Logistic Regressor model.
+     Handles data preprocessing specific to the Logistic Regressor model.
+     """
+     @disable_parent_docstring
+     def __init__(self,
+                  degree: int = 1,
+                  max_iter: int = 100,
+                  tol: float = 1e-3,
+                  early_stopping: bool = True,
+                  verbose: bool = False,
+                  scale: str | int | float = 'auto',
+                  data_form: str = 'a',
+                  gnostic_characteristics: bool = True,
+                  history: bool = True,
+                  proba: str = 'gnostic',
+                  **kwargs):
+         super().__init__(
+             degree=degree,
+             max_iter=max_iter,
+             tol=tol,
+             early_stopping=early_stopping,
+             verbose=verbose,
+             scale=scale,
+             data_form=data_form,
+             gnostic_characteristics=gnostic_characteristics,
+             history=history,
+             proba=proba,
+             **kwargs
+         )
+
+         # logger
+         self.logger.info("DataProcessLogisticRegressor initialized.")
+
+         # --- argument checks ---
+         if not isinstance(degree, int) or degree < 1:
+             raise ValueError("Degree must be a positive integer.")
+         if not isinstance(max_iter, int) or max_iter < 1:
+             raise ValueError("max_iter must be a positive integer.")
+         if not isinstance(tol, (float, int)) or tol <= 0:
+             raise ValueError("tol must be a positive float or int.")
+         if not isinstance(scale, (str, int, float)):
+             raise ValueError("scale must be a string, int, or float.")
+         if isinstance(scale, (int, float)) and (scale < 0 or scale > 2):
+             raise ValueError("scale must be between 0 and 2 if it is a number.")
+         if data_form not in ['a', 'm']:
+             raise ValueError("data_form must be either 'a' (additive) or 'm' (multiplicative).")
+         if proba not in ['gnostic', 'sigmoid']:
+             raise ValueError("proba must be either 'gnostic' or 'sigmoid'.")
+         self.degree = degree
+         self.max_iter = max_iter
+         self.tol = tol
+         self.early_stopping = early_stopping
+         self.verbose = verbose
+         self.scale = scale
+         self.data_form = data_form
+         self.gnostic_characteristics = gnostic_characteristics
+         self.history = history
+         self.params = []
+
+     @disable_parent_docstring
+     def _fit(self, X, y):
+         """
+         Fit the model to the data and preprocess it.
+         """
+         self.logger.info("Starting fit process for DataProcessLogisticRegressor.")
+         X, y = self._fit_io(X, y)
+         # Call the fit method from the next class in the MRO
+         return super()._fit(X, y)
+
+     @disable_parent_docstring
+     def _predict(self, X) -> np.ndarray:
+         """
+         Predict using the model after preprocessing the input data.
+         """
+         self.logger.info("Making predictions with DataProcessLogisticRegressor.")
+         X = self._predict_io(X)
+         y_pred = super()._predict(X)
+         # y_pred = self._convert_output(y_pred, self.data_form)
+         return y_pred
+
+     @disable_parent_docstring
+     def _predict_proba(self, X) -> np.ndarray:
+         """
+         Predict probabilities using the model after preprocessing the input data.
+         """
+         self.logger.info("Calculating predicted probabilities with DataProcessLogisticRegressor.")
+         X = self._predict_io(X)
+         y_proba = super()._predict_proba(X)
+         # y_proba = self._convert_output(y_proba, self.data_form)
+         return y_proba
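Editor's note: this layer preprocesses its inputs and then hands off via `super()._fit(X, y)` to the next class in the method resolution order, which is how the package chains its IO, interface, history, and parameter layers. A minimal generic sketch of that cooperative-`super()` pattern, with stand-in layers that are not part of the package:

```python
# Each layer does its own step, then defers to the next class in the MRO chain.
class IOLayer:
    def _fit(self, X, y):
        X = [x * 2 for x in X]        # stand-in for _fit_io preprocessing
        return super()._fit(X, y)

class ParamLayer:
    def _fit(self, X, y):
        self.coef_ = sum(X) / len(X)  # stand-in for the actual estimator
        return self

class Model(IOLayer, ParamLayer):     # MRO: Model -> IOLayer -> ParamLayer
    pass

m = Model()
m._fit([1.0, 2.0, 3.0], [0, 1, 1])
print(m.coef_)  # 4.0 -- preprocessing ran before the parameter layer
```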