machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/models/cross_validation.py
@@ -0,0 +1,118 @@
+ import numpy as np
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+
+ class CrossValidator:
+     """
+     A custom implementation of k-fold cross-validation for evaluating machine learning models.
+
+     Parameters
+     ----------
+     model : object
+         A machine learning model that implements `fit(X, y)` and `predict(X)` methods.
+
+     X : array-like of shape (n_samples, n_features)
+         Feature matrix.
+
+     y : array-like of shape (n_samples,)
+         Target labels.
+
+     k : int, default=5
+         Number of folds to use in cross-validation.
+
+     shuffle : bool, default=True
+         Whether to shuffle the dataset before splitting into folds.
+
+     random_seed : int or None, default=None
+         Seed used to shuffle the data. Ignored if `shuffle=False`.
+
+     verbose : bool, default=False
+         If True, enables detailed logging.
+
+     Attributes
+     ----------
+     folds : list of tuple
+         List of (train_indices, test_indices) for each fold.
+
+     Example
+     -------
+     >>> import numpy as np
+     >>> from machinegnostics.models import CrossValidator
+     >>> from machinegnostics.models import LinearRegressor
+     >>> from sklearn.metrics import mean_squared_error
+     >>> X = np.random.rand(100, 10)
+     >>> y = np.random.rand(100)
+     >>> model = LinearRegressor()
+     >>> cv = CrossValidator(model, X, y, k=5, shuffle=True, random_seed=42)
+     >>> scores = cv.evaluate(mean_squared_error)
+     >>> print("Cross-Validation Scores:", scores)
+     >>> print("Mean Score:", np.mean(scores))
+     """
+
+     def __init__(self, model, X: np.ndarray, y: np.ndarray, k=5, shuffle=True, random_seed=None, verbose: bool = False):
+         self.model = model
+         self.X = np.array(X)
+         self.y = np.array(y)
+         self.k = k
+         self.shuffle = shuffle
+         self.random_seed = random_seed
+         self.verbose = verbose
+
+         self.logger = get_logger('CrossValidator', level=logging.INFO if verbose else logging.WARNING)
+
+     def split(self):
+         """
+         Split the dataset into k folds.
+
+         Returns
+         -------
+         folds : list of tuple
+             A list of (train_indices, test_indices) for each fold.
+         """
+         self.logger.info("Starting k-fold split...")
+         n_samples = len(self.X)
+         indices = np.arange(n_samples)
+
+         if self.shuffle:
+             rng = np.random.default_rng(self.random_seed)
+             rng.shuffle(indices)
+
+         # Distribute samples as evenly as possible: the first n_samples % k folds get one extra sample.
+         fold_sizes = np.full(self.k, n_samples // self.k, dtype=int)
+         fold_sizes[:n_samples % self.k] += 1
+
+         current = 0
+         folds = []
+         for fold_size in fold_sizes:
+             start, stop = current, current + fold_size
+             test_idx = indices[start:stop]
+             train_idx = np.concatenate([indices[:start], indices[stop:]])
+             folds.append((train_idx, test_idx))
+             current = stop
+         self.logger.info("Completed k-fold split.")
+         return folds
+
+     def evaluate(self, scoring_func):
+         """
+         Perform k-fold cross-validation and return the evaluation scores.
+
+         Parameters
+         ----------
+         scoring_func : callable
+             A function that takes `y_true` and `y_pred` and returns a numeric score (e.g., accuracy_score).
+
+         Returns
+         -------
+         scores : list of float
+             Evaluation scores for each fold.
+         """
+         self.logger.info("Starting cross-validation evaluation...")
+         scores = []
+         for train_idx, test_idx in self.split():
+             X_train, y_train = self.X[train_idx], self.y[train_idx]
+             X_test, y_test = self.X[test_idx], self.y[test_idx]
+
+             self.model.fit(X_train, y_train)
+             y_pred = self.model.predict(X_test)
+             score = scoring_func(y_test, y_pred)
+             scores.append(score)
+         self.logger.info("Completed cross-validation evaluation.")
+         return scores
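
The class above wires split() and evaluate() together; here is a minimal end-to-end sketch (a sketch only: it assumes LinearRegressor from machinegnostics.models exposes the fit/predict interface the docstring requires, and uses synthetic data):

    import numpy as np
    from machinegnostics.models import CrossValidator, LinearRegressor
    from sklearn.metrics import mean_squared_error

    rng = np.random.default_rng(0)
    X = rng.random((100, 3))
    y = 2.0 * X[:, 0] - X[:, 1] + rng.normal(0.0, 0.1, 100)  # synthetic linear target

    cv = CrossValidator(LinearRegressor(), X, y, k=5, shuffle=True, random_seed=42)
    scores = cv.evaluate(mean_squared_error)  # one MSE per fold
    print(np.mean(scores), np.std(scores))    # summary across the 5 folds

Note that evaluate() reuses the same model object for every fold, so a model whose fit() does not fully reset its state should be re-instantiated per fold.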
machinegnostics/models/data_split.py
@@ -0,0 +1,106 @@
+ import numpy as np
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+
+ def train_test_split(X: np.ndarray, y=None, test_size=0.25, shuffle=True, random_seed=None, verbose: bool = False):
+     """
+     Split arrays or matrices into random train and test subsets.
+
+     Parameters
+     ----------
+     X : array-like (list, tuple, or np.ndarray)
+         Feature data to be split. Must be indexable and of consistent length.
+
+     y : array-like or None, optional (default=None)
+         Target data to be split alongside X. Must be the same length as X.
+
+     test_size : float or int, optional (default=0.25)
+         If float, must be between 0.0 and 1.0 and represents the proportion of the dataset to include in the test split.
+         If int, represents the absolute number of test samples.
+
+     shuffle : bool, optional (default=True)
+         Whether or not to shuffle the data before splitting.
+
+     random_seed : int or None, optional (default=None)
+         Controls the shuffling applied to the data before splitting.
+
+     verbose : bool, optional (default=False)
+         If True, enables detailed logging.
+
+     Returns
+     -------
+     X_train, X_test : np.ndarray
+         Train-test split of X.
+
+     y_train, y_test : np.ndarray or None
+         Train-test split of y. If y is None, these will also be None.
+
+     Raises
+     ------
+     ValueError
+         If inputs are invalid or test_size is not appropriate.
+
+     Example
+     -------
+     >>> import numpy as np
+     >>> from machinegnostics.models import train_test_split
+     >>> X = np.arange(20).reshape(10, 2)
+     >>> y = np.arange(10)
+     >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_seed=42)
+     >>> print("X_train:", X_train)
+     >>> print("X_test:", X_test)
+     >>> print("y_train:", y_train)
+     >>> print("y_test:", y_test)
+     """
+     logger = get_logger('train_test_split', level=logging.INFO if verbose else logging.WARNING)
+     logger.info("Starting train_test_split...")
+
+     # Convert inputs to numpy arrays
+     X = np.asarray(X)
+     if y is not None:
+         y = np.asarray(y)
+
+     # Validate shapes
+     if y is not None and len(X) != len(y):
+         logger.error(f"X and y must have the same number of samples, got {len(X)} and {len(y)}.")
+         raise ValueError(f"X and y must have the same number of samples, got {len(X)} and {len(y)}.")
+
+     n_samples = len(X)
+
+     # Validate and compute test size
+     if isinstance(test_size, float):
+         if not 0.0 < test_size < 1.0:
+             logger.error("If test_size is a float, it must be between 0.0 and 1.0.")
+             raise ValueError("If test_size is a float, it must be between 0.0 and 1.0.")
+         n_test = int(np.ceil(test_size * n_samples))
+     elif isinstance(test_size, int):
+         if not 0 < test_size < n_samples:
+             logger.error("If test_size is an int, it must be between 1 and len(X) - 1.")
+             raise ValueError("If test_size is an int, it must be between 1 and len(X) - 1.")
+         n_test = test_size
+     else:
+         logger.error("test_size must be either a float or an int.")
+         raise TypeError("test_size must be either a float or an int.")
+
+     n_train = n_samples - n_test
+
+     # Create indices and shuffle
+     indices = np.arange(n_samples)
+     if shuffle:
+         rng = np.random.default_rng(seed=random_seed)
+         rng.shuffle(indices)
+
+     train_idx = indices[:n_train]
+     test_idx = indices[n_train:]
+
+     X_train = X[train_idx]
+     X_test = X[test_idx]
+
+     if y is not None:
+         y_train = y[train_idx]
+         y_test = y[test_idx]
+     else:
+         y_train = y_test = None
+     logger.info("Completed train_test_split.")
+
+     return X_train, X_test, y_train, y_test
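
Two details of the function above are easy to miss: a float test_size is rounded up with np.ceil, and passing y=None returns None for both target splits. A short sketch of both call forms (import path as in the docstring; data is synthetic):

    import numpy as np
    from machinegnostics.models import train_test_split

    X = np.arange(20).reshape(10, 2)
    y = np.arange(10)

    # Float: ceil(0.25 * 10) = 3 test samples, so 7 train samples.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_seed=0)
    assert len(X_te) == 3 and len(X_tr) == 7

    # Int: exactly 4 test samples; with y=None the target splits are None.
    X_tr, X_te, y_tr, y_te = train_test_split(X, test_size=4, random_seed=0)
    assert len(X_te) == 4 and y_tr is None and y_te is None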
machinegnostics/models/regression/__init__.py
@@ -0,0 +1,2 @@
+ from machinegnostics.models.regression.mg_poly_reg import PolynomialRegressor
+ from machinegnostics.models.regression.mg_lin_reg import LinearRegressor
machinegnostics/models/regression/layer_histroy_rob_reg.py
@@ -0,0 +1,139 @@
+ import numpy as np
+ from machinegnostics.models.regression.layer_param_rob_reg import ParamRobustRegressorBase
+ from dataclasses import dataclass
+
+ @dataclass
+ class ParamRecord:
+     iteration: int
+     h_loss: float = None
+     weights: np.ndarray = None
+     coefficients: np.ndarray = None
+     degree: int = None
+     rentropy: float = None
+     fi: np.ndarray = None
+     hi: np.ndarray = None
+     fj: np.ndarray = None
+     hj: np.ndarray = None
+     infoi: dict = None
+     infoj: dict = None
+     pi: np.ndarray = None
+     pj: np.ndarray = None
+     ei: float = None
+     ej: float = None
+
+ class HistoryRobustRegressor(ParamRobustRegressorBase):
+     """
+     History class for the Robust Regressor model.
+
+     This class extends ParamRobustRegressorBase to maintain a history
+     of model parameters and gnostic loss values during training iterations.
+
+     Parameters recorded in the history:
+     - h_loss: gnostic loss value at each iteration
+     - iteration: the iteration number
+     - weights: model weights at each iteration
+     - coefficients: model coefficients at each iteration
+     - degree: degree of the polynomial features used in the model
+     - rentropy: entropy of the model at each iteration
+     - fi, hi, fj, hj, infoi, infoj, pi, pj, ei, ej: additional gnostic information, if calculated
+     """
+
+     def __init__(self,
+                  degree: int = 1,
+                  max_iter: int = 100,
+                  tol: float = 1e-3,
+                  mg_loss: str = 'hi',
+                  early_stopping: bool = True,
+                  verbose: bool = False,
+                  scale: str | int | float = 'auto',
+                  data_form: str = 'a',
+                  gnostic_characteristics: bool = True,
+                  history: bool = True):
+         super().__init__(
+             degree=degree,
+             max_iter=max_iter,
+             tol=tol,
+             mg_loss=mg_loss,
+             early_stopping=early_stopping,
+             verbose=verbose,
+             scale=scale,
+             data_form=data_form,
+             gnostic_characteristics=gnostic_characteristics
+         )
+
+         self.degree = degree
+         self.max_iter = max_iter
+         self.tol = tol
+         self.mg_loss = mg_loss
+         self.early_stopping = early_stopping
+         self.verbose = verbose
+         self.scale = scale
+         self.data_form = data_form
+         self.gnostic_characteristics = gnostic_characteristics
+         self._history = history
+         self.params = [
+             {
+                 'iteration': 0,
+                 'loss': None,
+                 'weights': None,
+                 'coefficients': None,
+                 'degree': self.degree,
+                 'rentropy': None,
+                 'fi': None,
+                 'hi': None,
+                 'fj': None,
+                 'hj': None,
+                 'infoi': None,
+                 'infoj': None,
+                 'pi': None,
+                 'pj': None,
+                 'ei': None,
+                 'ej': None
+             }
+         ]
+
+         self.logger.info("HistoryRobustRegressor initialized.")
+
+     def _fit(self, X: np.ndarray, y: np.ndarray):
+         """
+         Fit the model to the data and record history.
+
+         Parameters
+         ----------
+         X : np.ndarray
+             Input features.
+         y : np.ndarray
+             Target values.
+         """
+         self.logger.info("Starting fit process for HistoryRobustRegressor.")
+         # Call the parent fit method to perform fitting
+         super()._fit(X, y)
+
+         # Record the fitted state in history as a dict
+         params_dict = {}
+
+         if self.gnostic_characteristics:
+             params_dict['iteration'] = self._iter + 1
+             params_dict['loss'] = self.loss
+             params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+             params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+             params_dict['degree'] = self.degree
+             params_dict['rentropy'] = self.re
+             params_dict['fi'] = self.fi
+             params_dict['hi'] = self.hi
+             params_dict['fj'] = self.fj
+             params_dict['hj'] = self.hj
+             params_dict['infoi'] = self.infoi
+             params_dict['infoj'] = self.infoj
+             params_dict['pi'] = self.pi
+             params_dict['pj'] = self.pj
+             params_dict['ei'] = self.ei
+             params_dict['ej'] = self.ej
+         else:
+             params_dict['iteration'] = 0
+             params_dict['loss'] = None
+             params_dict['weights'] = self.weights.copy() if self.weights is not None else None
+             params_dict['coefficients'] = self.coefficients.copy() if self.coefficients is not None else None
+             params_dict['degree'] = self.degree
+
+         self.params.append(params_dict)
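
The ParamRecord dataclass above is declared but the history itself is stored as plain dicts in self.params: index 0 holds the initial placeholder and each _fit call appends one record. A sketch of building an equivalent record with the dataclass (assuming it is importable from the module path shown in the file list; unset fields default to None):

    import numpy as np
    from dataclasses import asdict
    from machinegnostics.models.regression.layer_histroy_rob_reg import ParamRecord

    # One history entry, expressed as a typed record instead of a dict.
    record = ParamRecord(iteration=1, h_loss=0.42,
                         coefficients=np.array([0.1, 2.0]), degree=1)
    print(asdict(record)['coefficients'])  # asdict() yields the same dict shape as self.params entries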
machinegnostics/models/regression/layer_io_process_rob_rig.py
@@ -0,0 +1,88 @@
+ import numpy as np
+ from machinegnostics.magcal import DataProcessLayerBase
+ from machinegnostics.models.regression.layer_mlflow_rob_reg import InterfaceRobustRegressor
+ from machinegnostics.magcal import disable_parent_docstring
+
+ @disable_parent_docstring
+ class DataProcessRobustRegressor(DataProcessLayerBase, InterfaceRobustRegressor):
+     """
+     Data processing layer for the Robust Regressor model.
+     Handles data preprocessing specific to the Robust Regressor model.
+     """
+     @disable_parent_docstring
+     def __init__(self,
+                  degree: int = 1,
+                  max_iter: int = 100,
+                  tol: float = 1e-3,
+                  mg_loss: str = 'hi',
+                  early_stopping: bool = True,
+                  verbose: bool = False,
+                  scale: str | int | float = 'auto',
+                  data_form: str = 'a',
+                  gnostic_characteristics: bool = True,
+                  history: bool = True,
+                  **kwargs):
+         super().__init__(
+             degree=degree,
+             max_iter=max_iter,
+             tol=tol,
+             mg_loss=mg_loss,
+             early_stopping=early_stopping,
+             verbose=verbose,
+             scale=scale,
+             data_form=data_form,
+             gnostic_characteristics=gnostic_characteristics,
+             history=history,
+             **kwargs
+         )
+
+         # --- argument checks ---
+         if not isinstance(degree, int) or degree < 1:
+             raise ValueError("Degree must be a positive integer.")
+         if not isinstance(max_iter, int) or max_iter < 1:
+             raise ValueError("max_iter must be a positive integer.")
+         if not isinstance(tol, (float, int)) or tol <= 0:
+             raise ValueError("tol must be a positive float or int.")
+         if mg_loss not in ['hi', 'hj']:
+             raise ValueError("mg_loss must be either 'hi' or 'hj'.")
+         if not isinstance(scale, (str, int, float)):
+             raise ValueError("scale must be a string, int, or float.")
+         if isinstance(scale, (int, float)) and (scale < 0 or scale > 2):
+             raise ValueError("scale must be between 0 and 2 if it is a number.")
+         if data_form not in ['a', 'm']:
+             raise ValueError("data_form must be either 'a' (additive) or 'm' (multiplicative).")
+         self.degree = degree
+         self.max_iter = max_iter
+         self.tol = tol
+         self.mg_loss = mg_loss
+         self.early_stopping = early_stopping
+         self.verbose = verbose
+         self.scale = scale
+         self.data_form = data_form
+         self.gnostic_characteristics = gnostic_characteristics
+         self._history = history
+         self.params = []
+
+         # logger
+         self.logger.info("DataProcessRobustRegressor initialized.")
+
+     @disable_parent_docstring
+     def _fit(self, X: np.ndarray, y: np.ndarray):
+         """
+         Fit the model to the data and preprocess it.
+         """
+         self.logger.info("Starting fit process for DataProcessRobustRegressor.")
+         X, y = self._fit_io(X, y)
+         # Call the fit method from the next class in the MRO
+         return super()._fit(X, y)
+
+     @disable_parent_docstring
+     def _predict(self, X: np.ndarray) -> np.ndarray:
+         """
+         Predict using the model after preprocessing the input data.
+         """
+         self.logger.info("Making predictions with DataProcessRobustRegressor.")
+         X = self._predict_io(X)
+         y_pred = super()._predict(X)
+         # y_pred = self._convert_output(y_pred, self.data_form)
+         return y_pred
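
The layer chain above leans on Python's cooperative super(): the data-processing layer validates and converts inputs, then super()._fit(X, y) hands the processed arrays to the next class in the MRO (InterfaceRobustRegressor, then HistoryRobustRegressor, down to the parameter layer). A self-contained toy illustrating the same pattern with made-up layer names:

    class ParamLayer:
        def _fit(self, X, y):
            print("param layer: solve for coefficients")

    class HistoryLayer(ParamLayer):
        def _fit(self, X, y):
            super()._fit(X, y)                     # fit first...
            print("history layer: record state")   # ...then record the result

    class IOLayer(HistoryLayer):
        def _fit(self, X, y):
            print("io layer: validate and convert inputs")
            return super()._fit(X, y)               # pass processed data down the MRO

    IOLayer()._fit([[1.0]], [1.0])
    # prints: io layer, then param layer, then history layer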
machinegnostics/models/regression/layer_mlflow_rob_reg.py
@@ -0,0 +1,134 @@
+ import numpy as np
+ from machinegnostics.models.regression.layer_histroy_rob_reg import HistoryRobustRegressor
+ import mlflow
+ import os
+ import joblib
+
+ class InterfaceRobustRegressor(HistoryRobustRegressor, mlflow.pyfunc.PythonModel):
+     """
+     Interface for the Robust Regressor model with MLflow integration.
+
+     This class extends HistoryRobustRegressor to provide an interface for
+     logging and tracking model parameters and performance metrics using MLflow.
+
+     Parameters needed for MLflow tracking:
+     - experiment_name: name of the MLflow experiment
+     - run_name: name of the MLflow run
+     """
+
+     def __init__(self,
+                  degree: int = 1,
+                  max_iter: int = 100,
+                  tol: float = 1e-8,
+                  mg_loss: str = 'hi',
+                  early_stopping: bool = True,
+                  verbose: bool = False,
+                  scale: str | int | float = 'auto',
+                  data_form: str = 'a',
+                  gnostic_characteristics: bool = True,
+                  history: bool = True):
+         super().__init__(
+             degree=degree,
+             max_iter=max_iter,
+             tol=tol,
+             mg_loss=mg_loss,
+             early_stopping=early_stopping,
+             verbose=verbose,
+             scale=scale,
+             data_form=data_form,
+             gnostic_characteristics=gnostic_characteristics,
+             history=history
+         )
+         self.coefficients = None
+         self.weights = None
+         self.degree = degree
+         self.max_iter = max_iter
+         self.tol = tol
+         self.mg_loss = mg_loss
+         self.early_stopping = early_stopping
+         self.verbose = verbose
+         self.scale = scale
+         self.data_form = data_form
+         self.gnostic_characteristics = gnostic_characteristics
+         self._history = history
+         self.params = []
+
+         # logger
+         self.logger.info("InterfaceRobustRegressor initialized.")
+
+     def _fit(self, X: np.ndarray, y: np.ndarray):
+         """
+         Fit the model to the data and log parameters to MLflow.
+
+         Parameters
+         ----------
+         X : np.ndarray
+             Input features.
+         y : np.ndarray
+             Target values.
+         """
+         # Call the fit method from HistoryRobustRegressor
+         self.logger.info("Starting fit process for InterfaceRobustRegressor. Logging to MLflow available.")
+         super()._fit(X, y)
+         return self
+
+     def _predict(self, model_input) -> np.ndarray:
+         """
+         Predict target values for input data.
+
+         Accepts numpy arrays, pandas DataFrames, or pyspark DataFrames.
+
+         Parameters
+         ----------
+         model_input : np.ndarray, pd.DataFrame, or pyspark.sql.DataFrame
+             Input data for prediction.
+
+         Returns
+         -------
+         np.ndarray
+             Predicted values.
+         """
+         self.logger.info("Making predictions with InterfaceRobustRegressor.")
+         predictions = super()._predict(model_input)
+         return predictions
+
+     def save_model(self, path: str):
+         """
+         Save the trained model to disk using joblib.
+
+         Parameters
+         ----------
+         path : str
+             Directory path where the model will be saved.
+             If the directory does not exist, it will be created.
+             If a model is already saved there, it will be overwritten.
+             The model is written to a file named "model.pkl" inside the directory.
+         """
+         self.logger.info(f"Saving model to {path}.")
+         os.makedirs(path, exist_ok=True)
+         joblib.dump(self, os.path.join(path, "model.pkl"))
+
+     @classmethod
+     def load_model(cls, path: str):
+         """
+         Load a trained model from disk using joblib.
+
+         Parameters
+         ----------
+         path : str
+             Directory path where the model is saved. The model is read from a file
+             named "model.pkl" in the specified directory.
+
+         Returns
+         -------
+         InterfaceRobustRegressor
+             An instance of the model loaded from the specified path.
+         """
+         return joblib.load(os.path.join(path, "model.pkl"))
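
A round-trip sketch for the persistence helpers above (a sketch under assumptions: that a concrete model such as LinearRegressor inherits save_model/load_model through this interface and exposes public fit/predict methods; the output directory is arbitrary):

    import numpy as np
    from machinegnostics.models import LinearRegressor

    X = np.arange(10, dtype=float).reshape(-1, 1)
    y = 3.0 * X.ravel() + 1.0

    model = LinearRegressor()
    model.fit(X, y)                         # assumed public fit
    model.save_model("artifacts/lin_reg")   # writes artifacts/lin_reg/model.pkl

    restored = LinearRegressor.load_model("artifacts/lin_reg")
    print(restored.predict(X[:3]))

Because joblib.dump pickles the whole estimator, load_model returns a fully fitted instance, including the params history recorded during training.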