machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,107 @@
1
+ '''
2
+ ManGo - Machine Gnostics Library
3
+ Copyright (C) 2025 Machine Gnostics Team
4
+
5
+ This work is licensed under the terms of the GNU General Public License version 3.0.
6
+
7
+ Author: Nirmal Parmar
8
+ Date: 2025-10-01
9
+ Description: Machine Gnostics logic for robust regression model and wrapping it with mlflow
10
+ '''
11
+
12
+ import os
13
+ import joblib
14
+ import mlflow
15
+ import numpy as np
16
+ from machinegnostics.models.classification.layer_history_log_reg import HistoryRobustRegressor
17
+
18
class InterfaceLogisticRegressor(HistoryRobustRegressor, mlflow.pyfunc.PythonModel):
    """
    MLflow-wrapped Gnostic Logistic Regression interface.

    Developer Notes:
    ----------------
    - Inherits from HistoryRobustRegressor for the core training/history logic
      and from mlflow.pyfunc.PythonModel for MLflow integration.
      (Fix: docstring previously named classes `_LogisticRegressor` /
      `_LogisticRegressorParamBase` that do not match this definition.)
    - Supports saving/loading via joblib for reproducibility and deployment.
    - Handles numpy arrays, pandas DataFrames, and pyspark DataFrames for prediction.
    - Use fit(X, y) for training and predict(X) or predict_proba(X) for inference.
    - Use save_model(path) and load_model(path) for model persistence.
    """

    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        """Initialize the MLflow interface layer and forward core options to the parent."""
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )
        # Re-store constructor arguments locally so the wrapper stays
        # introspectable and picklable independent of the parent layers.
        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        # NOTE(review): `history` is kept locally but not forwarded to the
        # parent constructor — confirm the parent chain defaults match.
        self.history = history
        self.proba = proba

        # logger (provided by the parent layers)
        self.logger.info("InterfaceLogisticRegressor initialized.")

    def _fit(self, X, y):
        """
        Fit the logistic regression model using the parent class logic.

        Returns self so calls can be chained.
        """
        self.logger.info("Starting fit process for InterfaceLogisticRegressor.")
        super()._fit(X, y)
        # Fix: removed the redundant no-op self-assignments of
        # `coefficients` and `weights`; the parent _fit sets both
        # attributes on this instance already.
        return self

    def _predict(self, model_input) -> np.ndarray:
        """
        Predict class labels for input data.

        Accepts numpy arrays, pandas DataFrames, or pyspark DataFrames.
        """
        self.logger.info("Making predictions with InterfaceLogisticRegressor.")
        return super()._predict(model_input)

    def _predict_proba(self, model_input) -> np.ndarray:
        """
        Predict probabilities for input data.

        Accepts numpy arrays, pandas DataFrames, or pyspark DataFrames.
        """
        self.logger.info("Calculating predicted probabilities with InterfaceLogisticRegressor.")
        return super()._predict_proba(model_input)

    def save_model(self, path):
        """
        Save the trained model to disk using joblib.

        The model is written to ``<path>/model.pkl``; ``path`` is created
        if it does not exist.
        """
        self.logger.info(f"Saving model to {path}.")
        os.makedirs(path, exist_ok=True)
        joblib.dump(self, os.path.join(path, "model.pkl"))

    @classmethod
    def load_model(cls, path):
        """
        Load a trained model from disk using joblib.

        Expects a ``model.pkl`` produced by :meth:`save_model` inside ``path``.
        """
        return joblib.load(os.path.join(path, "model.pkl"))
@@ -0,0 +1,275 @@
1
+ '''
2
+ Machine Gnostics - Machine Gnostics Library
3
+ Copyright (C) 2025 Machine Gnostics Team
4
+
5
+ This work is licensed under the terms of the GNU General Public License version 3.0.
6
+
7
+ Author: Nirmal Parmar
8
+ Date: 2025-05-31
9
+
10
+ Description:
11
+ Regressor param base class that can be used for robust classification models.
12
+ - logical regression
13
+
14
+ '''
15
+ import numpy as np
16
+ from machinegnostics.magcal import (ScaleParam,
17
+ GnosticsWeights,
18
+ ParamBase)
19
+ from machinegnostics.magcal.util.min_max_float import np_max_float, np_min_float
20
+
21
class ParamLogisticRegressorBase(ParamBase):
    """
    Parameter/base layer for the Gnostic Logistic Regressor model.

    Implements the IRLS-style training loop with gnostic sample weighting,
    plus label and probability prediction helpers.

    Attributes
    ----------
    coefficients : np.ndarray
        Fitted model coefficients (set by ``_fit``).
    weights : np.ndarray
        Per-sample gnostic weights (set by ``_fit``).
    _history : list | None
        Per-iteration training records, or ``None`` when ``history=False``.
    """

    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        """Store hyperparameters and optionally seed the training history."""
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )
        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        self.proba = proba
        self.mg_loss = 'hi'  # gnostic loss variant used by this model
        # Optional training history, seeded with an iteration-0 placeholder.
        if history:
            self._history = [{
                'iteration': 0,
                'log_loss': None,
                'coefficients': None,
                'rentropy': None,
                'weights': None,
            }]
        else:
            self._history = None

        # logger (provided by the parent layer)
        self.logger.info("ParamLogisticRegressorBase initialized.")

    def _fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit the model to the data using an IRLS-style gnostic update loop.

        Parameters
        ----------
        X : np.ndarray
            Input features.
        y : np.ndarray
            Binary target values (0 or 1).

        Raises
        ------
        ValueError
            If ``self.proba`` is neither 'gnostic' nor 'sigmoid'.
        """
        self.logger.info("Starting fit process for Logistic Regressor.")
        # Fix: validate the probability mode up-front. Previously an invalid
        # value left `p` undefined and crashed mid-loop with a NameError.
        if self.proba not in ('gnostic', 'sigmoid'):
            self.logger.error("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")
            raise ValueError("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")

        # Generate polynomial features
        X_poly = self._generate_polynomial_features(X)
        n_samples, n_features = X_poly.shape

        # Initialize per-sample weights and coefficients
        self.weights = np.ones(n_samples)
        self.coefficients = np.zeros(n_features)

        for iteration in range(1, self.max_iter + 1):
            # `_iter` is 1-based, matching the original loop's post-increment.
            self._iter = iteration
            self._prev_coef = self.coefficients.copy()

            try:
                # Residuals of the current linear predictor.
                y0 = X_poly @ self.coefficients
                residuals = y0 - y

                # Machine-gnostics data conversions.
                z = self._data_conversion(residuals)
                z_y = self._data_conversion(y)
                z_y0 = self._data_conversion(y0)

                # Gnostic per-sample weights.
                gw = GnosticsWeights()._get_gnostic_weights(z)
                new_weights = self.weights * gw
                W = np.diag(new_weights)

                # Scale: local gnostic scale or user-supplied constant.
                if self.scale == 'auto':
                    scale = ScaleParam()
                    zz = z_y0 - z_y
                    # Avoid division by zero in the 1/zz term below.
                    zz = np.where(zz == 0, np_min_float(), zz)
                    s = scale._gscale_loc((2 / (zz + 1 / zz)))
                else:
                    s = self.scale

                # Probability estimate.
                if self.proba == 'gnostic':
                    # NOTE: currently uses p from local S (ELDF); this can be
                    # improved in the future.
                    p, info, re = self._gnostic_prob(z=z)
                else:  # 'sigmoid' (validated above)
                    p = self._sigmoid(y0)
                    _, info, re = self._gnostic_prob(z=z)

                # IRLS update with a small ridge term for numerical stability.
                # Fix: build the system outside the try so the pinv fallback
                # never references unbound names.
                XtW = X_poly.T @ W
                XtWX = XtW @ X_poly + 1e-8 * np.eye(n_features)
                XtWy = XtW @ (y0 + (y - p) / (p * (1 - p) + 1e-8))
                try:
                    self.coefficients = np.linalg.solve(XtWX, XtWy)
                except np.linalg.LinAlgError:
                    # Singular system: fall back to the pseudo-inverse.
                    self.coefficients = np.linalg.pinv(XtWX) @ XtWy

                # --- Log loss on clipped probabilities ---
                proba_pred = np.clip(p, 1e-8, 1 - 1e-8)
                self.log_loss = -np.mean(y * np.log(proba_pred) + (1 - y) * np.log(1 - proba_pred))

                # Scalar summaries for history / convergence.
                re = np.mean(re)
                info = np.mean(info)

                if self.gnostic_characteristics:
                    self.loss, self.re, self.hi, self.hj, self.fi, self.fj, \
                    self.pi, self.pj, self.ei, self.ej, self.infoi, self.infoj = \
                        self._gnostic_criterion(z=z_y0, z0=z_y, s=s)

                # NOTE: self.weights is intentionally left unchanged here;
                # the weight-normalization update was disabled upstream.

                # Minimal history capture.
                if self._history is not None:
                    self._history.append({
                        'iteration': self._iter,
                        'log_loss': self.log_loss,
                        'coefficients': self.coefficients.copy(),
                        'rentropy': re,
                        'weights': self.weights.copy(),
                    })

                # Unified convergence check: stop if mean rentropy or
                # log-loss change is within tolerance.
                # Fix: guard on history being enabled — previously
                # history=False crashed here with len(None).
                if self.early_stopping and self._history is not None:
                    prev_hist = self._history[-2] if len(self._history) > 1 else None
                    curr_re = np.mean(re)
                    curr_log_loss = self.log_loss
                    prev_re_val = np.mean(prev_hist['rentropy']) if prev_hist and prev_hist['rentropy'] is not None else None
                    prev_log_loss_val = prev_hist['log_loss'] if prev_hist and prev_hist['log_loss'] is not None else None

                    re_converged = prev_re_val is not None and np.abs(curr_re - prev_re_val) < self.tol
                    log_loss_converged = prev_log_loss_val is not None and np.abs(curr_log_loss - prev_log_loss_val) < self.tol

                    if re_converged or log_loss_converged:
                        if self.verbose:
                            self.logger.info(f"Converged at iteration {self._iter} (early stop):")
                            if re_converged:
                                self.logger.info(f"mean rentropy change below tolerance (rentropy={np.abs(curr_re - prev_re_val):.6e}).")
                            if log_loss_converged:
                                self.logger.info(f"log_loss change below tolerance (log_loss={np.abs(curr_log_loss - prev_log_loss_val):.6e}).")
                        break
                if self.verbose:
                    self.logger.info(f"Iteration {self._iter}, Log Loss: {self.log_loss:.6f}, mean residual entropy: {np.mean(re):.6f}")

            except (ZeroDivisionError, np.linalg.LinAlgError) as e:
                # Numerical failure: roll back to the previous coefficients
                # and stop training.
                self.coefficients = self._prev_coef
                if self.verbose:
                    self.logger.error(f"Error during fitting at iteration {self._iter}: {e}")
                break

    def _predict(self, X: np.ndarray, threshold: float = 0.5) -> np.ndarray:
        """
        Predict class labels for the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features to predict class labels for.
        threshold : float, optional (default=0.5)
            Probability threshold for assigning the positive class.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels (0 or 1).
        """
        self.logger.info("Making predictions with Logistic Regressor.")
        proba = self._predict_proba(X)
        return (proba >= threshold).astype(int)

    def _predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Predict positive-class probabilities for the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features to predict probabilities for.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted probabilities.

        Raises
        ------
        ValueError
            If the model is not fitted, or ``self.proba`` is invalid.
        """
        self.logger.info("Calculating predicted probabilities with Logistic Regressor.")
        if self.coefficients is None:
            raise ValueError("Model is not fitted yet. Call 'fit' before 'predict_proba'.")

        X_poly = self._generate_polynomial_features(X)
        linear_pred = X_poly @ self.coefficients

        # gnostic vs sigmoid probability calculation
        if self.proba == 'gnostic':
            proba, info, re = self._gnostic_prob(-linear_pred)
        elif self.proba == 'sigmoid':
            proba = self._sigmoid(linear_pred)
        else:
            self.logger.error("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")
            raise ValueError("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")

        return proba
@@ -0,0 +1,273 @@
1
+ '''
2
+ Machine Gnostics - Machine Gnostics Library
3
+ Copyright (C) 2025 Machine Gnostics Team
4
+
5
+ This work is licensed under the terms of the GNU General Public License version 3.0.
6
+
7
+ Author: Nirmal Parmar
8
+
9
+ Description:
10
+ This module implements a logistic regression model using mathematical gnostics principles.
11
+ '''
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+ from machinegnostics.models.classification.layer_io_process_log_reg import DataProcessLogisticRegressor
16
+ from machinegnostics.metrics import f1_score
17
+ from machinegnostics.magcal import disable_parent_docstring
18
+ from typing import Union
19
+
20
class LogisticRegressor(DataProcessLogisticRegressor):
    """
    LogisticRegressor implements a logistic regression model based on
    Mathematical Gnostics principles.

    This class is the public, feature-rich logistic regression entry point of
    the Machine Gnostics framework. It supports polynomial feature expansion,
    custom loss functions, early stopping, gnostic-based probability
    estimation, and detailed training history tracking.

    Key Features:
    - Polynomial feature expansion up to a user-specified degree.
    - Choice of probability estimation method: 'gnostic' (default) or standard 'sigmoid'.
    - Calculation of gnostic characteristics for advanced model diagnostics.
    - Early stopping based on convergence of loss or entropy.
    - Verbose logging for monitoring training progress.
    - Optional scaling and data processing modes.
    - Maintains a history of model parameters and losses for analysis.

    Parameters
    ----------
    degree : int, default=1
        Degree of polynomial features to use for input expansion.
    max_iter : int, default=100
        Maximum number of iterations for the optimization algorithm.
    tol : float, default=1e-3
        Tolerance for convergence. Training stops if the change in loss or
        entropy is below this value.
    mg_loss : str, default='hi'
        Type of gnostic loss to use (e.g., 'hi', 'hj').
    early_stopping : bool, default=True
        Whether to stop training early if convergence is detected.
    verbose : bool, default=False
        If True, prints detailed logs during training.
    scale : str | int | float, default='auto'
        Scaling method for input features; a string identifier or numeric value.
    data_form : str, default='a'
        Data processing form: 'a' for additive, 'm' for multiplicative.
    gnostic_characteristics : bool, default=True
        If True, calculates and stores gnostic characteristics during training.
    history : bool, default=True
        If True, maintains a history of model parameters and losses.
    proba : str, default='gnostic'
        Probability estimation method: 'gnostic' or 'sigmoid'.

    Attributes
    ----------
    coefficients : np.ndarray
        Fitted model coefficients after training.
    weights : np.ndarray
        Sample weights used during training.
    _history : list | None
        Training history records (loss, coefficients, entropy, ...), or None
        when history is disabled.
    params : list
        List of model parameters (for compatibility and inspection).

    Examples
    --------
    >>> from machinegnostics.models.classification.mg_log_reg import LogisticRegressor
    >>> model = LogisticRegressor(degree=2, max_iter=200, verbose=True)
    >>> model.fit(X_train, y_train)
    >>> y_pred = model.predict(X_test)
    >>> print("F1 Score:", model.score(X_test, y_test))

    Notes
    -----
    - More information on gnostic characteristics can be found in the Machine
      Gnostics documentation: https://machinegnostics.info/
    """

    @disable_parent_docstring
    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 mg_loss: str = 'hi',
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        """
        Initialize the LogisticRegressor with the specified hyperparameters.

        See the class docstring for the meaning of each parameter.
        """
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            mg_loss=mg_loss,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )

        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.mg_loss = mg_loss
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        self.history = history
        self.proba = proba
        self.params = []
        # Fix: do not unconditionally clobber the parent's seeded `_history`
        # list with [] — that discarded the iteration-0 record and ignored the
        # parent layer's history handling. Only honor an explicit opt-out here.
        if not history:
            self._history = None

        # logger (provided by the parent layers)
        self.logger.info("LogisticRegressor initialized.")

    def fit(self, X, y):
        """
        Fit the LogisticRegressor model to the training data.

        Trains the model on the provided features and binary labels using the
        parent layer's gnostic IRLS loop, with optional early stopping and
        history tracking.

        Parameters
        ----------
        X : array-like or DataFrame
            Input features for training.
        y : array-like
            Binary target labels (0 or 1).

        Returns
        -------
        self : LogisticRegressor
            The fitted model instance, for chaining.

        Raises
        ------
        ValueError
            If input shapes are incompatible or training fails due to
            numerical issues.
        """
        self.logger.info("Starting fit process for LogisticRegressor.")
        super()._fit(X, y)
        # Fix: removed the redundant no-op self-assignments of
        # `coefficients` and `weights`; the parent _fit sets both.
        return self

    def predict(self, model_input) -> np.ndarray:
        """
        Predict binary class labels (0 or 1) for new input data.

        Accepts NumPy arrays, pandas DataFrames, or PySpark DataFrames (where
        supported by the parent class). The decision threshold is defined by
        the parent layer (typically 0.5).

        Parameters
        ----------
        model_input : array-like or DataFrame
            Input data for prediction.

        Returns
        -------
        np.ndarray
            Array of predicted class labels (0 or 1).
        """
        self.logger.info("Making predictions with LogisticRegressor.")
        return super()._predict(model_input)

    def predict_proba(self, model_input) -> np.ndarray:
        """
        Predict positive-class probabilities for new input data.

        The estimation method follows the `proba` setting chosen at
        initialization ('gnostic' or 'sigmoid').

        Parameters
        ----------
        model_input : array-like or DataFrame
            Input data for probability prediction.

        Returns
        -------
        np.ndarray
            Predicted probabilities for the positive class (values in [0, 1]).
        """
        self.logger.info("Calculating predicted probabilities with LogisticRegressor.")
        return super()._predict_proba(model_input)

    def score(self, X, y) -> float:
        """
        Compute the F1 score of the model on the provided data.

        Parameters
        ----------
        X : array-like or DataFrame
            Input features for evaluation.
        y : array-like
            True binary labels.

        Returns
        -------
        float
            F1 score of the model predictions.
        """
        self.logger.info("Calculating F1 score for LogisticRegressor.")
        y_pred = self.predict(X)
        return f1_score(y, y_pred)