machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
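The three hunks below correspond, by added-line count, to `layer_mlflow_log_reg.py` (+107), `layer_param_log_reg.py` (+275), and `mg_log_reg.py` (+273) from the list above. As a quick orientation, here is a minimal, hypothetical smoke test of the wheel's import surface, assuming the wheel is installed; the `f1_score(y_true, y_pred)` usage follows `LogisticRegressor.score` in the last hunk:

```python
# Hypothetical smoke test: verify the wheel installs and its metrics import.
# f1_score is re-exported by machinegnostics.metrics, per the imports below.
import numpy as np
from machinegnostics.metrics import f1_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
print("f1:", f1_score(y_true, y_pred))
```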
machinegnostics/models/classification/layer_mlflow_log_reg.py
@@ -0,0 +1,107 @@
```python
'''
ManGo - Machine Gnostics Library
Copyright (C) 2025 Machine Gnostics Team

This work is licensed under the terms of the GNU General Public License version 3.0.

Author: Nirmal Parmar
Date: 2025-10-01
Description: Machine Gnostics logic for the robust logistic regression model, wrapped with MLflow
'''

import os
import joblib
import mlflow
import numpy as np
from machinegnostics.models.classification.layer_history_log_reg import HistoryRobustRegressor

class InterfaceLogisticRegressor(HistoryRobustRegressor, mlflow.pyfunc.PythonModel):
    """
    _LogisticRegressor: MLflow-wrapped Gnostic Logistic Regression

    Developer Notes:
    ----------------
    - Inherits from _LogisticRegressorParamBase for core logic and mlflow.pyfunc.PythonModel for MLflow integration.
    - Supports saving/loading via joblib for reproducibility and deployment.
    - Handles numpy arrays, pandas DataFrames, and pyspark DataFrames for prediction.
    - Use fit(X, y) for training and predict(X) or predict_proba(X) for inference.
    - Use save_model(path) and load_model(path) for model persistence.
    """
    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )
        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        self.history = history
        self.proba = proba

        # logger
        self.logger.info("InterfaceLogisticRegressor initialized.")

    def _fit(self, X, y):
        """
        Fit the logistic regression model using the parent class logic.
        """
        self.logger.info("Starting fit process for InterfaceLogisticRegressor.")
        super()._fit(X, y)

        # coefficients and weights are populated by the parent class fit
        self.coefficients = self.coefficients
        self.weights = self.weights
        return self

    def _predict(self, model_input) -> np.ndarray:
        """
        Predict class labels for input data.
        Accepts numpy arrays, pandas DataFrames, or pyspark DataFrames.
        """
        self.logger.info("Making predictions with InterfaceLogisticRegressor.")
        return super()._predict(model_input)

    def _predict_proba(self, model_input) -> np.ndarray:
        """
        Predict probabilities for input data.
        Accepts numpy arrays, pandas DataFrames, or pyspark DataFrames.
        """
        self.logger.info("Calculating predicted probabilities with InterfaceLogisticRegressor.")
        return super()._predict_proba(model_input)

    def save_model(self, path):
        """
        Save the trained model to disk using joblib.
        """
        self.logger.info(f"Saving model to {path}.")
        os.makedirs(path, exist_ok=True)
        joblib.dump(self, os.path.join(path, "model.pkl"))

    @classmethod
    def load_model(cls, path):
        """
        Load a trained model from disk using joblib.
        """
        return joblib.load(os.path.join(path, "model.pkl"))
```
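A minimal sketch of the save/load round trip defined above, assuming the wheel is installed. The synthetic `X`/`y` arrays and the `artifacts/log_reg` path are placeholders, and `_fit`/`_predict` are called directly because this wrapper exposes only underscore-prefixed methods (the public `fit`/`predict` live on `LogisticRegressor` below):

```python
# Hypothetical round trip through save_model/load_model above.
# X, y and the output path are made-up placeholders.
import numpy as np
from machinegnostics.models.classification.layer_mlflow_log_reg import InterfaceLogisticRegressor

X = np.array([[0.1], [0.4], [0.6], [0.9]])
y = np.array([0, 0, 1, 1])

model = InterfaceLogisticRegressor(degree=1, max_iter=50)
model._fit(X, y)                         # training goes through the parent chain
model.save_model("artifacts/log_reg")    # writes artifacts/log_reg/model.pkl
restored = InterfaceLogisticRegressor.load_model("artifacts/log_reg")
print(restored._predict(X))
```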
machinegnostics/models/classification/layer_param_log_reg.py
@@ -0,0 +1,275 @@
```python
'''
Machine Gnostics - Machine Gnostics Library
Copyright (C) 2025 Machine Gnostics Team

This work is licensed under the terms of the GNU General Public License version 3.0.

Author: Nirmal Parmar
Date: 2025-05-31

Description:
Regressor param base class that can be used for robust classification models.
- logistic regression

'''
import numpy as np
from machinegnostics.magcal import (ScaleParam,
                                    GnosticsWeights,
                                    ParamBase)
from machinegnostics.magcal.util.min_max_float import np_max_float, np_min_float

class ParamLogisticRegressorBase(ParamBase):
    """
    Parameters for the Logistic Regressor model.

    Attributes
    ----------
    scale_param : ScaleParam
        Scaling parameters for the model.
    gnostics_weights : GnosticsWeights
        Weights for the model.
    """

    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )
        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        self.proba = proba
        self.mg_loss = 'hi'
        # history option
        if history:
            self._history = []
            # default history content
            self._history.append({
                'iteration': 0,
                'log_loss': None,
                'coefficients': None,
                'rentropy': None,
                'weights': None,
            })
        else:
            self._history = None

        # logger
        self.logger.info("ParamLogisticRegressorBase initialized.")

    def _fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit the model to the data.

        Parameters
        ----------
        X : np.ndarray
            Input features.
        y : np.ndarray
            Target values.
        """
        self.logger.info("Starting fit process for Logistic Regressor.")
        # Generate polynomial features
        X_poly = self._generate_polynomial_features(X)

        n_samples, n_features = X_poly.shape

        # Initialize weights
        self.weights = np.ones(n_samples)

        # Initialize coefficients to zeros
        self.coefficients = np.zeros(n_features)

        for self._iter in range(self.max_iter):
            self._iter += 1
            self._prev_coef = self.coefficients.copy()

            try:
                # # Weighted least squares
                # self.coefficients = self._weighted_least_squares(X_poly, y, self.weights)

                # Update weights using gnostic approach
                y0 = X_poly @ self.coefficients
                residuals = y0 - y

                # mg data conversion
                z = self._data_conversion(residuals)
                z_y = self._data_conversion(y)
                z_y0 = self._data_conversion(y0)

                # gnostic weights
                gw = GnosticsWeights()
                gw = gw._get_gnostic_weights(z)
                new_weights = self.weights * gw
                W = np.diag(new_weights)

                # Compute scale and loss
                if self.scale == 'auto':
                    scale = ScaleParam()
                    zz = z_y0 - z_y
                    # avoid division by zero
                    zz = np.where(zz == 0, np_min_float(), zz)  # Replace zero with a very small value
                    # local scale
                    s = scale._gscale_loc((2 / (zz + 1/zz)))
                else:
                    s = self.scale

                # gnostic probabilities
                if self.proba == 'gnostic':
                    # Gnostic probability calculation
                    p, info, re = self._gnostic_prob(z=z)  # NOTE currently using p from local S, means ELDF. this can be improved in the future
                elif self.proba == 'sigmoid':
                    # Sigmoid probability calculation
                    p = self._sigmoid(y0)
                    _, info, re = self._gnostic_prob(z=z)

                # self.coefficients = self._wighted_least_squares_log_reg(p,
                #                                                         y0,
                #                                                         X_poly,
                #                                                         y,
                #                                                         W=W,
                #                                                         n_features=n_features,
                #                                                         )
                # IRLS update
                try:
                    XtW = X_poly.T @ W
                    XtWX = XtW @ X_poly + 1e-8 * np.eye(n_features)
                    XtWy = XtW @ (y0 + (y - p) / (p * (1 - p) + 1e-8))
                    self.coefficients = np.linalg.solve(XtWX, XtWy)
                except np.linalg.LinAlgError:
                    self.coefficients = np.linalg.pinv(XtWX) @ XtWy

                # --- Log loss calculation ---
                proba_pred = np.clip(p, 1e-8, 1-1e-8)
                self.log_loss = -np.mean(y * np.log(proba_pred) + (1 - y) * np.log(1 - proba_pred))

                # history update for gnostic vs sigmoid
                re = np.mean(re)
                info = np.mean(info)

                if self.gnostic_characteristics:
                    self.loss, self.re, self.hi, self.hj, self.fi, self.fj, \
                    self.pi, self.pj, self.ei, self.ej, self.infoi, self.infoj = self._gnostic_criterion(z=z_y0, z0=z_y, s=s)

                # self.weights = new_weights / np.sum(new_weights)  # NOTE : Normalizing weights

                # capture history and append to history
                # minimal history capture
                if self._history is not None:
                    self._history.append({
                        'iteration': self._iter,
                        'log_loss': self.log_loss,
                        'coefficients': self.coefficients.copy(),
                        'rentropy': re,
                        'weights': self.weights.copy(),
                    })

                # Check convergence with early stopping and rentropy
                # if entropy value is increasing, stop

                # --- Unified convergence check: stop if mean rentropy or log_loss change is within tolerance ---
                if self._iter > 0 and self.early_stopping:
                    prev_hist = self._history[-2] if len(self._history) > 1 else None
                    curr_re = np.mean(re)
                    curr_log_loss = self.log_loss
                    prev_re_val = np.mean(prev_hist['rentropy']) if prev_hist and prev_hist['rentropy'] is not None else None
                    prev_log_loss_val = prev_hist['log_loss'] if prev_hist and prev_hist['log_loss'] is not None else None

                    re_converged = prev_re_val is not None and np.abs(curr_re - prev_re_val) < self.tol
                    log_loss_converged = prev_log_loss_val is not None and np.abs(curr_log_loss - prev_log_loss_val) < self.tol

                    if re_converged or log_loss_converged:
                        if self.verbose:
                            self.logger.info(f"Converged at iteration {self._iter} (early stop):")
                            if re_converged:
                                self.logger.info(f"mean rentropy change below tolerance (rentropy={np.abs(curr_re - prev_re_val):.6e}).")
                            if log_loss_converged:
                                self.logger.info(f"log_loss change below tolerance (log_loss={np.abs(curr_log_loss - prev_log_loss_val):.6e}).")
                        break
                if self.verbose:
                    self.logger.info(f"Iteration {self._iter}, Log Loss: {self.log_loss:.6f}, mean residual entropy: {np.mean(re):.6f}")

            except (ZeroDivisionError, np.linalg.LinAlgError) as e:
                # Handle exceptions during fitting
                self.coefficients = self._prev_coef
                self.weights = self.weights.copy()
                if self.verbose:
                    self.logger.error(f"Error during fitting at iteration {self._iter}: {e}")
                break

    def _predict(self, X: np.ndarray, threshold=0.5) -> np.ndarray:
        """
        Predict class labels for the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features to predict class labels for.
        threshold : float, optional (default=0.5)
            Threshold for classifying probabilities into binary classes.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels (0 or 1).
        """
        self.logger.info("Making predictions with Logistic Regressor.")
        proba = self._predict_proba(X)
        return (proba >= threshold).astype(int)

    def _predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Predict probabilities for the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features to predict probabilities for.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted probabilities.
        """
        self.logger.info("Calculating predicted probabilities with Logistic Regressor.")
        if self.coefficients is None:
            raise ValueError("Model is not fitted yet. Call 'fit' before 'predict_proba'.")

        X_poly = self._generate_polynomial_features(X)
        linear_pred = X_poly @ self.coefficients

        # gnostic vs sigmoid probability calculation
        if self.proba == 'gnostic':
            # Gnostic probability calculation
            proba, info, re = self._gnostic_prob(-linear_pred)
        elif self.proba == 'sigmoid':
            # Sigmoid probability calculation
            proba = self._sigmoid(linear_pred)
        else:
            self.logger.error("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")
            raise ValueError("Invalid probability method. Must be 'gnostic' or 'sigmoid'.")

        return proba
```
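The core of `_fit` above is an iteratively reweighted least squares (IRLS) step: the working response `y0 + (y - p) / (p(1 - p) + ε)` is regressed onto the polynomial features under the weight matrix `W`, with a small ridge term `1e-8·I` guarding against singular normal equations. Below is a self-contained sketch of that single step on synthetic data; it uses only the sigmoid branch, since `_gnostic_prob` and the gnostic weights are internal to the package:

```python
# Standalone IRLS step mirroring the update in _fit above; data is synthetic.
import numpy as np

rng = np.random.default_rng(0)
X_poly = np.column_stack([np.ones(8), rng.normal(size=8)])  # bias + one feature
y = (X_poly[:, 1] > 0).astype(float)
coef = np.zeros(X_poly.shape[1])
W = np.diag(np.ones(8))                  # gnostic weights would replace this

for _ in range(5):
    y0 = X_poly @ coef                   # linear predictor
    p = 1.0 / (1.0 + np.exp(-y0))        # sigmoid branch of the code above
    XtW = X_poly.T @ W
    XtWX = XtW @ X_poly + 1e-8 * np.eye(X_poly.shape[1])   # ridge jitter
    XtWy = XtW @ (y0 + (y - p) / (p * (1 - p) + 1e-8))     # working response
    coef = np.linalg.solve(XtWX, XtWy)

print("coefficients:", coef)
```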
machinegnostics/models/classification/mg_log_reg.py
@@ -0,0 +1,273 @@
```python
'''
Machine Gnostics - Machine Gnostics Library
Copyright (C) 2025 Machine Gnostics Team

This work is licensed under the terms of the GNU General Public License version 3.0.

Author: Nirmal Parmar

Description:
This module implements a logistic regression model using mathematical gnostics principles.
'''

import numpy as np
import pandas as pd
from machinegnostics.models.classification.layer_io_process_log_reg import DataProcessLogisticRegressor
from machinegnostics.metrics import f1_score
from machinegnostics.magcal import disable_parent_docstring
from typing import Union

class LogisticRegressor(DataProcessLogisticRegressor):
    """
    LogisticRegressor implements a logistic regression model based on Mathematical Gnostics principles.

    This class provides a feature-rich logistic regression implementation built on the Machine
    Gnostics framework. It supports polynomial feature expansion, custom loss functions, early stopping,
    gnostic-based probability estimation, and detailed training history tracking.

    Key Features:
    - Polynomial feature expansion up to a user-specified degree.
    - Choice of probability estimation method: 'gnostic' (default) or standard 'sigmoid'.
    - Calculation of gnostic characteristics for advanced model diagnostics.
    - Early stopping based on convergence of loss or entropy.
    - Verbose logging for monitoring training progress.
    - Optional scaling and data processing modes.
    - Maintains a history of model parameters and losses for analysis.

    Parameters
    ----------
    degree : int, default=1
        Degree of polynomial features to use for input expansion.
    max_iter : int, default=100
        Maximum number of iterations for the optimization algorithm.
    tol : float, default=1e-3
        Tolerance for convergence. Training stops if the change in loss or entropy is below this value.
    mg_loss : str, default='hi'
        Type of gnostic loss to use (e.g., 'hi', 'hj', etc.).
    early_stopping : bool, default=True
        Whether to stop training early if convergence is detected.
    verbose : bool, default=False
        If True, prints detailed logs during training.
    scale : str | int | float, default='auto'
        Scaling method for input features. Can be a string identifier or a numeric value.
    data_form : str, default='a'
        Data processing form: 'a' for additive, 'm' for multiplicative.
    gnostic_characteristics : bool, default=True
        If True, calculates and stores gnostic characteristics during training.
    history : bool, default=True
        If True, maintains a history of model parameters and losses.
    proba : str, default='gnostic'
        Probability estimation method: 'gnostic' for gnostic-based, 'sigmoid' for standard logistic regression.

    Attributes
    ----------
    coefficients : np.ndarray
        Fitted model coefficients after training.
    weights : np.ndarray
        Sample weights used during training.
    _history : list
        List of dictionaries containing training history (loss, coefficients, entropy, etc.).
    params : list
        List of model parameters (for compatibility and inspection).

    Methods
    -------
    fit(X, y)
        Fit the logistic regression model to the data.
    predict(model_input)
        Predict class labels for new data.
    predict_proba(model_input)
        Predict class probabilities for new data.
    score(X, y)
        Compute the F1 score of the model on given data.

    Examples
    --------
    >>> from machinegnostics.models.classification.mg_log_reg import LogisticRegressor
    >>> model = LogisticRegressor(degree=2, max_iter=200, verbose=True)
    >>> model.fit(X_train, y_train)
    >>> y_pred = model.predict(X_test)
    >>> print("F1 Score:", model.score(X_test, y_test))

    Notes
    -----
    - The model targets binary classification tasks; labels must be 0 or 1.
    - More information on gnostic characteristics can be found in the Machine Gnostics documentation.
    - For more information, visit: https://machinegnostics.info/
    """

    @disable_parent_docstring
    def __init__(self,
                 degree: int = 1,
                 max_iter: int = 100,
                 tol: float = 1e-3,
                 mg_loss: str = 'hi',
                 early_stopping: bool = True,
                 verbose: bool = False,
                 scale: 'str | int | float' = 'auto',
                 data_form: str = 'a',
                 gnostic_characteristics: bool = True,
                 history: bool = True,
                 proba: str = 'gnostic'):
        """
        Initialize the LogisticRegressor with specified parameters.

        Parameters:
        - degree: Degree of polynomial features.
        - max_iter: Maximum number of iterations for convergence.
        - tol: Tolerance for stopping criteria.
        - mg_loss: Type of gnostic loss to use.
        - early_stopping: Whether to stop training early if convergence is reached.
        - verbose: Whether to print detailed logs during training.
        - scale: Scaling method for input features.
        - data_form: Form of data processing ('a' for additive, 'm' for multiplicative).
        - gnostic_characteristics: Whether to calculate gnostic characteristics.
        - history: Whether to maintain a history of model parameters and losses.
        - proba: Probability estimation method ('gnostic' or 'sigmoid').
        """
        super().__init__(
            degree=degree,
            max_iter=max_iter,
            tol=tol,
            mg_loss=mg_loss,
            early_stopping=early_stopping,
            verbose=verbose,
            scale=scale,
            data_form=data_form,
            gnostic_characteristics=gnostic_characteristics,
            proba=proba
        )

        self.degree = degree
        self.max_iter = max_iter
        self.tol = tol
        self.mg_loss = mg_loss
        self.early_stopping = early_stopping
        self.verbose = verbose
        self.scale = scale
        self.data_form = data_form
        self.gnostic_characteristics = gnostic_characteristics
        self.history = history
        self.proba = proba
        self.params = []
        self._history = []

        # logger
        self.logger.info("LogisticRegressor initialized.")

    def fit(self, X, y):
        """
        Fit the LogisticRegressor model to the training data.

        This method trains the logistic regression model using the provided input features and target labels.
        It supports polynomial feature expansion, gnostic or sigmoid probability estimation, and early stopping
        based on convergence criteria. Training history, including loss and coefficients, is stored if enabled.

        Parameters
        ----------
        X : array-like or DataFrame
            Input features for training. Can be a NumPy array, pandas DataFrame, or compatible type.
        y : array-like
            Target labels for training. Should be a 1D array or Series of binary class labels (0 or 1).

        Returns
        -------
        self : LogisticRegressor
            Returns the fitted model instance for chaining.

        Raises
        ------
        ValueError
            If input shapes are incompatible or training fails due to numerical issues.

        Examples
        --------
        >>> model = LogisticRegressor(degree=2, max_iter=200)
        >>> model.fit(X_train, y_train)
        """
        self.logger.info("Starting fit process for LogisticRegressor.")
        super()._fit(X, y)

        # coefficients and weights are populated by the parent class fit
        self.coefficients = self.coefficients
        self.weights = self.weights
        return self

    def predict(self, model_input) -> np.ndarray:
        """
        Predict class labels for new input data.

        This method predicts binary class labels (0 or 1) for the provided input data using the trained model.
        It supports input as NumPy arrays, pandas DataFrames, or PySpark DataFrames (if supported by the parent class).
        The prediction threshold is typically 0.5 unless otherwise specified in the parent class.

        Parameters
        ----------
        model_input : array-like or DataFrame
            Input data for prediction. Can be a NumPy array, pandas DataFrame, or compatible type.

        Returns
        -------
        np.ndarray
            Array of predicted class labels (0 or 1).

        Examples
        --------
        >>> y_pred = model.predict(X_test)
        """
        self.logger.info("Making predictions with LogisticRegressor.")
        return super()._predict(model_input)

    def predict_proba(self, model_input) -> np.ndarray:
        """
        Predict class probabilities for new input data.

        This method returns the predicted probabilities for each input sample belonging to the positive class (label 1).
        It supports input as NumPy arrays, pandas DataFrames, or PySpark DataFrames (if supported by the parent class).
        The probability estimation method is determined by the `proba` parameter set during initialization
        ('gnostic' for gnostic-based probabilities or 'sigmoid' for standard logistic regression probabilities).

        Parameters
        ----------
        model_input : array-like or DataFrame
            Input data for probability prediction. Can be a NumPy array, pandas DataFrame, or compatible type.

        Returns
        -------
        np.ndarray
            Array of predicted probabilities for the positive class (values between 0 and 1).

        Examples
        --------
        >>> y_proba = model.predict_proba(X_test)
        >>> print(y_proba[:5])
        """
        self.logger.info("Calculating predicted probabilities with LogisticRegressor.")
        return super()._predict_proba(model_input)

    def score(self, X, y) -> float:
        """
        Compute the F1 score of the model on the provided test data.

        This method evaluates the performance of the trained model by computing the F1 score,
        which is the harmonic mean of precision and recall, on the given input features and true labels.

        Parameters
        ----------
        X : array-like or DataFrame
            Input features for evaluation.
        y : array-like
            True binary labels for evaluation.

        Returns
        -------
        float
            F1 score of the model predictions on the provided data.

        Examples
        --------
        >>> score = model.score(X_test, y_test)
        >>> print("F1 Score:", score)
        """
        self.logger.info("Calculating F1 score for LogisticRegressor.")
        y_pred = self.predict(X)
        return f1_score(y, y_pred)
```
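A hedged end-to-end sketch of the public API above, assuming the wheel is installed; the data is synthetic and the side-by-side run of the two `proba` modes is illustrative only:

```python
# Synthetic end-to-end run of LogisticRegressor; compares the two proba modes.
import numpy as np
from machinegnostics.models.classification.mg_log_reg import LogisticRegressor

rng = np.random.default_rng(1)
X = rng.normal(size=(60, 2))
y = (X[:, 0] + X[:, 1] > 0).astype(int)

for proba in ("gnostic", "sigmoid"):
    model = LogisticRegressor(max_iter=50, proba=proba)
    model.fit(X, y)
    print(proba, "F1:", model.score(X, y))   # score() wraps f1_score
```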