machinegnostics-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/magcal/layer_base.py
@@ -0,0 +1,42 @@
+from abc import ABCMeta, abstractmethod
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+
+
+# regression base class
+class ModelBase(metaclass=ABCMeta):
+    """
+    Abstract base class for regression models.
+
+    Abstract Methods:
+    ----------------
+    - fit(X, y)
+    - predict(X)
+    """
+    def __init__(self):
+        # logger
+        self.logger = get_logger(self.__class__.__name__, logging.INFO)  # Create a logger for this class
+        self.logger.info("ModelBase initialized.")
+
+    @abstractmethod
+    def fit(self, X, y):
+        """
+        Fit the regression model to the data.
+        """
+        pass
+
+    @abstractmethod
+    def predict(self, X):
+        """
+        Predict using the fitted model.
+        """
+        pass
+
+    # @abstractmethod
+    # def score(self, X, y):
+    #     """
+    #     Compute the score of the model.
+    #     """
+    #     pass
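For context, a minimal sketch of how this contract is consumed. The `MeanRegressor` class below is hypothetical (not part of the package); only `ModelBase` and its abstract `fit`/`predict` methods come from the file above.

# Hypothetical toy subclass, for illustration only -- not from the package.
# ModelBase itself cannot be instantiated (ABCMeta); a subclass must
# implement both abstract methods before it can be constructed.
import numpy as np
from machinegnostics.magcal.layer_base import ModelBase

class MeanRegressor(ModelBase):
    """Toy model: always predicts the mean of the training targets."""
    def fit(self, X, y):
        self._mean = float(np.mean(y))
        return self

    def predict(self, X):
        return np.full(len(X), self._mean)

model = MeanRegressor()                      # logs "ModelBase initialized."
model.fit([[1.0], [2.0], [3.0]], [1.0, 2.0, 3.0])
print(model.predict([[4.0], [5.0]]))         # [2. 2.]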
machinegnostics/magcal/layer_history_base.py
@@ -0,0 +1,74 @@
+import numpy as np
+from dataclasses import dataclass, field
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+
+@dataclass
+class GenericHistoryRecord:
+    # Store all attributes dynamically
+    data: dict = field(default_factory=dict)
+
+class HistoryBase:
+    """
+    Generic base class for maintaining a history of model parameters and metrics.
+    Can be used with any model by recording arbitrary key-value pairs per iteration.
+    """
+
+    def __init__(self, history: bool = True):
+        self._record_history = history
+        self.logger = get_logger(self.__class__.__name__, logging.INFO)  # Create a logger for this class
+        if not isinstance(self._record_history, bool):
+            self.logger.error("record_history must be a boolean value.")
+            raise ValueError("record_history must be a boolean value.")
+        # Always initialize the store so get_history() and the history
+        # property do not raise AttributeError when recording is disabled.
+        self._history = []
+
+        self.logger.info("HistoryBase initialized.")
+
+    def record_history(self, **kwargs):
+        """
+        Record the current state of model parameters and metrics.
+        Accepts any keyword arguments to store as a record.
+        """
+        self.logger.info(f"Recording history: {kwargs}")
+        if self._record_history:
+            # Copy numpy arrays so later in-place updates do not mutate history.
+            record = GenericHistoryRecord(
+                data={k: (np.copy(v) if isinstance(v, np.ndarray) else v)
+                      for k, v in kwargs.items()}
+            )
+            self._history.append(record)
+
+    def get_history(self, as_dict=False):
+        """
+        Retrieve the recorded history.
+        If as_dict=True, returns a list of dicts; otherwise, returns the
+        GenericHistoryRecord dataclass objects.
+        """
+        self.logger.info("Retrieving history.")
+        if as_dict:
+            return [rec.data for rec in self._history]
+        return self._history
+
+    def clear_history(self):
+        """Clear the stored history."""
+        self.logger.info("Clearing history.")
+        self._history = []
+
+    def prepare_history_for_output(self):
+        """
+        Prepare the history for output as a dict of lists (for logging or saving).
+        """
+        self.logger.info("Preparing history for output.")
+        if not self._history:
+            return {}
+        # Collect all unique keys across records, padding missing keys with None.
+        all_keys = set()
+        for rec in self._history:
+            all_keys.update(rec.data.keys())
+        output = {k: [] for k in all_keys}
+        for rec in self._history:
+            for k in all_keys:
+                output[k].append(rec.data.get(k, None))
+        return output
+
+    @property
+    def history(self):
+        """Property to access the recorded history."""
+        return self.get_history()
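A short usage sketch, assuming the module path machinegnostics.magcal.layer_history_base implied by the file listing; the recorded keys (iteration, loss, coef) are arbitrary examples, not names used by the package.

# Record two training iterations, then read them back in both supported shapes.
import numpy as np
from machinegnostics.magcal.layer_history_base import HistoryBase

hist = HistoryBase(history=True)
hist.record_history(iteration=0, loss=1.25, coef=np.array([0.1, 0.2]))
hist.record_history(iteration=1, loss=0.80, coef=np.array([0.3, 0.4]))

print(hist.get_history(as_dict=True))
# [{'iteration': 0, 'loss': 1.25, 'coef': array([0.1, 0.2])}, {...}]

print(hist.prepare_history_for_output())  # key order may vary
# {'iteration': [0, 1], 'loss': [1.25, 0.8], 'coef': [array([0.1, 0.2]), array([0.3, 0.4])]}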
machinegnostics/magcal/layer_io_process_base.py
@@ -0,0 +1,238 @@
+import numpy as np
+import pandas as pd
+import logging
+from machinegnostics.magcal.util.logging import get_logger
+# pyspark is optional; Spark DataFrames are supported only when it is installed
+try:
+    from pyspark.sql import DataFrame as SparkDataFrame
+except ImportError:
+    SparkDataFrame = None
+
+class DataProcessLayerBase:
+    """
+    A class to handle input/output processing for machine learning models.
+
+    This class provides methods for data type checking, validation, and conversion
+    to ensure that input data is in the correct format for model training and prediction.
+    """
+    def __init__(self, verbose: bool = False, **kwargs):
+        """
+        Initialize the DataProcessLayer with optional parameters.
+
+        Parameters
+        ----------
+        verbose : bool, default=False
+            If True, log at DEBUG level; otherwise log at WARNING level.
+        **kwargs : dict
+            Additional parameters for configuration.
+        """
+        self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
+        self._input_type = None  # stores the type of the input for output conversion
+        self.logger.info(f"{self.__class__.__name__} initialized.")
+
+    def _identify_and_convert(self, data, is_y=False):
+        """
+        Identify the type of data and convert it to a numpy array.
+
+        Parameters
+        ----------
+        data : array-like, pandas DataFrame, or pyspark DataFrame
+            Input data to be converted.
+        is_y : bool, default=False
+            Whether the data is target values (y).
+
+        Returns
+        -------
+        np.ndarray
+            Converted numpy array.
+        """
+        self.logger.info(f"Identifying and converting data of type: {type(data)}")
+        if isinstance(data, np.ndarray):
+            arr = data
+            self._input_type = 'numpy'
+        elif isinstance(data, pd.DataFrame):
+            arr = data.values
+            self._input_type = 'pandas'
+        elif SparkDataFrame is not None and isinstance(data, SparkDataFrame):
+            arr = np.array(data.collect())
+            self._input_type = 'spark'
+        else:
+            arr = np.array(data)
+            self._input_type = 'unknown'
+
+        if is_y:
+            arr = np.ravel(arr)
+        return arr
+
+    def _convert_output(self, output, reference_input):
+        """
+        Convert an output numpy array back to the original input format.
+
+        Parameters
+        ----------
+        output : np.ndarray
+            Output data to be converted.
+        reference_input : original input data
+            The original input data from which to infer the output format.
+
+        Returns
+        -------
+        Converted output in the original format.
+        """
+        self.logger.info(f"Converting output to match reference input type: {type(reference_input)}")
+        if isinstance(reference_input, np.ndarray):
+            return output
+        elif isinstance(reference_input, pd.DataFrame):
+            return pd.DataFrame(output, index=reference_input.index, columns=getattr(reference_input, 'columns', None))
+        elif SparkDataFrame is not None and isinstance(reference_input, SparkDataFrame):
+            # For Spark, convert the numpy array to a pandas DataFrame, then to a Spark DataFrame
+            import pyspark.sql
+            spark = pyspark.sql.SparkSession.builder.getOrCreate()
+            pdf = pd.DataFrame(output)
+            return spark.createDataFrame(pdf)
+        else:
+            return output
+
+    def _check_X(self, X, n_features=None):
+        """
+        Check if the input X is valid.
+
+        Parameters
+        ----------
+        X : array-like
+            Input features.
+        n_features : int, optional
+            Expected number of features.
+
+        Raises
+        ------
+        ValueError
+            If X is invalid.
+        """
+        self.logger.info(f"Checking input X of type: {type(X)}")
+        X_arr = self._identify_and_convert(X)
+
+        # if X_arr is 1-dimensional, reshape it to a single-feature 2D array
+        if X_arr.ndim == 1:
+            X_arr = X_arr.reshape(-1, 1)
+        if X_arr.ndim != 2:
+            raise ValueError("X should be a 1D or 2D array-like structure.")
+        if n_features is not None and X_arr.shape[1] != n_features:
+            raise ValueError(f"X should have {n_features} features, got {X_arr.shape[1]}.")
+        if X_arr.shape[0] == 0:
+            raise ValueError("X is empty.")
+        return X_arr
+
+    def _check_y(self, y, n_samples=None):
+        """
+        Check if the target y is valid.
+
+        Parameters
+        ----------
+        y : array-like
+            Target values.
+        n_samples : int, optional
+            Expected number of samples.
+
+        Raises
+        ------
+        ValueError
+            If y is invalid.
+        """
+        self.logger.info(f"Checking target y of type: {type(y)}")
+        y_arr = self._identify_and_convert(y, is_y=True)
+        if y_arr.ndim != 1:
+            raise ValueError("y should be a 1D array-like structure.")
+        if n_samples is not None and y_arr.shape[0] != n_samples:
+            raise ValueError(f"y should have {n_samples} samples, got {y_arr.shape[0]}.")
+        if y_arr.shape[0] == 0:
+            raise ValueError("y is empty.")
+        return y_arr
+
+    def _check_X_predict(self, X, n_features=None):
+        """
+        Check if the input X for prediction is valid.
+
+        Parameters
+        ----------
+        X : array-like
+            Input features for prediction.
+        n_features : int, optional
+            Expected number of features.
+
+        Raises
+        ------
+        ValueError
+            If X is invalid.
+        """
+        self.logger.info(f"Checking input X for prediction of type: {type(X)}")
+        X = self._check_X(X, n_features=n_features)
+        # # output type
+        # if self._input_type is None:
+        #     self._input_type = 'numpy'
+        # elif self._input_type == 'pandas':
+        #     X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])])
+        # elif self._input_type == 'spark':
+        #     import pyspark.sql
+        #     spark = pyspark.sql.SparkSession.builder.getOrCreate()
+        #     X = spark.createDataFrame(X, schema=[f'feature_{i}' for i in range(X.shape[1])])
+        # elif self._input_type == 'unknown':
+        #     raise ValueError("Unknown input type. Please provide a valid input format.")
+        return X
+
+    def _fit_io(self, X, y):
+        """
+        Validate and convert the training inputs before fitting.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input features for training the model.
+        y : array-like of shape (n_samples,)
+            Target values corresponding to the input features.
+
+        Returns
+        -------
+        X_checked, y_checked : np.ndarray
+            The validated feature matrix and target vector.
+        """
+        self.logger.info("Starting fit input/output processing.")
+        X_checked = self._check_X(X)
+        y_checked = self._check_y(y, n_samples=X_checked.shape[0])
+        return X_checked, y_checked
+
+    def _predict_io(self, X):
+        """
+        Validate and convert the inputs before prediction.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input features for prediction.
+
+        Returns
+        -------
+        X_checked : np.ndarray of shape (n_samples, n_features)
+            The validated feature matrix.
+        """
+        self.logger.info("Starting predict input/output processing.")
+        X_checked = self._check_X_predict(X)
+        return X_checked
+
+    def _score_io(self, X, y):
+        """
+        Validate and convert the inputs before scoring.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
+        y : array-like of shape (n_samples,)
+            True values for X.
+
+        Returns
+        -------
+        X_checked, y_checked : np.ndarray
+            The validated feature matrix and target vector.
+        """
+        self.logger.info("Starting score input/output processing.")
+        X_checked = self._check_X_predict(X)
+        y_checked = self._check_y(y, n_samples=X_checked.shape[0])
+        # Mirror _fit_io: return the validated arrays (the original returned
+        # self, discarding the checked inputs).
+        return X_checked, y_checked
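These are internal helpers (note the leading underscores) that the package's model classes appear to delegate to. A sketch of the validation round trip, calling them directly for illustration and assuming the module path machinegnostics.magcal.layer_io_process_base:

# Exercise the fit-time validation round trip on mixed input types.
import pandas as pd
from machinegnostics.magcal.layer_io_process_base import DataProcessLayerBase

layer = DataProcessLayerBase(verbose=False)
X = pd.DataFrame({"x1": [1.0, 2.0, 3.0], "x2": [4.0, 5.0, 6.0]})
y = [10.0, 20.0, 30.0]

X_arr, y_arr = layer._fit_io(X, y)      # both come back as numpy arrays
print(X_arr.shape, y_arr.shape)         # (3, 2) (3,)

# A 1D X is promoted to a single-feature column vector.
print(layer._check_X([1, 2, 3]).shape)  # (3, 1)

# Mismatched sample counts are rejected.
try:
    layer._fit_io(X, [1.0, 2.0])
except ValueError as err:
    print(err)                          # y should have 3 samples, got 2.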