machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,42 @@
1
+ from abc import ABCMeta, abstractmethod
2
+ import logging
3
+ from machinegnostics.magcal.util.logging import get_logger
4
+
5
+
6
# regression base class
class ModelBase(metaclass=ABCMeta):
    """
    Abstract base class for regression models.

    Subclasses must implement:

    - fit(X, y): train the model on features ``X`` and targets ``y``.
    - predict(X): produce predictions for features ``X``.
    """

    def __init__(self):
        # One logger per concrete subclass so log records carry the model name.
        self.logger = get_logger(self.__class__.__name__, logging.INFO)
        self.logger.info("ModelBase initialized.")

    @abstractmethod
    def fit(self, X, y):
        """
        Fit the regression model to the data.
        """
        pass

    @abstractmethod
    def predict(self, X):
        """
        Predict using the fitted model.
        """
        pass
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ from dataclasses import dataclass, asdict, field
3
+ import logging
4
+ from machinegnostics.magcal.util.logging import get_logger
5
+
6
@dataclass
class GenericHistoryRecord:
    """A single history snapshot; all recorded values live in ``data``."""

    # Arbitrary key/value pairs captured for one iteration. A factory is used
    # so each record gets its own dict rather than a shared mutable default.
    data: dict = field(default_factory=dict)
10
+
11
class HistoryBase:
    """
    Generic base class for maintaining a history of model parameters and metrics.

    Can be used for any model by recording arbitrary key-value pairs per
    iteration; each snapshot is stored as a ``GenericHistoryRecord``.

    Parameters
    ----------
    history : bool, default=True
        Whether to record history. When False, ``record_history`` is a no-op
        but all query methods remain safe to call.

    Raises
    ------
    ValueError
        If ``history`` is not a boolean.
    """

    def __init__(self, history: bool = True):
        self._record_history = history
        self.logger = get_logger(self.__class__.__name__, logging.INFO)
        if not isinstance(self._record_history, bool):
            self.logger.error("record_history must be a boolean value.")
            raise ValueError("record_history must be a boolean value.")
        # Bug fix: always create the list. Previously it only existed when
        # history=True, so get_history/clear_history/prepare_history_for_output
        # raised AttributeError when recording was disabled.
        self._history = []

        self.logger.info("HistoryBase initialized")

    def record_history(self, **kwargs):
        """
        Record the current state of model parameters and metrics.

        Accepts any keyword arguments to store as a record. numpy arrays are
        copied so later in-place mutation does not alter the stored snapshot.
        """
        self.logger.info(f"Recording history: {kwargs}")
        if not self._record_history:
            return
        snapshot = {k: (np.copy(v) if isinstance(v, np.ndarray) else v)
                    for k, v in kwargs.items()}
        self._history.append(GenericHistoryRecord(data=snapshot))

    def get_history(self, as_dict=False):
        """
        Retrieve the recorded history.

        If as_dict=True, returns a list of dicts. Otherwise, returns the
        ``GenericHistoryRecord`` objects themselves.
        """
        self.logger.info("Retrieving history.")
        if as_dict:
            return [rec.data for rec in self._history]
        return self._history

    def clear_history(self):
        """Clear the stored history."""
        self.logger.info("Clearing history.")
        self._history = []

    def prepare_history_for_output(self):
        """
        Prepare the history for output as a dict of lists (for logging or saving).

        Records missing a key contribute ``None`` in that key's list so all
        lists share the same length.
        """
        self.logger.info("Preparing history for output.")
        if not self._history:
            return {}
        # Union of keys across all records; individual records may differ.
        all_keys = set()
        for rec in self._history:
            all_keys.update(rec.data.keys())
        output = {k: [] for k in all_keys}
        for rec in self._history:
            for k in all_keys:
                output[k].append(rec.data.get(k, None))
        return output

    @property
    def history(self):
        """Property to access the recorded history."""
        return self.get_history()
@@ -0,0 +1,238 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import logging
4
+ from machinegnostics.magcal.util.logging import get_logger
5
+ try:
6
+ from pyspark.sql import DataFrame as SparkDataFrame
7
+ except ImportError:
8
+ SparkDataFrame = None
9
+
10
class DataProcessLayerBase:
    """
    A class to handle input/output processing for machine learning models.

    This class provides methods for data type checking, validation, and
    conversion to ensure that input data is in the correct format for model
    training and prediction. The container type of the input (numpy / pandas /
    spark) is remembered so outputs can be converted back.
    """

    def __init__(self, verbose: bool = False, **kwargs):
        """
        Initialize the DataProcessLayer with optional parameters.

        Parameters
        ----------
        verbose : bool, default=False
            When True, log at DEBUG level; otherwise WARNING.
        **kwargs : dict
            Additional parameters for configuration (currently unused).
        """
        self.logger = get_logger(self.__class__.__name__,
                                 logging.DEBUG if verbose else logging.WARNING)
        # Stores the type of the last converted input ('numpy', 'pandas',
        # 'spark', or 'unknown') for output conversion; None until set.
        self._input_type = None
        self.logger.info(f"{self.__class__.__name__} initialized.")

    def _identify_and_convert(self, data, is_y=False):
        """
        Identify the type of data and convert it to a numpy array.

        Parameters
        ----------
        data : array-like, pandas DataFrame, or pyspark DataFrame
            Input data to be converted.
        is_y : bool, default=False
            Whether the data is target values (y); targets are flattened to 1D.

        Returns
        -------
        np.ndarray
            Converted numpy array.
        """
        self.logger.info(f"Identifying and converting data of type: {type(data)}")
        if isinstance(data, np.ndarray):
            arr = data
            self._input_type = 'numpy'
        elif isinstance(data, pd.DataFrame):
            arr = data.values
            self._input_type = 'pandas'
        elif SparkDataFrame is not None and isinstance(data, SparkDataFrame):
            # collect() pulls the distributed rows to the driver as a list.
            arr = np.array(data.collect())
            self._input_type = 'spark'
        else:
            arr = np.array(data)
            self._input_type = 'unknown'

        if is_y:
            arr = np.ravel(arr)
        return arr

    def _convert_output(self, output, reference_input):
        """
        Convert output numpy array back to the original input format.

        Parameters
        ----------
        output : np.ndarray
            Output data to be converted.
        reference_input : original input data
            The original input data to infer the output format.

        Returns
        -------
        Converted output in the original format.
        """
        self.logger.info(f"Converting output to match reference input type: {type(reference_input)}")
        if isinstance(reference_input, np.ndarray):
            return output
        elif isinstance(reference_input, pd.DataFrame):
            return pd.DataFrame(output, index=reference_input.index, columns=getattr(reference_input, 'columns', None))
        elif SparkDataFrame is not None and isinstance(reference_input, SparkDataFrame):
            # For Spark, convert numpy array to pandas DataFrame, then to Spark DataFrame
            import pyspark.sql
            spark = pyspark.sql.SparkSession.builder.getOrCreate()
            pdf = pd.DataFrame(output)
            return spark.createDataFrame(pdf)
        else:
            return output

    def _check_X(self, X, n_features=None):
        """
        Check if the input X is valid.

        Parameters
        ----------
        X : array-like
            Input features.
        n_features : int, optional
            Expected number of features.

        Returns
        -------
        np.ndarray
            Validated 2D feature array.

        Raises
        ------
        ValueError
            If X is invalid.
        """
        self.logger.info(f"Checking input X of type: {type(X)}")
        X_arr = self._identify_and_convert(X)

        # A 1D input is treated as a single feature column.
        if X_arr.ndim == 1:
            X_arr = X_arr.reshape(-1, 1)
        if X_arr.ndim != 2:
            raise ValueError("X should be a 1D or 2D array-like structure.")
        if n_features is not None and X_arr.shape[1] != n_features:
            raise ValueError(f"X should have {n_features} features, got {X_arr.shape[1]}.")
        if X_arr.shape[0] == 0:
            raise ValueError("X is empty.")
        return X_arr

    def _check_y(self, y, n_samples=None):
        """
        Check if the target y is valid.

        Parameters
        ----------
        y : array-like
            Target values.
        n_samples : int, optional
            Expected number of samples.

        Returns
        -------
        np.ndarray
            Validated 1D target array.

        Raises
        ------
        ValueError
            If y is invalid.
        """
        self.logger.info(f"Checking target y of type: {type(y)}")
        y_arr = self._identify_and_convert(y, is_y=True)
        if y_arr.ndim != 1:
            raise ValueError("y should be a 1D array-like structure.")
        if n_samples is not None and y_arr.shape[0] != n_samples:
            raise ValueError(f"y should have {n_samples} samples, got {y_arr.shape[0]}.")
        if y_arr.shape[0] == 0:
            raise ValueError("y is empty.")
        return y_arr

    def _check_X_predict(self, X, n_features=None):
        """
        Check if the input X for prediction is valid.

        Parameters
        ----------
        X : array-like
            Input features for prediction.
        n_features : int, optional
            Expected number of features.

        Returns
        -------
        np.ndarray
            Validated 2D feature array.

        Raises
        ------
        ValueError
            If X is invalid.
        """
        self.logger.info(f"Checking input X for prediction of type: {type(X)}")
        return self._check_X(X, n_features=n_features)

    def _fit_io(self, X, y):
        """
        Validate inputs for fitting.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features for training the model.
        y : array-like of shape (n_samples,)
            Target values corresponding to the input features.

        Returns
        -------
        tuple of (np.ndarray, np.ndarray)
            Validated (X, y) arrays with matching sample counts.
        """
        self.logger.info("Starting fit input/output processing.")
        X_checked = self._check_X(X)
        y_checked = self._check_y(y, n_samples=X_checked.shape[0])
        return X_checked, y_checked

    def _predict_io(self, X):
        """
        Validate inputs for prediction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input features for prediction.

        Returns
        -------
        np.ndarray
            Validated 2D feature array.
        """
        self.logger.info("Starting predict input/output processing.")
        return self._check_X_predict(X)

    def _score_io(self, X, y):
        """
        Validate inputs for scoring.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.
        y : array-like of shape (n_samples,)
            True values for X.

        Returns
        -------
        tuple of (np.ndarray, np.ndarray)
            Validated (X, y) arrays for the caller to compute its score with.
        """
        self.logger.info("Starting score input/output processing.")
        X_checked = self._check_X_predict(X)
        y_checked = self._check_y(y, n_samples=X_checked.shape[0])
        # Bug fix: previously returned ``self`` and discarded the validated
        # arrays, unlike _fit_io; callers need the checked data to score.
        return X_checked, y_checked