machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,197 @@
1
+ '''
2
+ Gnostic - Homoscedasticity and Heteroscedasticity
3
+
4
+ This module to check for homoscedasticity and heteroscedasticity in data.
5
+
6
+ Author: Nirmal Parmar
7
+ Machine Gnostics
8
+ '''
9
+ import numpy as np
10
+ import logging
11
+ from machinegnostics.magcal.util.logging import get_logger
12
+
13
+ class DataScedasticity:
14
+ """
15
+ Gnostic Scedasticity Test for Homoscedasticity and Heteroscedasticity
16
+
17
+ This class provides a method to check for homoscedasticity and heteroscedasticity in data,
18
+ inspired by fundamental principles rather than standard statistical tests. Unlike classical
19
+ approaches, this implementation uses gnostic variance and gnostic linear regression, which are
20
+ based on the Machine Gnostics framework.
21
+
22
+ Key Differences from Standard Methods:
23
+ - **Variance Calculation:** The variance used here is the gnostic variance, which may differ in
24
+ definition and properties from classical statistical variance. It is designed to capture
25
+ uncertainty and spread in a way that aligns with gnostic principles.
26
+ - **Regression Model:** The linear regression model employed is a gnostic linear regression,
27
+ not the standard least squares regression. This model is tailored to the gnostic approach and
28
+ may use different loss functions, optimization criteria, or regularization.
29
+ - **Test Philosophy:** This is not a formal statistical test (such as Breusch-Pagan or White's test),
30
+ but rather a diagnostic inspired by the fundamentals of the gnostic framework. The method splits
31
+ residuals based on the median of the independent variable and compares the gnostic variances of
32
+ the squared residuals in each half.
33
+
34
+ Usage:
35
+ 1. Initialize the class with desired gnostic regression parameters.
36
+ 2. Call `fit(x, y)` with your data.
37
+ 3. Check the `is_homoscedastic` attribute or returned value to determine if the data is
38
+ homoscedastic (equal gnostic variance across splits) or heteroscedastic.
39
+
40
+ Attributes:
41
+ x (np.ndarray): Independent variable data.
42
+ y (np.ndarray): Dependent variable data.
43
+ model (LinearRegressor): Gnostic linear regression model.
44
+ residuals (np.ndarray): Residuals from the fitted model.
45
+ params (dict): Stores calculated variances and variance ratio.
46
+ variance_ratio (float): Ratio of gnostic variances between data splits.
47
+ is_homoscedastic (bool): True if data is homoscedastic under gnostic test, else False.
48
+
49
+ Example:
50
+ >>> import numpy as np
51
+ >>> from machinegnostics.magcal import DataScedasticity
52
+ >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
53
+ >>> y = np.array([2.1, 4.2, 6.1, 8.3, 10.2, 12.1, 14.2, 16.1, 18.2, 20.1])
54
+ >>> sced = DataScedasticity()
55
+ >>> is_homo = sced.fit(x, y)
56
+ >>> print(f"Is data homoscedastic? {is_homo}")
57
+ >>> print(f"Variance ratio: {sced.variance_ratio}")
58
+
59
+ Note:
60
+ This class is intended for users interested in gnostic data analysis. Results and interpretations
61
+ may not align with classical statistical methods. For more details on gnostic variance and regression,
62
+ refer to the Machine Gnostics documentation.
63
+ """
64
+
65
+ def __init__(self,
66
+ scale: str | int | float = 'auto',
67
+ max_iter: int = 100,
68
+ tol: float = 0.001,
69
+ mg_loss: str = 'hi',
70
+ early_stopping: bool = True,
71
+ verbose: bool = False,
72
+ data_form: str = 'a',
73
+ gnostic_characteristics: bool = True,
74
+ history: bool = True):
75
+
76
+ from machinegnostics.models.regression import LinearRegressor
77
+ self.x = None
78
+ self.y = None
79
+ self.model = LinearRegressor(scale=scale,
80
+ max_iter=max_iter,
81
+ tol=tol,
82
+ mg_loss=mg_loss,
83
+ early_stopping=early_stopping,
84
+ verbose=verbose,
85
+ data_form=data_form,
86
+ gnostic_characteristics=gnostic_characteristics,
87
+ history=history)
88
+ self.residuals = None
89
+ self.params = {}
90
+ self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
91
+ self.logger.debug(f"{self.__class__.__name__} initialized:")
92
+
93
+
94
+ def _split_residuals(self):
95
+ """
96
+ Split residuals into two halves based on the median of x. zip x and residuals.
97
+ sorted(zip(x, residuals))
98
+ """
99
+ self.logger.info("Splitting residuals based on median of x.")
100
+ median_x = np.median(self.x)
101
+ left_half = [(xi, ri) for xi, ri in zip(self.x, self.residuals) if xi <= median_x]
102
+ right_half = [(xi, ri) for xi, ri in zip(self.x, self.residuals) if xi > median_x]
103
+ return left_half, right_half
104
+
105
+ def _variance_ratio(self):
106
+ """
107
+ Calculate the variance ratio of the squared residuals in the two halves.
108
+
109
+ Returns:
110
+ float: Variance ratio of the squared residuals.
111
+ """
112
+ from machinegnostics import variance
113
+ self.logger.info("Calculating variance ratio.")
114
+ left_half, right_half = self._split_residuals()
115
+ left_residuals = np.array([ri for xi, ri in left_half])
116
+ right_residuals = np.array([ri for xi, ri in right_half])
117
+ var_left = variance(left_residuals ** 2)
118
+ var_right = variance(right_residuals ** 2)
119
+
120
+ self.logger.debug(f"Left variance: {var_left}, Right variance: {var_right}")
121
+ # cap values between [1, 1e-9]
122
+ var_left = float(var_left)
123
+ var_right = float(np.maximum(var_right, 1e-9)) # to avoid division by zero
124
+ if var_right == 0 and var_left == 0:
125
+ variance_ratio = 1.0
126
+ elif var_right == 0:
127
+ variance_ratio = np.inf
128
+ else:
129
+ variance_ratio = var_left / var_right
130
+
131
+ # params
132
+ self.logger.info(f"Variance ratio calculated: {variance_ratio}")
133
+ self.params['var_left'] = var_left
134
+ self.params['var_right'] = var_right
135
+ self.params['variance_ratio'] = variance_ratio
136
+ return variance_ratio
137
+
138
+
139
+ def _is_homoscedastic(self, threshold: float = 0.001):
140
+ """
141
+ Check if the data is homoscedastic based on the variance ratio.
142
+
143
+ Args:
144
+ threshold (float): Threshold to determine homoscedasticity.
145
+
146
+ Returns:
147
+ bool: True if homoscedastic, False if heteroscedastic.
148
+ """
149
+ if self.variance_ratio is None:
150
+ self.logger.error("Variance ratio not calculated. Please run fit() first.")
151
+ raise ValueError("Variance ratio not calculated. Please run fit() first.")
152
+ return abs(self.variance_ratio - 1) < threshold
153
+
154
+ def fit(self, x: np.ndarray, y: np.ndarray) -> bool:
155
+ """
156
+ Fit the gnostic linear regression model to the data and assess scedasticity.
157
+
158
+ This method fits the gnostic linear regression model to the provided data, computes the residuals,
159
+ and evaluates homoscedasticity or heteroscedasticity using the gnostic variance approach. Unlike
160
+ standard statistical tests, this method uses gnostic variance and gnostic regression, which are
161
+ based on the Machine Gnostics framework and may yield different results from classical methods.
162
+
163
+ The method splits the data based on the median of the independent variable, calculates the gnostic
164
+ variance of squared residuals in each half, and determines if the data is homoscedastic (equal
165
+ gnostic variance) or heteroscedastic.
166
+
167
+ Args:
168
+ x (np.ndarray): Independent variable data.
169
+ y (np.ndarray): Dependent variable data.
170
+
171
+ Returns:
172
+ bool: True if data is homoscedastic under the gnostic test, False if heteroscedastic.
173
+
174
+ Note:
175
+ This is not a standard statistical test. For details on the gnostic approach, see the
176
+ Machine Gnostics documentation.
177
+ """
178
+ self.logger.info("Fitting DataScedasticity model...")
179
+ self.x = x
180
+ self.y = y
181
+
182
+ self.logger.info("Fitting gnostic regression model.")
183
+ self.model.fit(x, y)
184
+ self.logger.debug(f"Model calculations complete.")
185
+
186
+ self.logger.info("Calculating residuals.")
187
+ self.residuals = y - self.model.predict(x)
188
+
189
+ # calculate variance ratio
190
+ self.logger.info("Calculating variance ratio.")
191
+ self.variance_ratio = self._variance_ratio()
192
+
193
+ # check
194
+ self.logger.info("Checking homoscedasticity.")
195
+ self.is_homoscedastic = self._is_homoscedastic()
196
+ self.logger.info(f"Homoscedasticity check result - is_homoscedastic: {self.is_homoscedastic}")
197
+ return self.is_homoscedastic
@@ -0,0 +1,181 @@
1
+ from machinegnostics.magcal.util.logging import get_logger
2
+ import numpy as np
3
+ import logging
4
+
5
class WEDF:
    """
    Weighted Empirical Distribution Function (WEDF).

    Implements a WEDF that accounts for data weights, which is useful when
    dealing with repeated values or data points of varying importance.
    """

    def __init__(self, data, weights=None, data_lb=None, data_ub=None, verbose=False):
        """
        Initialize the WEDF with data points and optional weights.

        Parameters
        ----------
        data : array-like
            Input data values.
        weights : array-like, optional
            A priori weights for each data point. If None, equal weights
            are assigned. Must be the same length as `data`.
        data_lb : float, optional
            Lower bound for the data range. Defaults to min(data).
        data_ub : float, optional
            Upper bound for the data range. Defaults to max(data).
        verbose : bool, optional
            If True, set logging level to DEBUG. Default is False.

        Raises
        ------
        ValueError
            If `data` is empty or non-numeric, if `weights` has a different
            length than `data`, or if the weights do not sum to a positive
            value.
        """
        self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)

        # Validate the data BEFORE deriving bounds from it, so empty input
        # raises the intended ValueError instead of an opaque numpy
        # "zero-size array to reduction operation" error from np.min/np.max.
        self.data = np.asarray(data)
        if self.data.size == 0:
            self.logger.error("data must contain at least one element")
            raise ValueError("data must contain at least one element")
        if not np.issubdtype(self.data.dtype, np.number):
            self.logger.error("data must be numeric")
            raise ValueError("data must be numeric")

        # Resolve the data range; fall back to the observed min/max.
        self.data_lb = np.min(self.data) if data_lb is None else data_lb
        self.data_ub = np.max(self.data) if data_ub is None else data_ub
        if self.data_lb >= self.data_ub:
            # Kept non-fatal to preserve existing behavior; the bounds only
            # affect generate_ks_points(), not the WEDF values themselves.
            self.logger.info("data_lb must be less than data_ub")

        # Sort data and carry the weights along with the same permutation.
        sort_idx = np.argsort(self.data)
        self.data = self.data[sort_idx]

        if weights is None:
            # Equal weights if none provided.
            self.weights = np.ones_like(self.data)
        else:
            weights = np.asarray(weights)
            if weights.shape[0] != self.data.shape[0]:
                self.logger.error("weights must have the same length as data")
                raise ValueError("weights must have the same length as data")
            self.weights = weights[sort_idx]

        # Normalize weights; a non-positive total would make every WEDF
        # value NaN/inf, so reject it explicitly.
        total_weight = np.sum(self.weights)
        if total_weight <= 0:
            self.logger.error("weights must sum to a positive value")
            raise ValueError("weights must sum to a positive value")
        self.normalized_weights = self.weights / total_weight

        # Calculate WEDF values.
        self._calculate_wedf()

        # Log at the end so the instance attributes actually appear in the
        # message (previously this ran first, when __dict__ was empty), and
        # use lazy %-style args instead of an f-string.
        self.logger.debug("%s initialized with data of size %d", self.__class__.__name__, self.data.size)

    def _calculate_wedf(self):
        """Calculate the WEDF value at each (sorted) data point.

        Uses the recursive relation
            F[0] = w[0] / 2
            F[k] = F[k-1] + (w[k-1] + w[k]) / 2
        over the normalized weights w, so each step lands halfway through
        the probability mass of the current point.
        """
        n = len(self.data)
        self.wedf_values = np.zeros(n)

        # First value: half of the first point's mass.
        self.wedf_values[0] = self.normalized_weights[0] / 2

        # Remaining values via the recursive relation above.
        for k in range(1, n):
            self.wedf_values[k] = (self.wedf_values[k-1] +
                                   (self.normalized_weights[k-1] + self.normalized_weights[k]) / 2)

    def fit(self, z):
        """
        Evaluate the WEDF at the given points.

        Parameters
        ----------
        z : float or array-like
            Points at which to evaluate the WEDF.

        Returns
        -------
        float or ndarray
            WEDF values at the given points: 0.0 at or below the smallest
            data point, 1.0 at or above the largest, and the step value of
            the largest data point strictly below `z` otherwise. A scalar
            input returns a scalar.
        """
        self.logger.info("Fitting WEDF at given points.")
        z = np.asarray(z)
        single_value = z.ndim == 0

        if single_value:
            z = np.array([z])

        result = np.zeros_like(z, dtype=float)

        for i, point in enumerate(z):
            if point <= self.data[0]:
                result[i] = 0.0
            elif point >= self.data[-1]:
                result[i] = 1.0
            else:
                # Index of the largest data point strictly less than `point`
                # (searchsorted returns the left insertion index).
                idx = np.searchsorted(self.data, point) - 1
                result[i] = self.wedf_values[idx]

        self.logger.info("WEDF fitting completed.")
        return result[0] if single_value else result

    def plot(self, ax=None):
        """
        Plot the WEDF as a step function.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes to plot on. If None, a new figure and axes are created.

        Returns
        -------
        matplotlib.axes.Axes or None
            The axes containing the plot, or None if matplotlib is not
            installed (a warning is logged instead).
        """
        try:
            import matplotlib.pyplot as plt
            if ax is None:
                fig, ax = plt.subplots()

            # Duplicate interior points to render the WEDF as a step function.
            x = np.repeat(self.data, 2)[1:]
            y = np.repeat(self.wedf_values, 2)[:-1]

            # Anchor the steps at probability 0 and 1 at the data extremes.
            x = np.concatenate([[self.data[0]], x, [self.data[-1]]])
            y = np.concatenate([[0], y, [1]])

            ax.plot(x, y, 'b-', label='WEDF')
            ax.set_xlabel('Data Value')
            ax.set_ylabel('Cumulative Probability')
            ax.set_title('Weighted Empirical Distribution Function')
            ax.grid(True)
            return ax

        except ImportError:
            self.logger.warning("Matplotlib is required for plotting.")
            return None

    def generate_ks_points(self, num_points=None):
        """
        Generate Kolmogorov-Smirnov points for distribution fitting.

        Parameters
        ----------
        num_points : int, optional
            Number of K-S points to generate. If None, uses the length of
            the data.

        Returns
        -------
        Z0 : ndarray
            Generated K-S points, spread linearly over [data_lb, data_ub].
        ks_probs : ndarray
            Corresponding probabilities (2k-1)/(2L) for k = 1..L.
        """
        # Use data length if not specified.
        L = num_points if num_points is not None else len(self.data)

        # K-S probabilities: midpoints of L equal probability bins.
        ks_probs = np.arange(1, 2*L, 2) / (2*L)

        # Map the probabilities linearly onto the data range.
        data_range = self.data_ub - self.data_lb
        Z0 = self.data_lb + data_range * ks_probs

        return Z0, ks_probs