machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
'''
|
|
2
|
+
DataMembership
|
|
3
|
+
|
|
4
|
+
- Membership test: "Is a value Zξ a potential member of the given sample Z?" In other words: "Will the homogeneous sample Z remain homogeneous after extension by Zξ?"
|
|
5
|
+
- This only works with EGDF
|
|
6
|
+
- logic process:
|
|
7
|
+
1. Check if the sample Z is homogeneous using DataHomogeneity. For that first look into egdf.params['is_homogeneous']. If not present, run DataHomogeneity on Z.
|
|
8
|
+
2. If Z is homogeneous, extend egdf.data sample with Zξ in range of [lb, ub] and check if the extended sample remains homogeneous using DataHomogeneity.
|
|
9
|
+
3. We need to find two bounds: the lower sample bound (LSB) and the upper sample bound (USB). The search range for LSB is [LB, DLB] and the search range for USB is [DUB, UB], where DLB and DUB are the data limits (min and max of Z) and LB and UB are the lower and upper bounds of the data universe.
|
|
10
|
+
4. Find the minimum and maximum values of Zξ that keep the extended sample homogeneous.
|
|
11
|
+
|
|
12
|
+
'''
|
|
13
|
+
import logging
|
|
14
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
15
|
+
import numpy as np
|
|
16
|
+
import matplotlib.pyplot as plt
|
|
17
|
+
from typing import Dict, Any, Tuple, Optional
|
|
18
|
+
from machinegnostics.magcal.gdf.egdf import EGDF
|
|
19
|
+
from machinegnostics.magcal.gdf.homogeneity import DataHomogeneity
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataMembership:
    """
    Membership test for a homogeneous data sample.

    Answers the question "can a new value be added to the sample held by a
    fitted EGDF object without making the sample non-homogeneous?". The class
    first confirms that the original sample is homogeneous, then walks outward
    from the data limits (DLB downwards, DUB upwards) in small steps. At every
    step the candidate value is appended to the data, a fresh EGDF is fitted
    and DataHomogeneity is run on it. The outermost candidate that keeps the
    extended sample homogeneous is reported as the Lower Sample Bound (LSB)
    or Upper Sample Bound (USB).

    Attributes:
        egdf (EGDF): Fitted EGDF object holding the data sample and its parameters.
        verbose (bool): If True the logger is set to DEBUG, otherwise WARNING.
        catch (bool): If True, results, errors and warnings are stored in `params`.
        tolerance (float): Stored and reported under `search_parameters`; the
            bound search itself does not consult it.
        max_iterations (int): Maximum number of outward steps per bound.
        initial_step_factor (float): Fraction of the data range (DUB - DLB)
            used as the initial step size.
        LSB (float): Calculated Lower Sample Bound (None until `fit`).
        USB (float): Calculated Upper Sample Bound (None until `fit`).
        is_homogeneous (bool): Whether the original sample is homogeneous.
        params (dict): Results, errors, warnings and search parameters.
        _fitted (bool): True once `fit` has completed successfully.

    Methods:
        fit():
            Runs the membership analysis and returns the tuple (LSB, USB).

        plot(plot_smooth=True, plot='both', bounds=True, figsize=(12, 8)):
            Plots the EGDF and/or PDF together with the membership bounds.

        results():
            Returns a copy of the `params` dictionary.

        fitted:
            Property telling whether the analysis has been completed.

    Example:
        >>> from machinegnostics.magcal import EGDF, DataMembership
        >>> egdf_instance = EGDF(S=2.0)
        >>> egdf_instance.fit(data=np.array([1.2, 1.5, 1.7, 1.9]))
        >>> membership = DataMembership(egdf_instance, verbose=True)
        >>> lsb, usb = membership.fit()
        >>> print(f"Lower Bound: {lsb}, Upper Bound: {usb}")
        >>> membership.plot()
        >>> results = membership.results()
        >>> print(results)
    """

    def __init__(self,
                 egdf: "EGDF",
                 verbose: bool = True,
                 catch: bool = True,
                 tolerance: float = 1e-3,
                 max_iterations: int = 100,
                 initial_step_factor: float = 0.001):
        """
        Store the configuration and validate the supplied EGDF object.

        Parameters:
            egdf (EGDF): A fitted EGDF object that contains data.
            verbose (bool): Log at DEBUG level when True, WARNING otherwise.
            catch (bool): Store results, errors and warnings in `params`.
            tolerance (float): Numerical tolerance, kept for reporting.
            max_iterations (int): Maximum number of search steps per bound.
            initial_step_factor (float): Initial step as a fraction of the data range.

        Raises:
            ValueError: If `egdf` is not an EGDF object, is not fitted, or has no data.
        """
        self.egdf = egdf
        self.verbose = verbose
        self.catch = catch
        self.tolerance = tolerance
        self.max_iterations = max_iterations
        self.initial_step_factor = initial_step_factor

        # Set up logger
        self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
        self.logger.debug(f"{self.__class__.__name__} initialized: ")

        # Validate EGDF object
        self._validate_egdf()

        self.LSB = None
        self.USB = None
        self.is_homogeneous = None
        self._fitted = False
        self.params = {}

        if self.catch:
            self.params['errors'] = []
            self.params['warnings'] = []

    def _validate_egdf(self):
        """
        Check that `self.egdf` is a fitted EGDF object carrying data.

        The type check is name based: any object whose class name contains
        'EGDF' is accepted.

        Raises:
            ValueError: If the object is None, is not an EGDF, is not fitted,
                or has no data.
        """
        self.logger.debug("Validating EGDF object for DataMembership analysis")
        if self.egdf is None:
            self.logger.error("Input must be an EGDF object")
            raise ValueError("Input must be an EGDF object")

        class_name = type(self.egdf).__name__
        if 'EGDF' not in class_name:
            self.logger.error(f"Only EGDF objects are supported. Got {class_name}")
            raise ValueError(f"Only EGDF objects are supported. Got {class_name}")

        if not getattr(self.egdf, '_fitted', False):
            self.logger.error("EGDF object must be fitted before membership analysis")
            raise ValueError("EGDF object must be fitted before membership analysis")

        if getattr(self.egdf, 'data', None) is None:
            self.logger.error("EGDF object must contain data")
            raise ValueError("EGDF object must contain data")

    def _append_error(self, error_message: str, exception_type: str = None):
        """Log an error and, when `catch` is True, record it in `params['errors']`."""
        self.logger.error(error_message)
        if self.catch:
            self.params.setdefault('errors', []).append({
                'method': 'DataMembership',
                'error': error_message,
                'exception_type': exception_type or 'DataMembershipError'
            })

    def _append_warning(self, warning_message: str):
        """Log a warning and, when `catch` is True, record it in `params['warnings']`."""
        self.logger.warning(warning_message)
        if self.catch:
            self.params.setdefault('warnings', []).append({
                'method': 'DataMembership',
                'warning': warning_message
            })

    def _check_original_homogeneity(self) -> bool:
        """
        Determine whether the original sample is homogeneous.

        A result already present under `egdf.params['is_homogeneous']` is
        reused; otherwise DataHomogeneity is run on the EGDF object.

        Returns:
            bool: The homogeneity verdict.

        Raises:
            Exception: Any error raised by DataHomogeneity is recorded via
                `_append_error` and re-raised.
        """
        self.logger.info("Checking original sample homogeneity")

        if (hasattr(self.egdf, 'params') and
                self.egdf.params and
                'is_homogeneous' in self.egdf.params):
            is_homogeneous = self.egdf.params['is_homogeneous']
            self.logger.info(f"Found existing homogeneity result: {is_homogeneous}")
            return is_homogeneous

        try:
            self.logger.info("Running DataHomogeneity analysis...")
            homogeneity = DataHomogeneity(
                gdf=self.egdf,
                verbose=self.verbose,
                catch=self.catch
            )
            is_homogeneous = homogeneity.fit()

            self.logger.info(f"Homogeneity analysis result: {is_homogeneous}")

            return is_homogeneous

        except Exception as e:
            error_msg = f"Error in homogeneity check: {str(e)}"
            self._append_error(error_msg, type(e).__name__)
            raise

    def _test_membership_at_point(self, test_point: float) -> bool:
        """
        Test whether the sample stays homogeneous when `test_point` is added.

        A new EGDF is built with the settings copied from `self.egdf`
        (weights are not carried over), fitted on the extended data, and
        checked with DataHomogeneity.

        Returns:
            bool: The homogeneity verdict for the extended sample. Any
                exception during fitting or testing is logged and treated
                as "not a member" (False).
        """
        self.logger.debug(f"Testing membership at point: {test_point:.6f}")
        try:
            extended_data = np.append(self.egdf.data, test_point)

            extended_egdf = EGDF(S=self.egdf.S,
                                 verbose=False,
                                 catch=True,
                                 flush=True,
                                 z0_optimize=self.egdf.z0_optimize,
                                 tolerance=self.egdf.tolerance,
                                 data_form=self.egdf.data_form,
                                 n_points=self.egdf.n_points,
                                 homogeneous=self.egdf.homogeneous,
                                 opt_method=self.egdf.opt_method,
                                 max_data_size=self.egdf.max_data_size,
                                 wedf=self.egdf.wedf,
                                 weights=None)
            extended_egdf.fit(data=extended_data, plot=False)

            homogeneity = DataHomogeneity(
                gdf=extended_egdf,
                verbose=False,
                catch=True
            )
            is_homogeneous = homogeneity.fit()

            return is_homogeneous

        except Exception as e:
            # Best effort: a candidate that cannot be evaluated is rejected.
            self.logger.error(f"Error testing point {test_point:.6f}: {str(e)}")
            return False

    def _calculate_adaptive_step(self, data_range: float, iteration: int) -> float:
        """
        Return the step size for the given iteration.

        The base step is `data_range * initial_step_factor`; it shrinks
        with the iteration count as `1 / (1 + 0.1 * iteration)`.
        """
        self.logger.debug(f"Calculating adaptive step size at iteration {iteration}")
        base_step = data_range * self.initial_step_factor
        decay_factor = 1.0 / (1.0 + 0.1 * iteration)
        return base_step * decay_factor

    def _find_sample_bound(self, bound_type: str) -> Optional[float]:
        """
        Search for the lower or upper sample bound.

        The search starts at the data limit (DLB for 'lower', DUB for
        'upper') and steps outward towards LB / UB. When LB / UB is None
        the search is limited to one data range beyond the data limit.
        The last candidate that kept the extended sample homogeneous is
        returned; the search stops at the first candidate that does not,
        when the search limit is reached, or after `max_iterations` steps.

        Parameters:
            bound_type (str): Either 'lower' or 'upper'.

        Returns:
            Optional[float]: The bound found. The data limit itself is
                returned when no outward step is accepted.

        Raises:
            ValueError: If `bound_type` is not 'lower' or 'upper'.
        """
        self.logger.info(f"Finding {bound_type} sample bound")
        if bound_type not in ('lower', 'upper'):
            self.logger.error("Invalid bound_type")
            raise ValueError("bound_type must be either 'lower' or 'upper'")

        data_range = self.egdf.DUB - self.egdf.DLB

        if bound_type == 'lower':
            search_start = self.egdf.DLB
            search_end = self.egdf.LB if self.egdf.LB is not None else self.egdf.DLB - data_range
            direction = "LSB"
            move_direction = -1
        else:
            search_start = self.egdf.DUB
            search_end = self.egdf.UB if self.egdf.UB is not None else self.egdf.DUB + data_range
            direction = "USB"
            move_direction = 1

        self.logger.info(f"Searching for {direction} from {search_start:.6f} towards {search_end:.6f}")

        # Check if the starting point (data boundary) is homogeneous
        if not self._test_membership_at_point(search_start):
            # If data boundary itself is not homogeneous, return the data boundary
            self.logger.info(f"Data boundary {search_start:.6f} is not homogeneous")
            self.logger.info(f"{direction} = {search_start:.6f} (data boundary)")
            return search_start

        current_point = search_start
        best_bound = search_start
        step_size = self._calculate_adaptive_step(data_range, 0)
        # Counted explicitly so the summary log below is valid even when the
        # loop body never runs (e.g. max_iterations <= 0).
        iterations_run = 0

        for iteration in range(self.max_iterations):
            if step_size == 0:
                # Degenerate range or zero step factor: every candidate would
                # equal the data boundary, which has already been accepted.
                break

            iterations_run = iteration + 1
            current_point += move_direction * step_size

            # Stop once the search limit has been reached or passed
            if bound_type == 'lower' and current_point <= search_end:
                break
            if bound_type == 'upper' and current_point >= search_end:
                break

            is_homogeneous = self._test_membership_at_point(current_point)

            if iteration % 10 == 0:
                self.logger.info(f"{direction} iteration {iteration}: "
                                 f"testing point {current_point:.6f} (homogeneous: {is_homogeneous})")

            if not is_homogeneous:
                # Found the boundary where homogeneity is lost
                break

            best_bound = current_point
            # Adaptive step size
            step_size = self._calculate_adaptive_step(data_range, iteration)

        self.logger.info(f"Found {direction} = {best_bound:.6f} after {iterations_run} iterations")

        return best_bound

    def fit(self) -> Tuple[Optional[float], Optional[float]]:
        """
        Perform the membership analysis and determine LSB and USB.

        The original sample is first checked for homogeneity. If it is
        homogeneous, the lower and upper sample bounds are searched. With
        `catch=True` the results are stored in `params`; if the EGDF object
        has a non-empty `params` dictionary, LSB, USB and a
        `membership_checked` flag are written there as well.

        Returns:
            Tuple[Optional[float], Optional[float]]: The calculated (LSB, USB).

        Raises:
            RuntimeError: If the original data sample is not homogeneous.
            Exception: Any other error raised during the analysis. Each
                failure is recorded once in `params['errors']` when
                `catch=True`.
        """
        self.logger.info("Starting membership analysis...")

        # Records its own error before re-raising, so it stays outside the
        # try block below to avoid a duplicate entry.
        self.is_homogeneous = self._check_original_homogeneity()

        if not self.is_homogeneous:
            error_msg = "Original sample is not homogeneous. Membership analysis requires homogeneous data."
            self._append_error(error_msg)
            raise RuntimeError(error_msg)

        try:
            self.logger.info("Original sample is homogeneous. Proceeding with bound search...")

            self.logger.info("Finding Lower Sample Bound (LSB)...")
            self.LSB = self._find_sample_bound('lower')

            self.logger.info("Finding Upper Sample Bound (USB)...")
            self.USB = self._find_sample_bound('upper')

            lsb_value = float(self.LSB) if self.LSB is not None else None
            usb_value = float(self.USB) if self.USB is not None else None

            if self.catch:
                self.params.update({
                    'LSB': lsb_value,
                    'USB': usb_value,
                    'is_homogeneous': self.is_homogeneous,
                    'membership_fitted': True,
                    'search_parameters': {
                        'tolerance': self.tolerance,
                        'max_iterations': self.max_iterations,
                        'initial_step_factor': self.initial_step_factor
                    }
                })

            if hasattr(self.egdf, 'params') and self.egdf.params:
                self.egdf.params.update({
                    'LSB': lsb_value,
                    'USB': usb_value,
                    'membership_checked': True
                })

                self.logger.info("Results written to EGDF params dictionary")

            self._fitted = True

            self.logger.info("Analysis completed successfully")
            if self.LSB is not None:
                self.logger.info(f"Lower Sample Bound (LSB) = {self.LSB:.6f}")
            if self.USB is not None:
                self.logger.info(f"Upper Sample Bound (USB) = {self.USB:.6f}")

            return self.LSB, self.USB

        except Exception as e:
            error_msg = f"Error during membership analysis: {str(e)}"
            self._append_error(error_msg, type(e).__name__)
            raise

    def _smooth_curve(self, values_attr: str):
        """Return (x, y) for a smooth curve stored on the EGDF object, or None if unavailable."""
        x_values = getattr(self.egdf, 'di_points_n', None)
        y_values = getattr(self.egdf, values_attr, None)
        if x_values is None or y_values is None:
            return None
        return x_values, y_values

    def plot(self,
             plot_smooth: bool = True,
             plot: str = 'both',
             bounds: bool = True,
             figsize: tuple = (12, 8)):
        """
        Plot the EGDF and/or PDF with the membership bounds.

        Parameters:
            plot_smooth (bool): If True, also draw the smooth curves when the
                EGDF object provides them.
            plot (str): What to plot: 'gdf', 'pdf', or 'both'.
            bounds (bool): If True, draw DLB, DUB, LB and UB (when present in
                `egdf.params`) and Z0 (when available).
            figsize (tuple): Size of the figure.

        Returns:
            None. Nothing is drawn when the EGDF object was created with
            `catch=False`.

        Raises:
            RuntimeError: If `fit` has not been called before plotting.
            Exception: Any error raised while plotting is recorded and re-raised.
        """
        self.logger.info("Generating membership plot...")

        if not self._fitted:
            self.logger.error("Must call fit() before plotting")
            raise RuntimeError("Must call fit() before plotting")

        if not self.egdf.catch:
            self.logger.warning("Plot is not available with EGDF catch=False")
            return

        if plot not in ('gdf', 'pdf', 'both'):
            self.logger.warning(f"Unknown plot option {plot!r}; expected 'gdf', 'pdf' or 'both'")

        try:
            import matplotlib.pyplot as plt

            # Create a fresh figure
            _, ax1 = plt.subplots(figsize=figsize)

            # Get EGDF data (as an array so list input works with min/max)
            x_points = np.asarray(self.egdf.data)
            egdf_data = self.egdf.params.get('egdf')
            pdf_data = self.egdf.params.get('pdf')

            # Debug info
            self.logger.info(f"LSB = {self.LSB}, USB = {self.USB}")
            self.logger.info(f"Data range: {x_points.min():.3f} to {x_points.max():.3f}")

            # Plot EGDF if requested
            if plot in ('gdf', 'both') and egdf_data is not None:
                ax1.plot(x_points, egdf_data, 'o', color='blue', label='EGDF', markersize=4)

                smooth = self._smooth_curve('egdf_points') if plot_smooth else None
                if smooth is not None:
                    ax1.plot(smooth[0], smooth[1],
                             color='blue', linestyle='-', linewidth=2, alpha=0.8)

                ax1.set_ylabel('EGDF', color='blue')
                ax1.tick_params(axis='y', labelcolor='blue')
                ax1.set_ylim(0, 1)

            # Plot PDF if requested
            if plot in ('pdf', 'both') and pdf_data is not None:
                # PDF alone uses the primary axis; together with the EGDF it
                # gets its own y-axis because the scales differ.
                pdf_ax = ax1 if plot == 'pdf' else ax1.twinx()
                pdf_ax.plot(x_points, pdf_data, 'o', color='red', label='PDF', markersize=4)

                smooth = self._smooth_curve('pdf_points') if plot_smooth else None
                if smooth is not None:
                    pdf_ax.plot(smooth[0], smooth[1],
                                color='red', linestyle='-', linewidth=2, alpha=0.8)

                pdf_ax.set_ylabel('PDF', color='red')
                pdf_ax.tick_params(axis='y', labelcolor='red')
                pdf_ax.set_ylim(0, np.max(pdf_data) * 1.1)
                if pdf_ax is not ax1:
                    pdf_ax.legend(loc='upper right')

            # Add LSB vertical line
            if self.LSB is not None:
                ax1.axvline(x=self.LSB, color='red', linestyle='--', linewidth=1.5,
                            alpha=0.9, label=f'LSB = {self.LSB:.3f}', zorder=10)
                self.logger.info(f"Added LSB line at {self.LSB}")

            # Add USB vertical line
            if self.USB is not None:
                ax1.axvline(x=self.USB, color='blue', linestyle='--', linewidth=1.5,
                            alpha=0.9, label=f'USB = {self.USB:.3f}', zorder=10)
                self.logger.info(f"Added USB line at {self.USB}")

            # Add membership range shading if both bounds exist
            if self.LSB is not None and self.USB is not None:
                ax1.axvspan(self.LSB, self.USB, alpha=0.05, color='green',
                            label='Membership Range', zorder=1)
                self.logger.info("Added membership range shading")

            # Add bounds if requested
            if bounds:
                bound_info = [
                    (self.egdf.params.get('DLB'), 'green', '-', 'DLB'),
                    (self.egdf.params.get('DUB'), 'orange', '-', 'DUB'),
                    (self.egdf.params.get('LB'), 'purple', '--', 'LB'),
                    (self.egdf.params.get('UB'), 'brown', '--', 'UB')
                ]

                for bound, color, style, name in bound_info:
                    if bound is not None:
                        ax1.axvline(x=bound, color=color, linestyle=style, linewidth=2,
                                    alpha=0.8, label=f"{name}={bound:.3f}")

                # Add Z0 if available
                if getattr(self.egdf, 'z0', None) is not None:
                    ax1.axvline(x=self.egdf.z0, color='magenta', linestyle='-.', linewidth=1,
                                alpha=0.8, label=f'Z0={self.egdf.z0:.3f}')

            # Set formatting
            ax1.set_xlabel('Data Points')
            ax1.grid(True, alpha=0.3)

            # Set title
            membership_info = []
            if self.LSB is not None:
                membership_info.append(f"LSB={self.LSB:.3f}")
            if self.USB is not None:
                membership_info.append(f"USB={self.USB:.3f}")

            if membership_info:
                title = f"EGDF Membership Analysis: {', '.join(membership_info)}"
            else:
                title = "EGDF Membership Analysis"

            ax1.set_title(title, fontsize=12)

            # Set x-limits with some padding. Fall back to the data extremes
            # when DLB/DUB are missing from params, and widen the window so
            # the LSB/USB lines are always inside the visible range.
            dlb = self.egdf.params.get('DLB')
            dub = self.egdf.params.get('DUB')
            x_low = float(x_points.min()) if dlb is None else dlb
            x_high = float(x_points.max()) if dub is None else dub
            if self.LSB is not None:
                x_low = min(x_low, self.LSB)
            if self.USB is not None:
                x_high = max(x_high, self.USB)
            padding = (x_high - x_low) * 0.1
            ax1.set_xlim(x_low - padding, x_high + padding)

            # Add legend
            ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))

            plt.tight_layout()
            plt.show()

        except Exception as e:
            error_msg = f"Error creating plot: {str(e)}"
            self._append_error(error_msg, type(e).__name__)
            raise

    def results(self) -> Dict[str, Any]:
        """
        Return the analysis results stored in `params`.

        Returns:
            Dict[str, Any]: A shallow copy of `params` (LSB, USB, homogeneity
                flag, search parameters, errors and warnings).

        Raises:
            RuntimeError: If `fit` has not been called yet.
            RuntimeError: If `catch` was False at initialization, since no
                results are stored in that case.
        """
        self.logger.info("Retrieving analysis results...")
        if not self._fitted:
            raise RuntimeError("No analysis results available. Call fit() method first")

        if not self.catch:
            raise RuntimeError("No results stored. Ensure catch=True during initialization")

        return self.params.copy()

    @property
    def fitted(self) -> bool:
        """Whether the membership analysis has been completed."""
        return self._fitted

    def __repr__(self):
        return (f"<DataMembership(fitted={self._fitted}, "
                f"LSB={self.LSB}, USB={self.USB}, "
                f"is_homogeneous={self.is_homogeneous})>")
|