machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Marginal Interval Analysis
|
|
3
|
+
|
|
4
|
+
Take care of end-2-end gnostic process. Primarily work with ELDF.
|
|
5
|
+
|
|
6
|
+
This module implements the `DataIntervals` class, which provides robust, adaptive, and diagnostic interval estimation for GDF classes such as ELDF, EGDF, QLDF, and QGDF. It estimates meaningful data intervals (such as tolerance and typical intervals) based on the behavior of the GDF's central parameter (Z0) as the data is extended, while enforcing ordering constraints and providing detailed diagnostics.
|
|
7
|
+
|
|
8
|
+
Author: Nirmal Parmar
|
|
9
|
+
Machine Gnostics
|
|
10
|
+
'''
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import warnings
|
|
14
|
+
import logging
|
|
15
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
16
|
+
from machinegnostics.magcal import ELDF, EGDF, DataHomogeneity, DataIntervals, DataCluster, DataMembership
|
|
17
|
+
|
|
18
|
+
class IntervalAnalysis:
|
|
19
|
+
"""
|
|
20
|
+
End-to-End Marginal Interval Analysis for Gnostic Distribution Functions (GDF)
|
|
21
|
+
|
|
22
|
+
The `IntervalAnalysis` class provides a robust, automated workflow for estimating meaningful data intervals
|
|
23
|
+
(such as tolerance and typical intervals) using Gnostic Distribution Functions (GDFs) like ELDF and EGDF.
|
|
24
|
+
It is designed for reliability, diagnostics, and adaptive interval estimation in scientific and engineering data analysis.
|
|
25
|
+
|
|
26
|
+
This class orchestrates the complete process:
|
|
27
|
+
- Fits an EGDF to the data for global distribution analysis and homogeneity testing.
|
|
28
|
+
- Optionally re-fits for non-homogeneous data and issues warnings if needed.
|
|
29
|
+
- Fits an ELDF for local distribution analysis.
|
|
30
|
+
- Computes robust data intervals using the DataIntervals engine, enforcing ordering constraints.
|
|
31
|
+
- Provides detailed diagnostics, warnings, and error tracking.
|
|
32
|
+
- Offers visualization methods for both the fitted distributions and the estimated intervals.
|
|
33
|
+
|
|
34
|
+
Parameters
|
|
35
|
+
----------
|
|
36
|
+
DLB : float, optional
|
|
37
|
+
Data Lower Bound (absolute minimum possible value for the data).
|
|
38
|
+
DUB : float, optional
|
|
39
|
+
Data Upper Bound (absolute maximum possible value for the data).
|
|
40
|
+
LB : float, optional
|
|
41
|
+
Lower Probable Bound (practical lower limit for the distribution).
|
|
42
|
+
UB : float, optional
|
|
43
|
+
Upper Probable Bound (practical upper limit for the distribution).
|
|
44
|
+
S : float or str, default='auto'
|
|
45
|
+
Scale parameter for the distribution. Use 'auto' for automatic estimation.
|
|
46
|
+
z0_optimize : bool, default=True
|
|
47
|
+
Whether to optimize the central parameter Z0 during fitting.
|
|
48
|
+
tolerance : float, default=1e-9
|
|
49
|
+
Convergence tolerance for optimization.
|
|
50
|
+
data_form : str, default='a'
|
|
51
|
+
Data processing form: 'a' for additive, 'm' for multiplicative.
|
|
52
|
+
n_points : int, default=100
|
|
53
|
+
Number of points for distribution evaluation.
|
|
54
|
+
homogeneous : bool, default=True
|
|
55
|
+
Whether to assume data homogeneity (enables homogeneity testing).
|
|
56
|
+
catch : bool, default=True
|
|
57
|
+
If True, stores warnings/errors and intermediate results.
|
|
58
|
+
weights : np.ndarray, optional
|
|
59
|
+
Prior weights for data points.
|
|
60
|
+
wedf : bool, default=False
|
|
61
|
+
Use Weighted Empirical Distribution Function if True.
|
|
62
|
+
opt_method : str, default='L-BFGS-B'
|
|
63
|
+
Optimization method for parameter estimation.
|
|
64
|
+
verbose : bool, default=False
|
|
65
|
+
Print detailed progress and diagnostics if True.
|
|
66
|
+
max_data_size : int, default=1000
|
|
67
|
+
Maximum data size for smooth GDF generation.
|
|
68
|
+
flush : bool, default=True
|
|
69
|
+
Flush intermediate arrays after fitting to save memory.
|
|
70
|
+
dense_zone_fraction : float, default=0.4
|
|
71
|
+
Fraction of search domain near Z0 for dense interval search.
|
|
72
|
+
dense_points_fraction : float, default=0.7
|
|
73
|
+
Fraction of search points allocated to the dense zone.
|
|
74
|
+
convergence_window : int, default=15
|
|
75
|
+
Window size for convergence detection in interval search.
|
|
76
|
+
convergence_threshold : float, default=1e-6
|
|
77
|
+
Threshold for Z0 convergence in interval search.
|
|
78
|
+
min_search_points : int, default=30
|
|
79
|
+
Minimum search points before checking for convergence.
|
|
80
|
+
boundary_margin_factor : float, default=0.001
|
|
81
|
+
Margin factor to avoid searching exactly at the boundaries.
|
|
82
|
+
extrema_search_tolerance : float, default=1e-6
|
|
83
|
+
Tolerance for detecting extrema in Z0 variation.
|
|
84
|
+
gdf_recompute : bool, default=False
|
|
85
|
+
If True, recompute the GDF for each candidate datum in interval search.
|
|
86
|
+
gnostic_filter : bool, default=False
|
|
87
|
+
If True, apply gnostic clustering to filter outlier Z0 values in interval search.
|
|
88
|
+
|
|
89
|
+
Attributes
|
|
90
|
+
----------
|
|
91
|
+
params : dict
|
|
92
|
+
Stores all warnings, errors, and diagnostic information from the analysis.
|
|
93
|
+
|
|
94
|
+
Methods
|
|
95
|
+
-------
|
|
96
|
+
fit(data, plot=False)
|
|
97
|
+
Run the complete interval analysis workflow on the input data.
|
|
98
|
+
results()
|
|
99
|
+
Return a dictionary of estimated interval results and bounds.
|
|
100
|
+
plot(GDF=True, intervals=True)
|
|
101
|
+
Visualize the fitted GDFs and the estimated intervals.
|
|
102
|
+
|
|
103
|
+
Usage Example
|
|
104
|
+
-------------
|
|
105
|
+
>>> from machinegnostics.magcal import IntervalAnalysis
|
|
106
|
+
>>> data = np.array([...])
|
|
107
|
+
>>> ia = IntervalAnalysis(verbose=True)
|
|
108
|
+
>>> ia.fit(data, plot=True)
|
|
109
|
+
>>> print(ia.results())
|
|
110
|
+
>>> ia.plot()
|
|
111
|
+
|
|
112
|
+
Notes
|
|
113
|
+
-----
|
|
114
|
+
- The class is designed for robust, end-to-end interval estimation and diagnostics.
|
|
115
|
+
- Homogeneity of the data is checked automatically; warnings are issued if violated.
|
|
116
|
+
- For best results, use with ELDF/EGDF and set 'wedf=False' for interval estimation.
|
|
117
|
+
- The class is suitable for scientific, engineering, and reliability applications.
|
|
118
|
+
- All warnings and errors are stored in the `params` attribute for later inspection.
|
|
119
|
+
|
|
120
|
+
See Also
|
|
121
|
+
--------
|
|
122
|
+
ELDF, EGDF, DataIntervals
|
|
123
|
+
|
|
124
|
+
"""
|
|
125
|
+
def __init__(self,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S: str = 'auto',
             z0_optimize: bool = True,
             tolerance: float = 1e-5,
             data_form: str = 'a',
             n_points: int = 100,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = False,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True,
             dense_zone_fraction: float = 0.4,
             dense_points_fraction: float = 0.7,
             convergence_window: int = 15,
             convergence_threshold: float = 1e-6,
             min_search_points: int = 30,
             boundary_margin_factor: float = 0.001,
             extrema_search_tolerance: float = 1e-6,
             gdf_recompute: bool = False,
             gnostic_filter: bool = False,
             cluster_bounds: bool = True,
             membership_bounds: bool = True
             ):
    """
    Store the analysis configuration and prepare diagnostics containers.

    Every constructor argument is kept as a same-named instance attribute;
    see the class docstring for the meaning of each parameter. Also
    initializes the `params` diagnostics dictionary (with empty
    'error'/'warnings' lists), the fitted-state flag, and the logger.
    """
    # Keep each constructor argument as a same-named attribute.
    _config = dict(
        DLB=DLB, DUB=DUB, LB=LB, UB=UB, S=S,
        z0_optimize=z0_optimize, tolerance=tolerance, data_form=data_form,
        n_points=n_points, homogeneous=homogeneous, catch=catch,
        weights=weights, wedf=wedf, opt_method=opt_method, verbose=verbose,
        max_data_size=max_data_size, flush=flush,
        dense_zone_fraction=dense_zone_fraction,
        dense_points_fraction=dense_points_fraction,
        convergence_window=convergence_window,
        convergence_threshold=convergence_threshold,
        min_search_points=min_search_points,
        boundary_margin_factor=boundary_margin_factor,
        extrema_search_tolerance=extrema_search_tolerance,
        gdf_recompute=gdf_recompute, gnostic_filter=gnostic_filter,
        cluster_bounds=cluster_bounds, membership_bounds=membership_bounds,
    )
    for _name, _value in _config.items():
        setattr(self, _name, _value)

    # Not fitted until fit() completes successfully.
    self._fitted = False

    # Diagnostics store: accumulated errors, warnings, and intermediate results.
    self.params = {'error': [], 'warnings': []}

    # Logger verbosity follows the `verbose` flag.
    self.logger = get_logger(self.__class__.__name__,
                             logging.DEBUG if verbose else logging.WARNING)
    self.logger.debug(f"{self.__class__.__name__} initialized:")
def _add_warning(self, warning: str):
    """Record *warning* in the diagnostics store and emit it on the logger."""
    self.params['warnings'].append(warning)
    self.logger.warning(f'Warning: {warning}')
def _add_error(self, error: str):
    """Record *error* in the diagnostics store and emit it on the logger."""
    self.params['error'].append(error)
    self.logger.error(f'Error: {error}')
def _input_data_check(self, data: np.ndarray):
    """
    Validate the input sample before fitting.

    Raises TypeError for a non-ndarray input and ValueError for an array
    that is not 1-D, has fewer than 4 elements, or contains NaN/Inf.
    Checks run in that order; the first failure wins.
    """
    self.logger.info("Checking input data validity.")
    # Lazy predicates: later checks touch ndarray attributes, so they must
    # not be evaluated until the type check has passed.
    validators = (
        (lambda: isinstance(data, np.ndarray),
         TypeError, "Data must be a numpy array."),
        (lambda: data.ndim == 1,
         ValueError, "Data must be a 1D array."),
        (lambda: data.size >= 4,
         ValueError, "Data must contain at least 4 elements."),
        (lambda: not (np.any(np.isnan(data)) or np.any(np.isinf(data))),
         ValueError, "Data contains NaN or Inf values."),
    )
    for is_ok, exc_type, message in validators:
        if not is_ok():
            self.logger.error(f'Error: {message}')
            raise exc_type(message)
def _check_egdf_homogeneity(self, egdf: 'EGDF'):
    """
    Test the fitted EGDF for data homogeneity.

    Parameters
    ----------
    egdf : EGDF
        A fitted global distribution function to test.

    Returns
    -------
    bool
        True if the data is homogeneous (or the check is disabled),
        False otherwise. When `catch` is True, the DataHomogeneity
        diagnostics are stored under params['DataHomogeneity'].
    """
    self.logger.info("Checking data homogeneity using EGDF.")
    if self.homogeneous:
        self.dh = DataHomogeneity(gdf=egdf, verbose=self.verbose)
        is_homogeneous = self.dh.fit()
        if not is_homogeneous:
            # _add_warning already appends to params['warnings'] AND logs;
            # the original additionally appended/logged the same message
            # again, producing duplicate warnings — fixed here.
            self._add_warning("Data is not homogeneous. Interval estimation may get affected.")
            if self.catch:
                self.params['DataHomogeneity'] = self.dh.params.copy()
    else:
        # Check disabled: warn once and assume homogeneity.
        self._add_warning("Homogeneity check is disabled. Proceeding without checking.")
        is_homogeneous = True
    return is_homogeneous
def _get_cluster_bounds(self):
    """
    Derive cluster bounds (LCB, UCB) from the fitted ELDF when enabled.

    Sets self.LCB / self.UCB; both are None when `cluster_bounds` is off.
    When `catch` is True, DataCluster diagnostics are stored in params.
    """
    self.logger.info("Estimating clustering bounds if required.")
    # clustering bounds if required
    if not self.cluster_bounds:
        self.LCB, self.UCB = None, None
        self.logger.info("Skipping clustering for bound estimation.")
        return
    self.logger.info("Cluster bound estimation...")
    self._data_cluster = DataCluster(gdf=self._eldf, verbose=self.verbose, catch=self.catch)
    self.LCB, self.UCB = self._data_cluster.fit()
    if self.catch:
        self.params['DataCluster'] = self._data_cluster.params.copy()
    self.logger.info(f"Updated LCB={self.LCB}, UCB={self.UCB} based on clustering.")
def _get_membership_bounds(self):
    """
    Derive membership bounds (LSB, USB) from the fitted EGDF when enabled.

    Sets self.LSB / self.USB; both are None when `membership_bounds` is off.
    When `catch` is True, DataMembership diagnostics are stored in params.
    """
    self.logger.info("Estimating membership bounds if required.")
    # membership bounds if required
    if not self.membership_bounds:
        self.LSB, self.USB = None, None
        self.logger.info("Skipping membership bound estimation.")
        return
    self.logger.info("Estimating data membership bounds...")
    self._data_membership = DataMembership(egdf=self._egdf, verbose=self.verbose, catch=self.catch)
    self.LSB, self.USB = self._data_membership.fit()
    if self.catch:
        self.params['DataMembership'] = self._data_membership.params.copy()
    self.logger.info(f"Updated DLB={self.DLB}, DUB={self.DUB} based on membership.")
def fit(self, data: np.ndarray, plot: bool = False) -> dict:
    """
    Run the complete marginal interval analysis workflow on the input data.

    Validates the data, fits an EGDF (re-fitting once with gnostic weights
    if the data is found non-homogeneous), fits an ELDF, estimates cluster
    and membership bounds, then runs the DataIntervals engine to compute
    tolerance and typical intervals.

    Parameters
    ----------
    data : np.ndarray
        1D numpy array with at least 4 elements and no NaN/Inf values.
    plot : bool, default=False
        If True, generate diagnostic plots after fitting.

    Returns
    -------
    dict
        Estimated interval bounds and diagnostics (see `results()`).

    Raises
    ------
    TypeError
        If the input data is not a numpy array.
    ValueError
        If the data is not 1D, has fewer than 4 elements, or contains NaN/Inf.

    Notes
    -----
    - All warnings/errors encountered are stored in the `params` attribute.
    - Fixes vs. the original implementation: `is_homogeneous_2` was only
      assigned on the non-homogeneous path but read unconditionally at the
      membership-bounds gate (NameError for homogeneous data); duplicate
      warning appends and a duplicate LSB/USB assignment were removed;
      `raise e` was replaced with a bare `raise` to keep the traceback.
    """
    self.logger.info("Starting fit process for IntervalAnalysis.")
    try:
        # check input data
        self.logger.info("Checking input data...")
        self._input_data_check(data)

        # Shared constructor arguments for all GDF fits; 'homogeneous' is
        # supplied per-fit below (the original built three near-identical
        # dicts that differed only in that key).
        base_kwargs = {
            'DLB': self.DLB,
            'DUB': self.DUB,
            'LB': self.LB,
            'UB': self.UB,
            'S': self.S,
            'z0_optimize': self.z0_optimize,
            'tolerance': self.tolerance,
            'data_form': self.data_form,
            'n_points': self.n_points,
            'catch': self.catch,
            'weights': self.weights,
            'wedf': self.wedf,
            'opt_method': self.opt_method,
            'verbose': self.verbose,
            'max_data_size': self.max_data_size,
            'flush': self.flush,
        }

        # estimate EGDF assuming homogeneous data
        self.logger.info("Estimating EGDF...")
        self._egdf = EGDF(homogeneous=True, **base_kwargs)
        self._egdf.fit(data)
        if self.catch:
            self.params['EGDF'] = self._egdf.params.copy()

        # check homogeneity
        self.logger.info("Checking data homogeneity...")
        is_homogeneous = self._check_egdf_homogeneity(self._egdf)

        # data must be homogeneous; otherwise re-fit with gnostic weights
        # for non-homogeneous data and re-test.
        if not is_homogeneous:
            self._egdf = EGDF(homogeneous=False, **base_kwargs)
            self._egdf.fit(data)
            if self.catch:
                self.params['EGDF_non_homogeneous'] = self._egdf.params.copy()

            # check homogeneity
            self.logger.info("Checking data homogeneity again...")
            is_homogeneous = self._check_egdf_homogeneity(self._egdf)

            # final check: data could not be converted to homogeneous
            if not is_homogeneous:
                warning_msg = "Data is not homogeneous after re-estimation."
                warning_msg += "Suggested to switch S=1, to improve stability of interval analysis. Advised to process with outliers and re-run OR set S value manually."
                # _add_warning records and logs; no duplicate append needed.
                self._add_warning(warning_msg)
                if self.catch:
                    self.params['DataHomogeneity'] = self.dh.params.copy()

        # estimate ELDF (local distribution)
        self.logger.info("Estimating ELDF...")
        self._eldf = ELDF(homogeneous=self.homogeneous, **base_kwargs)
        self._eldf.fit(data)
        if self.catch:
            self.params['ELDF'] = self._eldf.params.copy()

        # get clustering bounds if required
        self.logger.info("Estimating clustering and membership bounds if required.")
        self._get_cluster_bounds()

        # get membership bounds if required — only meaningful on
        # homogeneous data.
        self.logger.info("Estimating membership bounds if required.")
        if is_homogeneous:
            self._get_membership_bounds()
        else:
            self.LSB, self.USB = None, None
            if self.verbose:
                self._add_warning("Skipping membership bound estimation due to non-homogeneous data.")

        # estimate intervals with DataIntervals, minimum compute settings
        self.logger.info("Estimating intervals using DataIntervals engine...")
        self._intv_engine = DataIntervals(
            gdf=self._eldf,
            n_points=self.n_points,
            dense_zone_fraction=self.dense_zone_fraction,
            dense_points_fraction=self.dense_points_fraction,
            convergence_window=self.convergence_window,
            convergence_threshold=self.convergence_threshold,
            min_search_points=self.min_search_points,
            boundary_margin_factor=self.boundary_margin_factor,
            extrema_search_tolerance=self.extrema_search_tolerance,
            gdf_recompute=self.gdf_recompute,
            gnostic_filter=self.gnostic_filter,
            catch=self.catch,
            verbose=self.verbose,
            flush=self.flush,
        )
        self._intv_engine.fit()

        # z0 and intervals, pulled from the engine (None when absent)
        for attr in ('Z0', 'Z0L', 'Z0U', 'ZL', 'ZU', 'LSD', 'USD'):
            setattr(self, attr, getattr(self._intv_engine, attr, None))
        # bounds possibly refined by the ELDF fit
        for attr in ('DLB', 'DUB', 'LB', 'UB'):
            setattr(self, attr, getattr(self._eldf, attr, getattr(self, attr)))

        if self.catch:
            self.params['DataIntervals'] = self._intv_engine.params.copy()

        # fit status
        self._fitted = True
        self.logger.info("Fit process completed successfully.")
        if self.catch:
            self.params['fitted'] = self._fitted

        # if plot is True, generate diagnostic plots
        if plot:
            self.logger.info("Generating diagnostic plots as requested.")
            self._intv_engine.plot()

        return self.results()

    except Exception as exc:
        self._add_error(str(exc))
        raise  # bare raise preserves the original traceback
def results(self) -> dict:
    """
    Return a dictionary of estimated interval results and bounds.

    Returns
    -------
    dict
        Keys (values may be None when unavailable):
        'LB', 'LSB', 'DLB', 'LCB', 'LSD' — lower bounds of various kinds;
        'ZL' / 'ZU' — typical data interval;
        'Z0L' / 'Z0' / 'Z0U' — tolerance interval around the central value;
        'USD', 'UCB', 'DUB', 'USB', 'UB' — upper bounds of various kinds.
        Non-None values are cast to plain Python floats.

    Example
    -------
    >>> intervals = ia.results()
    >>> print(intervals['Z0L'], intervals['Z0U'])
    """
    self.logger.info("Compiling results dictionary.")

    def _as_float(value):
        # Preserve None; cast everything else to a plain float.
        return None if value is None else float(value)

    ordered_keys = ('LB', 'LSB', 'DLB', 'LCB', 'LSD', 'ZL', 'Z0L',
                    'Z0', 'Z0U', 'ZU', 'USD', 'UCB', 'DUB', 'USB', 'UB')
    return {key: _as_float(getattr(self, key)) for key in ordered_keys}
def plot(self, GDF: bool = True, intervals: bool = True) -> None:
    """
    Visualize the fitted GDF (ELDF) and the estimated intervals.

    Parameters
    ----------
    GDF : bool, default=True
        If True, plot the fitted ELDF (local distribution function).
    intervals : bool, default=True
        If True, plot the estimated intervals and Z0 variation.

    Returns
    -------
    None

    Notes
    -----
    - Requires matplotlib to be installed.
    - Silently does nothing unless `fit()` has produced both the ELDF
      and the intervals engine.

    Example
    -------
    >>> ia.plot(GDF=True, intervals=True)
    """
    self.logger.info("Generating plots for fitted GDF and intervals.")

    # Nothing to draw until fit() has built both components.
    ready = hasattr(self, '_intv_engine') and hasattr(self, '_eldf')
    if not ready:
        return
    if GDF:
        self._eldf.plot()
    if intervals:
        self._intv_engine.plot_intervals()
        self._intv_engine.plot()
def __repr__(self):
    """Return a debug representation; safe to call before fit()."""
    if not self._fitted:
        # results() reads attributes (LB, Z0, ...) that only exist after
        # fit(); calling it on an unfitted instance raised AttributeError
        # in the original — guard and omit results instead.
        return f"IntervalAnalysis(fitted={self._fitted}, verbose={self.verbose})"
    return f"IntervalAnalysis(fitted={self._fitted}, verbose={self.verbose}, results={self.results()})"