machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,853 @@
'''
DataIntervals

Interval Analysis Engine

Author: Nirmal Parmar
Machine Gnostics
'''

import logging
import numpy as np
from typing import Optional, Union, Dict
from scipy.signal import savgol_filter, find_peaks
from machinegnostics.magcal import ELDF, EGDF, QLDF, QGDF, DataCluster
from machinegnostics.metrics.std import std
from machinegnostics.magcal.util.logging import get_logger

class DataIntervals:
    """
    Robust Interval Estimation Engine for GDF Classes

    The `DataIntervals` class provides robust, adaptive, and diagnostic interval estimation for
    Gnostics Distribution Function (GDF) classes such as ELDF, EGDF, QLDF, and QGDF.
    It is designed to estimate meaningful data intervals (such as tolerance and typical intervals)
    based on the behavior of the GDF's central parameter (Z0) as the data is extended,
    while enforcing ordering constraints and providing detailed diagnostics.

    Key Features:
    -------------
    - **Adaptive Search:**
      Efficiently scans the data domain with a dense search near the central value (Z0) and
      sparser search near the boundaries, balancing computational cost and accuracy.
    - **Robustness:**
      Supports optional recomputation of the GDF for each candidate datum, with optional
      gnostic filtering (clustering) to enhance robustness against outliers and noise.
    - **Diagnostics:**
      Provides warnings and errors for suboptimal settings, ordering violations, and
      insufficient data, and stores detailed parameters and results for further analysis.
    - **Ordering Constraint:**
      Ensures that the estimated intervals satisfy the natural ordering:
      ZL < Z0L < Z0 < Z0U < ZU, where ZL/ZU are typical data interval bounds and
      Z0L/Z0U are tolerance interval bounds.
    - **Visualization:**
      Offers plotting methods to visualize the Z0 variation, estimated intervals,
      and data coverage.

    Parameters
    ----------
    gdf : ELDF, EGDF, QLDF, or QGDF
        The fitted GDF (Gnostics Distribution Function) object to analyze.
    n_points : int, default=100
        Number of search points for interval estimation (minimum 50).
    dense_zone_fraction : float, default=0.4
        Fraction of the search domain near Z0 to search densely (range: 0.1 to 0.8).
    dense_points_fraction : float, default=0.7
        Fraction of points allocated to the dense zone (range: 0.5 to 0.9).
    convergence_window : int, default=15
        Number of points in the moving window for convergence detection.
    convergence_threshold : float, default=1e-6
        Threshold for standard deviation of Z0 in the convergence window.
    min_search_points : int, default=30
        Minimum number of search points before checking for convergence.
    boundary_margin_factor : float, default=0.001
        Margin factor to avoid searching exactly at the boundaries.
    extrema_search_tolerance : float, default=1e-6
        Tolerance for detecting extrema in Z0 variation.
    gdf_recompute : bool, default=False
        If True, recompute the GDF for each candidate datum.
    gnostic_filter : bool, default=False
        If True, apply gnostic clustering to filter outlier Z0 values.
    catch : bool, default=True
        If True, catch and store warnings/errors internally.
    verbose : bool, default=False
        If True, print detailed progress and diagnostics.
    flush : bool, default=False
        If True, flush memory after fitting to save resources.

    Attributes
    ----------
    ZL : float
        Lower bound of the typical data interval.
    Z0L : float
        Lower bound of the tolerance interval (Z0-based).
    Z0 : float
        Central value (Z0) of the original GDF.
    Z0U : float
        Upper bound of the tolerance interval (Z0-based).
    ZU : float
        Upper bound of the typical data interval.
    tolerance_interval : float
        Width of the tolerance interval (Z0U - Z0L).
    typical_data_interval : float
        Width of the typical data interval (ZU - ZL).
    ordering_valid : bool
        Whether the ordering constraint (ZL < Z0L < Z0 < Z0U < ZU) is satisfied.
    params : dict
        Dictionary of parameters, warnings, errors, and results.
    search_results : dict
        Raw search results for datum values and corresponding Z0s.

    Methods
    -------
    fit(plot=False)
        Run the interval estimation process. Optionally plot results.
    results() -> dict
        Return a dictionary of interval results and bounds.
    plot_intervals(figsize=(12, 8))
        Plot the Z0 variation and estimated intervals.
    plot(figsize=(12, 8))
        Plot the GDF, PDF, and intervals on the data domain.

    Usage Example
    -------------
    >>> eld = ELDF()
    >>> eld.fit(data)
    >>> di = DataIntervals(eld, n_points=200, gdf_recompute=True, gnostic_filter=True, verbose=True)
    >>> di.fit(plot=True)
    >>> print(di.results())
    >>> di.plot_intervals()
    >>> di.plot()

    Notes
    -----
    - For best results, use with ELDF or QLDF and set 'wedf=False' in the GDF.
    - Increasing 'n_points' improves accuracy but increases computation time.
    - Enable 'gdf_recompute' and 'gnostic_filter' for maximum robustness, especially with noisy data.
    - The class is designed for research and diagnostic use; adjust parameters for your data and application.
    """
    def __init__(self, gdf: Union[ELDF, EGDF, QLDF, QGDF],
                 n_points: int = 100,
                 dense_zone_fraction: float = 0.4,
                 dense_points_fraction: float = 0.7,
                 convergence_window: int = 15,
                 convergence_threshold: float = 1e-6,
                 min_search_points: int = 30,
                 boundary_margin_factor: float = 0.001,
                 extrema_search_tolerance: float = 1e-6,
                 gdf_recompute: bool = False,
                 gnostic_filter: bool = False,
                 catch: bool = True,
                 verbose: bool = False,
                 flush: bool = False):
        self.gdf = gdf
        self.n_points = max(n_points, 50)
        self.dense_zone_fraction = np.clip(dense_zone_fraction, 0.1, 0.8)
        self.dense_points_fraction = np.clip(dense_points_fraction, 0.5, 0.9)
        self.convergence_window = max(convergence_window, 5)
        self.convergence_threshold = convergence_threshold
        self.min_search_points = max(min_search_points, 10)
        self.boundary_margin_factor = max(boundary_margin_factor, 1e-6)
        self.extrema_search_tolerance = extrema_search_tolerance
        self.gdf_recompute = gdf_recompute
        self.gnostic_filter = gnostic_filter
        self.catch = catch
        self.verbose = verbose
        self.flush = flush
        self.params: Dict = {}
        self.params['errors'] = []
        self.params['warnings'] = []
        self.search_results = {'datum': [], 'z0': [], 'success': []}

        # logger setup
        self.logger = get_logger(self.__class__.__name__, level=logging.DEBUG if self.verbose else logging.WARNING)
        self.logger.debug(f"{self.__class__.__name__} initialized:")

        # checks
        self._extract_gdf_data()
        self._reset_results()
        self._store_init_params()

        # validation
        # n_points should not be less than 50 or more than 10000, else it can be computationally expensive; this range balances efficiency and accuracy.
        if self.n_points < 50 or self.n_points > 10000:
            msg = f"n_points={self.n_points} is out of recommended range [50, 10000]. Consider adjusting for efficiency and accuracy."
            self._add_warning(msg)

        # if gdf_recompute=True, it is recommended to also set gnostic_filter=True to enhance robustness.
        if self.gdf_recompute and not self.gnostic_filter:
            msg = "Using gdf_recompute=True without gnostic_filter=True may reduce robustness. Consider enabling gnostic_filter if needed."
            self._add_warning(msg)

    def _add_warning(self, message: str):
        # Record each warning once and emit it on the logger.
        self.params['warnings'].append(message)
        self.logger.warning(f"Warning: {message}")

    def _add_error(self, message: str):
        # Record each error once and emit it on the logger.
        self.params['errors'].append(message)
        self.logger.error(f"Error: {message}")

    def _extract_gdf_data(self):
        self.logger.info("Extracting GDF data...")
        try:
            gdf = self.gdf
            self.data = np.array(gdf.data)
            self.Z0 = float(gdf.z0)
            self.LB = float(gdf.LB)
            self.UB = float(gdf.UB)
            self.S = getattr(gdf, 'S', 'auto')
            self.S_opt = getattr(gdf, 'S_opt', None)
            self.wedf = getattr(gdf, 'wedf', False)
            self.n_points_gdf = getattr(gdf, 'n_points', self.n_points)
            self.opt_method = getattr(gdf, 'opt_method', 'L-BFGS-B')
            self.homogeneous = getattr(gdf, 'homogeneous', True)
            self.is_homogeneous = getattr(gdf, 'is_homogeneous', True)
            self.z0_optimize = getattr(gdf, 'z0_optimize', True)
            self.max_data_size = getattr(gdf, 'max_data_size', 1000)
            self.tolerance = getattr(gdf, 'tolerance', 1e-5)
            self.DLB = getattr(gdf, 'DLB', None)
            self.DUB = getattr(gdf, 'DUB', None)
            self.LSB = getattr(gdf, 'LSB', None)
            self.USB = getattr(gdf, 'USB', None)
            self.LCB = getattr(gdf, 'LCB', None)
            self.UCB = getattr(gdf, 'UCB', None)
            self.RRE = self.gdf.params.get('RRE', None)
            self.residual_entropy = self.gdf.params.get('residual_entropy', None)
            self.gdf_name = type(gdf).__name__
            if self.catch:
                self.params['gdf_type'] = self.gdf_name
                self.params['data_size'] = len(self.data)
                self.params['LB'] = self.LB
                self.params['UB'] = self.UB
                self.params['Z0'] = self.Z0
                self.params['S'] = self.S
                self.params['S_opt'] = self.S_opt
                self.params['wedf'] = self.wedf
                self.params['opt_method'] = self.opt_method
                self.params['is_homogeneous'] = self.is_homogeneous
                self.params['data_range'] = [float(np.min(self.data)), float(np.max(self.data))]
                self.params['RRE'] = self.RRE
                self.params['residual_entropy'] = self.residual_entropy

            # Log from the extracted attributes so this works even when catch=False.
            self.logger.debug(f"Initialized with {self.gdf_name} | Data size: {len(self.data)} | Z0: {self.Z0:.6f}")

        except Exception as e:
            self._add_error(f"Failed to extract GDF data: {e}")
            return

    def _argument_validation(self):
        self.logger.info("Validating arguments and settings...")
        # Check GDF type suitability
        if self.gdf_name not in ['ELDF', 'QLDF']:
            msg = "Interval Analysis is optimized for ELDF and QLDF. Results may be less robust for other types."
            self._add_warning(msg)

        # Check wedf setting
        if getattr(self.gdf, 'wedf', False):
            msg = "Interval Analysis works best with KSDF. Consider setting 'wedf=False' for optimal results."
            self._add_warning(msg)

        # Check n_points for computational efficiency
        if self.n_points > 1000:
            msg = (f"Current n_points = {self.n_points} is very high and may cause excessive computation time. "
                   "Consider reducing n_points for efficiency.")
            self._add_warning(msg)

    def _store_init_params(self):
        if self.catch:
            self.params.update({
                'n_points': self.n_points,
                'dense_zone_fraction': self.dense_zone_fraction,
                'dense_points_fraction': self.dense_points_fraction,
                'convergence_window': self.convergence_window,
                'convergence_threshold': self.convergence_threshold,
                'min_search_points': self.min_search_points,
                'boundary_margin_factor': self.boundary_margin_factor,
                'extrema_search_tolerance': self.extrema_search_tolerance,
                'verbose': self.verbose,
                'flush': self.flush
            })
        self.logger.info("Initial parameters stored.")

    def _reset_results(self):
        self.ZL = None
        self.Z0L = None
        self.ZU = None
        self.Z0U = None
        self.tolerance_interval = None
        self.typical_data_interval = None
        self.ordering_valid = None

    def fit(self, plot: bool = False):
        """
        Run the interval estimation process for the fitted GDF.

        This method performs adaptive interval scanning by extending the data with candidate values,
        recomputing the GDF (if enabled), and tracking the variation of the central parameter Z0.
        It then extracts the typical data interval and tolerance interval, checks the ordering constraint,
        and updates the internal results and diagnostics.

        Parameters
        ----------
        plot : bool, optional (default=False)
            If True, automatically plot the interval analysis results after fitting.

        Raises
        ------
        Exception
            If the fitting process fails due to invalid arguments or internal errors.

        Notes
        -----
        - For best results, ensure the GDF is already fitted to data before calling this method.
        - The method updates the object's attributes with the estimated intervals and stores
          diagnostics in the `params` attribute.
        - If `flush=True` was set at initialization, intermediate data is cleared after fitting.
        """
        self.logger.info("Starting fit process for DataIntervals...")
        import time
        start_time = time.time()
        try:
            self._argument_validation()

            self.logger.info("Fit process started.")
            self._reset_results()

            # Scan intervals and extract boundaries
            self._scan_intervals()
            self._extract_intervals_with_ordering()

            # Check ordering constraint
            if not self.ordering_valid:
                msg = ("Interval ordering constraint violated. "
                       "Try setting 'wedf=False', or setting 'gnostic_filter=True', or increasing 'n_points', or adjusting thresholds for sensitivity.")
                self._add_warning(msg)

            # std interval
            self.LSD, self.USD = std(self.data, S=self.S_opt, z0_optimize=self.z0_optimize, data_form=self.gdf.data_form, tolerance=self.tolerance)
            # Update parameters and optionally plot
            self._update_params()
            if plot:
                self.logger.info("Plotting interval analysis results...")
                self.plot()
                self.plot_intervals()

            # Optionally flush memory
            if self.flush:
                self.logger.info("Flushing memory...")
                self._flush_memory()

            elapsed = time.time() - start_time
            self.logger.info(f"Fit process completed in {elapsed:.2f} seconds.")
            self.logger.info(f"Ordering valid: {self.ordering_valid}")
            self.logger.info(f"Tolerance interval: [{self.Z0L:.4f}, {self.Z0U:.4f}]")
            self.logger.info(f"Typical data interval: [{self.ZL:.4f}, {self.ZU:.4f}]")
        except Exception as e:
            err_msg = f"Fit failed: {e}"
            self._add_error(err_msg)
            raise

    def _scan_intervals(self):
        self.logger.info("Scanning intervals by extending data...")
        try:
            self.logger.info("Scanning intervals...")

            # Scan lower direction (Z0 -> LB)
            lower_points = self._generate_search_points('lower')
            self.logger.info(f" Starting lower scan: {len(lower_points)} points from Z0 → LB")
            for i, datum in enumerate(lower_points, 1):
                z0_val = self._compute_z0_with_extended_datum(datum)
                self.search_results['datum'].append(datum)
                self.search_results['z0'].append(z0_val)
                self.search_results['success'].append(True)
                # report progress roughly every 10% of the search points (integer step avoids float modulo)
                if self.verbose and i % max(1, self.n_points // 10) == 0:
                    self.logger.info(f" Lower scan [{i}/{len(lower_points)}]: Datum={datum:.4f}, Z0={z0_val:.6f}")
                if self._check_convergence():
                    if self.verbose:
                        self.logger.info(f" Early stopping in lower scan at datum={datum:.4f}")
                    return  # stop scanning entirely if convergence is reached

            # Scan upper direction (Z0 -> UB)
            upper_points = self._generate_search_points('upper')
            self.logger.info(f" Starting upper scan: {len(upper_points)} points from Z0 → UB")
            for i, datum in enumerate(upper_points, 1):
                z0_val = self._compute_z0_with_extended_datum(datum)
                self.search_results['datum'].append(datum)
                self.search_results['z0'].append(z0_val)
                self.search_results['success'].append(True)
                if self.verbose and i % 50 == 0:
                    self.logger.info(f" Upper scan [{i}/{len(upper_points)}]: Datum={datum:.4f}, Z0={z0_val:.6f}")
                if self._check_convergence():
                    if self.verbose:
                        self.logger.info(f" Early stopping in upper scan at datum={datum:.4f}")
                    return

        except Exception as e:
            self._add_error(f"Scanning intervals failed: {e}")
            return

    def _generate_search_points(self, direction: str) -> np.ndarray:
        self.logger.debug(f"Generating search points in {direction} direction...")
        # Dense zone near Z0, sparse toward LB/UB
        if direction == 'lower':
            start, end = self.Z0, self.LB + self.boundary_margin_factor * (self.UB - self.LB)
        else:
            start, end = self.Z0, self.UB - self.boundary_margin_factor * (self.UB - self.LB)
        dense_n = int(self.n_points * self.dense_points_fraction)
        sparse_n = self.n_points - dense_n
        dense_zone = self.dense_zone_fraction * abs(self.Z0 - end)
        if direction == 'lower':
            dense_end = self.Z0 - dense_zone
            dense_points = np.linspace(self.Z0, dense_end, dense_n)
            sparse_points = np.linspace(dense_end, end, sparse_n)
        else:
            dense_end = self.Z0 + dense_zone
            dense_points = np.linspace(self.Z0, dense_end, dense_n)
            sparse_points = np.linspace(dense_end, end, sparse_n)
        return np.unique(np.concatenate([dense_points, sparse_points]))

    def _compute_z0_with_extended_datum(self, datum: float) -> float:
        self.logger.info(f"Computing Z0 with extended datum: {datum:.4f}")
        # Extend data and fit new GDF, return z0
        extended_data = np.append(self.data, datum)
        gdf_type = type(self.gdf)
        if self.gdf_recompute:
            kwargs = {
                'verbose': False,
                'flush': True,
                'opt_method': self.opt_method,
                'n_points': self.n_points_gdf,
                'wedf': self.wedf,
                'homogeneous': self.homogeneous,
                'z0_optimize': self.z0_optimize,
                'max_data_size': self.max_data_size,
                'tolerance': self.tolerance,
            }
        else:
            kwargs = {
                'LB': self.LB,
                'UB': self.UB,
                'S': self.S,
                'verbose': False,
                'flush': True,
                'opt_method': self.opt_method,
                'n_points': self.n_points_gdf,
                'wedf': self.wedf,
                'homogeneous': self.homogeneous,
                'z0_optimize': self.z0_optimize,
                'max_data_size': self.max_data_size,
                'tolerance': self.tolerance,
            }
        gdf_new = gdf_type(**kwargs)
        gdf_new.fit(data=extended_data, plot=False)
        return float(gdf_new.z0)

    def _check_convergence(self) -> bool:
        self.logger.info("Checking convergence of Z0...")
        z0s = np.array(self.search_results['z0'])
        if len(z0s) < self.convergence_window + self.min_search_points:
            return False
        window = z0s[-self.convergence_window:]
        if np.std(window) < self.convergence_threshold:
            return True
        return False

    def _get_z0s_main_cluster(self, z0s: np.ndarray, datums: np.ndarray) -> tuple:
        self.logger.info("Extracting main Z0 cluster...")
        try:
            # too few data points - skip clustering
            if len(z0s) <= 4 or len(datums) < 4:
                self._add_warning("Insufficient data points for clustering. Returning all values.")
                return z0s, datums

            # Fit ELDF to z0s for clustering
            self.logger.info("Fitting ELDF for clustering...")
            eldf_cluster = ELDF(catch=False, wedf=False, verbose=False)
            eldf_cluster.fit(z0s)
            # Cluster boundaries
            self.logger.info("Fitting DataCluster to identify main cluster...")
            cluster = DataCluster(gdf=eldf_cluster, verbose=self.verbose)
            clb, cub = cluster.fit()

            # z0s within cluster boundaries
            in_cluster_mask = (z0s >= clb) & (z0s <= cub)
            if not np.any(in_cluster_mask):
                self._add_warning("No Z0 values found within cluster boundaries. Returning all values.")
                return z0s, datums

            z0s_main = z0s[in_cluster_mask]
            datums_main = datums[in_cluster_mask]
            return z0s_main, datums_main

        except Exception as e:
            self._add_warning(f"Cluster-based Z0 extraction failed: {e}. Using all Z0 values.")
            return np.array(self.search_results['z0']), np.array(self.search_results['datum'])

    def _extract_intervals_with_ordering(self):
        self.logger.info("Extracting intervals with ordering constraint...")

        datums = np.array(self.search_results['datum'])
        z0s = np.array(self.search_results['z0'])

        if self.gnostic_filter:
            self.logger.info("Applying gnostic filtering to Z0 values...")
            # MG cluster
            z0s, datums = self._get_z0s_main_cluster(z0s, datums)

        # Smoothing
        if len(z0s) > 11:
            z0s_smooth = savgol_filter(z0s, 11, 3)
        else:
            z0s_smooth = z0s

        # clean dict
        self.search_results_clean = {
            'datum': datums,
            'z0': z0s_smooth
        }

        # Window
        data_mean = np.mean(self.data)
        data_std = np.std(self.data)
        window_mask = (datums >= data_mean - 2 * data_std) & (datums <= data_mean + 2 * data_std)
        datums_win = datums[window_mask]
        z0s_win = z0s_smooth[window_mask]
        if len(z0s_win) == 0:
            datums_win = datums
            z0s_win = z0s_smooth

        # Find local minima/maxima with prominence
        min_peaks, _ = find_peaks(-z0s_win, prominence=0.1)
        max_peaks, _ = find_peaks(z0s_win, prominence=0.1)
        # Fallback to global min/max if no peaks found
        if len(min_peaks) > 0:
            min_idx = min_peaks[np.argmin(z0s_win[min_peaks])]
        else:
            min_idx = np.argmin(z0s_win)
        if len(max_peaks) > 0:
            max_idx = max_peaks[np.argmax(z0s_win[max_peaks])]
        else:
            max_idx = np.argmax(z0s_win)
        zl, z0l = datums_win[min_idx], z0s_win[min_idx]
        zu, z0u = datums_win[max_idx], z0s_win[max_idx]
        ordering_valid = (zl < z0l < self.Z0 < z0u < zu)
        if ordering_valid:
            self.ZL, self.Z0L, self.ZU, self.Z0U = zl, z0l, zu, z0u
            self.ordering_valid = True
        else:
            self._find_valid_extrema_with_ordering(datums_win, z0s_win)

        # still invalid? then clamp the offending bounds to Z0
        if not self.ordering_valid:
            if self.Z0 < self.Z0L:
                self.Z0L = self.Z0
            if self.Z0 > self.Z0U:
                self.Z0U = self.Z0
            if self.ZL > self.Z0L:
                self.ZL = self.Z0L
            if self.ZU < self.Z0U:
                self.ZU = self.Z0U

            self.logger.info("Adjusted bounds to enforce ordering constraint.")
        self.tolerance_interval = self.Z0U - self.Z0L
        self.typical_data_interval = self.ZU - self.ZL

    def _find_valid_extrema_with_ordering(self, datums, z0s):
        self.logger.info("Searching for valid extrema combinations to satisfy ordering constraint...")
        # Try combinations to satisfy ordering constraint
        lower_mask = datums < self.Z0
        upper_mask = datums > self.Z0
        lower_datum = datums[lower_mask]
        lower_z0 = z0s[lower_mask]
        upper_datum = datums[upper_mask]
        upper_z0 = z0s[upper_mask]
        n_candidates = min(5, len(lower_datum), len(upper_datum))
        found = False

        self.logger.info(f"Found {n_candidates} candidate pairs for extrema.")
        for i in range(n_candidates):
            zl, z0l = lower_datum[i], lower_z0[i]
            zu, z0u = upper_datum[-(i+1)], upper_z0[-(i+1)]
            if zl < z0l < self.Z0 < z0u < zu:
                self.ZL, self.Z0L, self.ZU, self.Z0U = zl, z0l, zu, z0u
                self.ordering_valid = True
                found = True
                break

        self.logger.info(f"Valid extrema found: {found}")
        if not found:
            # Fallback: use initial extrema
            min_idx = np.argmin(z0s)
            max_idx = np.argmax(z0s)
            self.ZL, self.Z0L, self.ZU, self.Z0U = datums[min_idx], z0s[min_idx], datums[max_idx], z0s[max_idx]
            self.ordering_valid = False

        self.logger.info(f"Ordering constraint {'satisfied' if self.ordering_valid else 'NOT satisfied'}.")

    def _update_params(self):
        self.logger.info("Updating parameters with results...")
        self.params.update({
            'LB': self.LB,
            'LSB': self.LSB,
            'DLB': self.DLB,
            'LCB': self.LCB,
            'LSD': self.LSD,
            'ZL': self.ZL,
            'Z0L': self.Z0L,
            'Z0': self.Z0,
            'Z0U': self.Z0U,
            'ZU': self.ZU,
            'USD': self.USD,
            'UCB': self.UCB,
            'DUB': self.DUB,
            'USB': self.USB,
            'UB': self.UB,
            'tolerance_interval': self.tolerance_interval,
            'typical_data_interval': self.typical_data_interval,
            'ordering_valid': self.ordering_valid,
            'search_points': len(self.search_results['datum'])
        })
        self.logger.info(f"""Results updated.
                         Tolerance interval: [{self.Z0L:.4f}, {self.Z0U:.4f}],
                         Typical data interval: [{self.ZL:.4f}, {self.ZU:.4f}]
                         Ordering valid: {self.ordering_valid}""")

    def results(self) -> Dict:
        """
        Return a dictionary of estimated interval results and bounds.

        Returns
        -------
        results : dict
            A dictionary containing the following keys (values may be None if not available):
            - 'LB', 'LSB', 'DLB', 'LCB': Lower bounds (various types, if available)
            - 'LSD', 'USD': Lower and upper standard-deviation-based bounds
            - 'ZL': Lower bound of the typical data interval
            - 'Z0L': Lower bound of the tolerance interval (Z0-based)
            - 'Z0': Central value (Z0) of the original GDF
            - 'Z0U': Upper bound of the tolerance interval (Z0-based)
            - 'ZU': Upper bound of the typical data interval
            - 'UCB', 'DUB', 'USB', 'UB': Upper bounds (various types, if available)

        Example
        -------
        >>> intervals = di.results()
        >>> print(intervals['Z0L'], intervals['Z0U'])
        """
        self.logger.info("Retrieving results dictionary...")
        results = {
            'LB': float(self.LB) if self.LB is not None else None,
            'LSB': float(self.LSB) if self.LSB is not None else None,
            'DLB': float(self.DLB) if self.DLB is not None else None,
            'LCB': float(self.LCB) if self.LCB is not None else None,
            'LSD': float(self.LSD) if self.LSD is not None else None,
            'ZL': float(self.ZL) if self.ZL is not None else None,
            'Z0L': float(self.Z0L) if self.Z0L is not None else None,
            'Z0': float(self.Z0) if self.Z0 is not None else None,
            'Z0U': float(self.Z0U) if self.Z0U is not None else None,
            'ZU': float(self.ZU) if self.ZU is not None else None,
            'USD': float(self.USD) if self.USD is not None else None,
            'UCB': float(self.UCB) if self.UCB is not None else None,
            'DUB': float(self.DUB) if self.DUB is not None else None,
            'USB': float(self.USB) if self.USB is not None else None,
            'UB': float(self.UB) if self.UB is not None else None
        }
        return results

    def plot_intervals(self, figsize=(12, 8)):
        """
        Plot the Z0 variation and estimated intervals.

        This method visualizes how the central parameter Z0 changes as the data is extended,
        and marks the estimated typical data interval and tolerance interval on the plot.
        It also indicates whether the ordering constraint is satisfied.

        Parameters
        ----------
        figsize : tuple, optional (default=(12, 8))
            Size of the matplotlib figure.

        Notes
        -----
        - The plot shows the Z0 trajectory, interval boundaries, and highlights the ordering constraint status.
        - Useful for diagnostics and for understanding the robustness of the interval estimation.
        """
        self.logger.info("Plotting Z0 variation and intervals...")

        import matplotlib.pyplot as plt
        datums = np.array(self.search_results_clean['datum'])
        z0s = np.array(self.search_results_clean['z0'])
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        sort_idx = np.argsort(datums)
        ax.scatter(datums[sort_idx], z0s[sort_idx], color='k', alpha=0.5, linewidth=1, label='Z0 Variation')
        ax.plot(datums[sort_idx], z0s[sort_idx], color='k', alpha=0.5, linewidth=1)
        ax.scatter([self.ZL], [self.Z0L], marker='v', s=120, color='purple', edgecolor='black', zorder=10, label=f'ZL,Z0L ({self.ZL:.4f},{self.Z0L:.4f})')
        ax.scatter([self.Z0], [self.Z0], marker='s', s=120, color='green', edgecolor='black', zorder=10, label=f'Z0 ({self.Z0:.4f})')
        ax.scatter([self.ZU], [self.Z0U], marker='^', s=120, color='orange', edgecolor='black', zorder=10, label=f'Z0U,ZU ({self.Z0U:.4f},{self.ZU:.4f})')
        ax.axvline(x=self.ZL, color='purple', linestyle='--', alpha=1, linewidth=1)
        ax.axvline(x=self.Z0, color='green', linestyle='-', alpha=1, linewidth=2)
        ax.axvline(x=self.ZU, color='orange', linestyle='--', alpha=1, linewidth=1)
        ax.axhline(y=self.Z0L, color='purple', linestyle=':', alpha=1, linewidth=1)
        ax.axhline(y=self.Z0U, color='orange', linestyle=':', alpha=1, linewidth=1)
        ordering_status = "✓ VALID" if self.ordering_valid else "✗ INVALID"
        tol_interval_str = f"Tolerance Interval: [{self.Z0L:.4f}, {self.Z0U:.4f}]"
        typ_interval_str = f"Typical Data Interval: [{self.ZL:.4f}, {self.ZU:.4f}]"
        ordering_str = f"Ordering Constraint: {ordering_status}"
        ax.plot([], [], ' ', label=tol_interval_str)
        ax.plot([], [], ' ', label=typ_interval_str)
        ax.plot([], [], ' ', label=ordering_str)
        pad = (self.Z0U - self.Z0L) * 0.1
        z0_min, z0_max = self.Z0L - pad, self.Z0U + pad
        ax.set_ylim(z0_min, z0_max)
        ax.set_xlabel('Datum Value', fontsize=12, fontweight='bold')
        ax.set_ylabel('Z0 Value', fontsize=12, fontweight='bold')
        title = 'Z0-Based Interval Estimation'
        if not self.ordering_valid:
            title += ' - ⚠ Ordering Constraint Violated'
        ax.set_title(title, fontsize=12)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

        # log summary
        self.logger.info(f"\nZ0 Variation Plot Summary:")
        self.logger.info(f" Typical data interval: [{self.ZL:.6f}, {self.ZU:.6f}] (width: {self.ZU - self.ZL:.6f})")
        self.logger.info(f" Tolerance interval: [{self.Z0L:.6f}, {self.Z0U:.6f}] (width: {self.Z0U - self.Z0L:.6f})")
        self.logger.info(f" Ordering constraint: {'✓ SATISFIED' if self.ordering_valid else '✗ VIOLATED'}")

    def plot(self, figsize=(12, 8)):
        """
        Plot the GDF, PDF, and estimated intervals on the data domain.

        This method visualizes the fitted GDF curve, the probability density function (if available),
        and overlays the estimated typical data interval and tolerance interval.
        It also marks the original data points and key interval boundaries.

        Parameters
        ----------
        figsize : tuple, optional (default=(12, 8))
            Size of the matplotlib figure.

        Notes
        -----
        - The plot provides a comprehensive view of the data, the fitted distribution, and the intervals.
        - Useful for reporting and for visually assessing the coverage and validity of the intervals.
        """
        self.logger.info("Plotting GDF, PDF, and intervals...")

        import matplotlib.pyplot as plt
        x_points = np.array(self.data)
        x_min, x_max = np.min(x_points), np.max(x_points)
        x_pad = (x_max - x_min) * 0.05
        x_min -= x_pad
        x_max += x_pad
        fig, ax1 = plt.subplots(figsize=figsize)
        ax2 = ax1.twinx()

        # gdf points
        gdf_points = f"{self.gdf_name.lower()}_points"
        # GDF curve (if available)
        gdf_vals = getattr(self.gdf, gdf_points, None)
        smooth_x = getattr(self.gdf, 'di_points_n', None)
        if gdf_vals is not None and smooth_x is not None:
            ax1.plot(smooth_x, gdf_vals, '-', color='blue', linewidth=2.5, alpha=0.9, label=self.gdf_name)
        else:
            ax1.plot(x_points, [self.Z0]*len(x_points), 'o', color='blue', label=self.gdf_name, markersize=4, alpha=0.7)
        # PDF curve (if available)
        pdf_vals = getattr(self.gdf, 'pdf_points', None)
        if pdf_vals is not None and smooth_x is not None:
            ax2.plot(smooth_x, pdf_vals, '-', color='red', linewidth=2.5, alpha=0.9, label='PDF')
            max_pdf = np.max(pdf_vals)
        elif pdf_vals is not None:
            ax2.plot(x_points, pdf_vals, 'o', color='red', label='PDF', markersize=4, alpha=0.7)
            max_pdf = np.max(pdf_vals)
        else:
            max_pdf = 1.0

        # Typical Data Interval (ZL to ZU)
        ax1.axvspan(self.ZL, self.ZU, alpha=0.2, color='lightblue', label=f'Typical Data Interval \n[ZL: {self.ZL:.3f}, ZU: {self.ZU:.3f}]')
        # Tolerance Interval (Z0L to Z0U)
        ax1.axvspan(self.Z0L, self.Z0U, alpha=0.20, color='lightgreen', label=f'Tolerance Interval \n[Z0L: {self.Z0L:.3f}, Z0U: {self.Z0U:.3f}]')

        # Critical vertical lines
        ax1.axvline(x=self.ZL, color='orange', linestyle='-.', linewidth=2, alpha=0.8, label=f'ZL={self.ZL:.3f}')
        ax1.axvline(x=self.Z0, color='magenta', linestyle='-.', linewidth=1, alpha=0.9, label=f'Z0={self.Z0:.3f}')
        ax1.axvline(x=self.ZU, color='orange', linestyle='--', linewidth=2, alpha=0.8, label=f'ZU={self.ZU:.3f}')
        ax1.axvline(x=self.Z0L, color='grey', linestyle='-', linewidth=1.5, alpha=0.7, zorder=0)
        ax1.axvline(x=self.Z0U, color='grey', linestyle='-', linewidth=1.5, alpha=0.7, zorder=0)
        # Data bounds
        if self.LB is not None:
            ax1.axvline(x=self.gdf.LB, color='purple', linestyle='-.', linewidth=1, alpha=1, label=f'LB={self.gdf.LB:.3f}')
        if self.UB is not None:
            ax1.axvline(x=self.gdf.UB, color='purple', linestyle='--', linewidth=1, alpha=1, label=f'UB={self.gdf.UB:.3f}')
        # DLB and DUB bounds
        if self.DLB is not None:
            ax1.axvline(x=self.gdf.DLB, color='brown', linestyle='-.', linewidth=1.5, alpha=1, label=f'DLB={self.gdf.DLB:.3f}')
        if self.DUB is not None:
            ax1.axvline(x=self.gdf.DUB, color='brown', linestyle='--', linewidth=1.5, alpha=1, label=f'DUB={self.gdf.DUB:.3f}')
        # LSB and USB bounds
        if self.LSB is not None:
            ax1.axvline(x=self.gdf.LSB, color='red', linestyle='-.', linewidth=1, alpha=1, label=f'LSB={self.gdf.LSB:.3f}')
        if self.USB is not None:
            ax1.axvline(x=self.gdf.USB, color='red', linestyle='--', linewidth=1, alpha=1, label=f'USB={self.gdf.USB:.3f}')
        # LCB and UCB bounds
        if self.LCB is not None:
            ax1.axvline(x=self.gdf.LCB, color='blue', linestyle='-', linewidth=1, alpha=1, label=f'LCB={self.gdf.LCB:.3f}')
        if self.UCB is not None:
            ax1.axvline(x=self.gdf.UCB, color='blue', linestyle='--', linewidth=1, alpha=1, label=f'UCB={self.gdf.UCB:.3f}')
        # LSD and USD bounds
        if self.LSD is not None:
            ax1.axvline(x=self.LSD, color='cyan', linestyle='-.', linewidth=1, alpha=1, label=f'LSD={self.LSD:.3f}')
        if self.USD is not None:
            ax1.axvline(x=self.USD, color='cyan', linestyle='--', linewidth=1, alpha=1, label=f'USD={self.USD:.3f}')
        # Rug plot for original data
        data_y_pos = -0.05
        ax1.scatter(x_points, [data_y_pos] * len(x_points), alpha=0.6, s=15, color='black', marker='|')
        ax1.set_xlabel('Data Values', fontsize=12, fontweight='bold')
        ax1.set_ylabel(f'{self.gdf_name} Value', fontsize=12, fontweight='bold', color='blue')
        ax1.tick_params(axis='y', labelcolor='blue')
        ax1.set_ylim(-0.1, 1.05)
        ax1.set_xlim(x_min, x_max)
        ax2.set_ylabel('PDF Value', fontsize=12, fontweight='bold', color='red')
        ax2.tick_params(axis='y', labelcolor='red')
        ax2.set_ylim(0, max_pdf * 1.1)
        ax2.set_xlim(x_min, x_max)
        ax1.grid(True, alpha=0.3)
        title_text = f'{self.gdf_name} Interval Analysis (Z0 = {self.Z0:.3f})'
        ax1.set_title(title_text, fontsize=12)
        handles, labels = ax1.get_legend_handles_labels()
        ax1.legend(handles, labels, loc='upper left', bbox_to_anchor=(1.05, 1), fontsize=10, borderaxespad=0)
        plt.tight_layout()
        plt.show()

        self.logger.info(f"\n{self.gdf_name} Interval Analysis Plot Summary:")
        self.logger.info(f" Z0 (Gnostic Mode): {self.Z0:.4f}")
        self.logger.info(f" Tolerance interval: [{self.Z0L:.4f}, {self.Z0U:.4f}] (width: {self.Z0U - self.Z0L:.4f})")
        self.logger.info(f" Typical data interval: [{self.ZL:.4f}, {self.ZU:.4f}] (width: {self.ZU - self.ZL:.4f})")
        data_in_tolerance = np.sum((x_points >= self.Z0L) & (x_points <= self.Z0U))
        self.logger.info(f" Data coverage - Tolerance: {data_in_tolerance}/{len(x_points)} ({data_in_tolerance/len(x_points):.1%})")
        data_in_typical = np.sum((x_points >= self.ZL) & (x_points <= self.ZU))
        self.logger.info(f" Data coverage - Typical: {data_in_typical}/{len(x_points)} ({data_in_typical/len(x_points):.1%})")
        self.logger.info(f" Total data points: {len(x_points)}")
        self.logger.info(f" Data range: [{np.min(x_points):.4f}, {np.max(x_points):.4f}]")

    def _flush_memory(self):
        if self.flush:
            self.search_results = {'datum': [], 'z0': [], 'success': []}
            self.logger.info("Flushed data to free memory.")

    def __repr__(self):
        return (f"DataIntervals(gdf={self.gdf_name}, n_points={self.n_points}, "
                f"dense_zone_fraction={self.dense_zone_fraction}, "
                f"dense_points_fraction={self.dense_points_fraction}, "
                f"convergence_window={self.convergence_window}, "
                f"convergence_threshold={self.convergence_threshold}, "
                f"min_search_points={self.min_search_points}, "
                f"boundary_margin_factor={self.boundary_margin_factor}, "
                f"extrema_search_tolerance={self.extrema_search_tolerance}, "
                f"verbose={self.verbose}, flush={self.flush})")