machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
'''
|
|
2
|
+
ELDF Interval Analysis Module
|
|
3
|
+
|
|
4
|
+
Estimating Local Marginal Analysis
|
|
5
|
+
|
|
6
|
+
Author: Nirmal Parmar
|
|
7
|
+
Machine Gnostics
|
|
8
|
+
'''
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from machinegnostics.magcal.gdf.base_el_intv import BaseIntervalAnalysisELDF
|
|
12
|
+
|
|
13
|
+
class IntervalAnalysisELDF(BaseIntervalAnalysisELDF):
|
|
14
|
+
"""
|
|
15
|
+
Interval Analysis class for Estimating Local Distribution Functions (ELDF) with advanced extrema detection capabilities.
|
|
16
|
+
|
|
17
|
+
This class performs comprehensive interval analysis on data samples using local distribution function estimation methods
|
|
18
|
+
to identify critical intervals and extrema bounds that characterize the underlying data distribution. It extends the
|
|
19
|
+
base ELDF functionality with specialized algorithms for extrema detection and robust interval boundary analysis using
|
|
20
|
+
local distribution estimation approaches.
|
|
21
|
+
|
|
22
|
+
### Key Features:
|
|
23
|
+
|
|
24
|
+
**ELDF Interval Analysis - Extrema Detection:**
|
|
25
|
+
|
|
26
|
+
1. **Tolerance Interval - Location Parameter Extrema (Z0L, Z0U)**:
|
|
27
|
+
Extrema bounds for the location parameter Z̃0 under local distribution function optimization.
|
|
28
|
+
These bounds represent the minimum and maximum values the location parameter can achieve
|
|
29
|
+
when introducing data points within the specified range. The bounds demonstrate
|
|
30
|
+
the sensitivity and robustness of the local distribution function estimator.
|
|
31
|
+
|
|
32
|
+
2. **Typical Data Interval - Extrema Search Bounds (ZL, ZU)**:
|
|
33
|
+
The interval bounds where extrema search is performed for new data points. Within this
|
|
34
|
+
interval, the algorithm searches for data values that produce minimum/maximum location
|
|
35
|
+
parameter estimates. This interval defines the search space for extrema detection.
|
|
36
|
+
|
|
37
|
+
3. **Local Distribution Location Parameter (Z0)**:
|
|
38
|
+
The location parameter estimated from the original data by the local distribution function, taken at the maximum of the probability distribution.
|
|
39
|
+
This serves as the baseline for comparison with extrema bounds and represents the
|
|
40
|
+
optimal location estimate under the current data configuration using local distribution methods.
|
|
41
|
+
|
|
42
|
+
### Use Cases:
|
|
43
|
+
|
|
44
|
+
- **Sensitivity Analysis**: Understanding how new data points affect parameter estimates
|
|
45
|
+
- **Robustness Assessment**: Evaluating stability of local distribution function estimators
|
|
46
|
+
- **Uncertainty Quantification**: Defining parameter uncertainty through extrema bounds
|
|
47
|
+
- **Outlier Impact Analysis**: Measuring potential impact of extreme observations
|
|
48
|
+
- **Local Confidence Intervals**: Distribution-based alternative to bootstrap confidence intervals
|
|
49
|
+
- **Risk Assessment**: Quantifying parameter estimation risk under data variations
|
|
50
|
+
- **Quality Control**: Establishing distribution-based control limits for processes
|
|
51
|
+
|
|
52
|
+
### Attributes:
|
|
53
|
+
|
|
54
|
+
data : np.ndarray
|
|
55
|
+
Input data array (1-dimensional) for interval analysis. Must be a 1D numpy array
|
|
56
|
+
containing numerical values. Empty arrays or arrays with all NaN values
|
|
57
|
+
will raise an error.
|
|
58
|
+
|
|
59
|
+
estimate_cluster_bounds : bool, default=True
|
|
60
|
+
Whether to estimate cluster bounds during interval analysis. When True,
|
|
61
|
+
performs clustering analysis to identify main data groupings and fits
|
|
62
|
+
ELDF specifically to the main cluster for more robust extrema detection.
|
|
63
|
+
|
|
64
|
+
get_clusters : bool, default=True
|
|
65
|
+
Whether to perform cluster analysis during interval detection. When True,
|
|
66
|
+
enables cluster-based interval identification and main cluster extraction
|
|
67
|
+
for heterogeneous data handling. Essential for robust analysis of mixed datasets.
|
|
68
|
+
|
|
69
|
+
n_points_per_direction : int, default=1000
|
|
70
|
+
Number of search points per direction for extrema detection. Higher values
|
|
71
|
+
provide more precise extrema bounds but require more computation. Controls
|
|
72
|
+
the granularity of the search grid used by the interval engine.
|
|
73
|
+
|
|
74
|
+
dense_zone_fraction : float, default=0.4
|
|
75
|
+
Fraction of search space allocated to dense sampling around critical regions.
|
|
76
|
+
Controls the proportion of search points concentrated in areas of rapid
|
|
77
|
+
distribution function change. Range typically 0.2 to 0.8.
|
|
78
|
+
|
|
79
|
+
dense_points_fraction : float, default=0.7
|
|
80
|
+
Fraction of total search points allocated to dense zones. Higher values
|
|
81
|
+
provide better resolution in critical regions but may miss global extrema.
|
|
82
|
+
Must be between 0.1 and 0.9.
|
|
83
|
+
|
|
84
|
+
convergence_window : int, default=15
|
|
85
|
+
Number of consecutive search steps used for convergence assessment.
|
|
86
|
+
Larger windows provide more stable convergence detection but may slow
|
|
87
|
+
down the search process. Must be positive integer.
|
|
88
|
+
|
|
89
|
+
convergence_threshold : float, default=1e-7
|
|
90
|
+
Threshold for convergence detection in extrema search. Smaller values
|
|
91
|
+
provide more precise extrema bounds but may require more iterations.
|
|
92
|
+
Specific to interval engine optimization processes.
|
|
93
|
+
|
|
94
|
+
min_search_points : int, default=30
|
|
95
|
+
Minimum number of search points required for reliable extrema detection.
|
|
96
|
+
Safety parameter to ensure adequate sampling density for convergence.
|
|
97
|
+
Must be positive integer, typically 20-100.
|
|
98
|
+
|
|
99
|
+
boundary_margin_factor : float, default=0.001
|
|
100
|
+
Margin factor for boundary detection to avoid numerical edge effects.
|
|
101
|
+
Controls the buffer zone around detected boundaries. Smaller values
|
|
102
|
+
provide tighter bounds but may be sensitive to numerical precision.
|
|
103
|
+
|
|
104
|
+
extrema_search_tolerance : float, default=1e-6
|
|
105
|
+
Numerical tolerance for extrema search convergence criteria.
|
|
106
|
+
Smaller values lead to more precise extrema detection but may require
|
|
107
|
+
more iterations. Specific to local distribution optimization convergence.
|
|
108
|
+
|
|
109
|
+
early_stopping_steps : int, default=10
|
|
110
|
+
Number of consecutive steps without improvement before stopping optimization.
|
|
111
|
+
Prevents infinite loops and improves efficiency during extrema detection.
|
|
112
|
+
|
|
113
|
+
cluster_threshold : float, default=0.05
|
|
114
|
+
Threshold for PDF-based cluster detection as fraction of maximum PDF value.
|
|
115
|
+
Lower values detect more subtle clusters. Range typically 0.01 to 0.2.
|
|
116
|
+
Used when get_clusters=True for data heterogeneity analysis.
|
|
117
|
+
|
|
118
|
+
DLB : float, optional
|
|
119
|
+
Data Lower Bound - the absolute minimum value that the data can theoretically take.
|
|
120
|
+
If None, will be inferred from data minimum. Manual override for distribution lower bound.
|
|
121
|
+
|
|
122
|
+
DUB : float, optional
|
|
123
|
+
Data Upper Bound - the absolute maximum value that the data can theoretically take.
|
|
124
|
+
If None, will be inferred from data maximum. Manual override for distribution upper bound.
|
|
125
|
+
|
|
126
|
+
LB : float, optional
|
|
127
|
+
Lower Probable Bound - the practical lower limit for interval analysis.
|
|
128
|
+
Manual override for ELDF lower bound used in extrema computations.
|
|
129
|
+
|
|
130
|
+
UB : float, optional
|
|
131
|
+
Upper Probable Bound - the practical upper limit for interval analysis.
|
|
132
|
+
Manual override for ELDF upper bound used in extrema computations.
|
|
133
|
+
|
|
134
|
+
S : float or 'auto', default='auto'
|
|
135
|
+
Scale parameter for the local distribution function. If 'auto' (default),
|
|
136
|
+
the scale will be automatically estimated from the data during fitting.
|
|
137
|
+
Affects extrema detection sensitivity and distribution optimization.
|
|
138
|
+
|
|
139
|
+
varS : bool, default=False
|
|
140
|
+
Whether to use variable scale parameter estimation. When True, allows
|
|
141
|
+
the scale parameter to vary during distribution optimization, potentially
|
|
142
|
+
improving fit quality but increasing computational complexity.
|
|
143
|
+
|
|
144
|
+
z0_optimize : bool, default=True
|
|
145
|
+
Whether to optimize the location parameter during ELDF fitting.
|
|
146
|
+
When True, finds the optimal estimate of the location parameter using
|
|
147
|
+
local distribution methods. Should typically remain True for proper extrema analysis.
|
|
148
|
+
|
|
149
|
+
tolerance : float, default=1e-6
|
|
150
|
+
Numerical tolerance for convergence criteria in ELDF fitting algorithms.
|
|
151
|
+
Smaller values lead to more precise parameter estimates but may require
|
|
152
|
+
more iterations. Affects both initial fitting and extrema optimization.
|
|
153
|
+
|
|
154
|
+
data_form : str, default='a'
|
|
155
|
+
Form of data processing for interval analysis. Options are:
|
|
156
|
+
- 'a': Additive form (default) - processes data linearly
|
|
157
|
+
- 'm': Multiplicative form - applies log transformation for better handling
|
|
158
|
+
of multiplicative processes in distribution estimation
|
|
159
|
+
|
|
160
|
+
n_points : int, default=1000
|
|
161
|
+
Number of points to generate for ELDF curve evaluation and visualization.
|
|
162
|
+
Higher values provide smoother curves and more precise interval boundaries
|
|
163
|
+
but require more computation. Must be positive integer.
|
|
164
|
+
|
|
165
|
+
homogeneous : bool, default=True
|
|
166
|
+
Whether to assume data homogeneity during interval analysis. When False,
|
|
167
|
+
enables clustering analysis for heterogeneous data handling. Affects
|
|
168
|
+
extrema detection strategy and cluster-based fitting decisions.
|
|
169
|
+
|
|
170
|
+
catch : bool, default=True
|
|
171
|
+
Whether to enable error catching and provide detailed interval analysis results.
|
|
172
|
+
Setting to True (default) allows access to detailed results and interval plotting
|
|
173
|
+
but uses more memory. Required for interval plotting and parameter access.
|
|
174
|
+
|
|
175
|
+
weights : np.ndarray, optional
|
|
176
|
+
Sample weights for weighted local distribution function analysis. Must be the same
|
|
177
|
+
length as data array. If None, uniform weights are used. Affects distribution
|
|
178
|
+
computation and extrema detection priorities.
|
|
179
|
+
|
|
180
|
+
wedf : bool, default=True
|
|
181
|
+
Whether to compute Weighted Estimating Distribution Function (WEDF) for interval analysis.
|
|
182
|
+
When True, incorporates weights into distribution computations and extrema detection.
|
|
183
|
+
|
|
184
|
+
opt_method : str, default='L-BFGS-B'
|
|
185
|
+
Optimization method for distribution optimization and extrema detection.
|
|
186
|
+
Default is 'L-BFGS-B' which handles bounded optimization well.
|
|
187
|
+
Must be a valid scipy.optimize method that supports bounds constraints.
|
|
188
|
+
|
|
189
|
+
verbose : bool, default=False
|
|
190
|
+
Whether to print detailed progress information during interval analysis.
|
|
191
|
+
When True, provides diagnostic output about optimization progress,
|
|
192
|
+
convergence status, and extrema detection results.
|
|
193
|
+
|
|
194
|
+
max_data_size : int, default=1000
|
|
195
|
+
Maximum data size for interval processing. Safety limit to prevent excessive
|
|
196
|
+
memory usage during extrema detection and distribution optimization.
|
|
197
|
+
Large datasets are automatically subsampled.
|
|
198
|
+
|
|
199
|
+
flush : bool, default=True
|
|
200
|
+
Whether to flush output streams for real-time progress display during
|
|
201
|
+
extrema detection. May affect memory usage and computation speed during
|
|
202
|
+
intensive distribution optimization processes.
|
|
203
|
+
|
|
204
|
+
### Examples
|
|
205
|
+
|
|
206
|
+
Basic ELDF interval analysis:
|
|
207
|
+
|
|
208
|
+
>>> import numpy as np
|
|
209
|
+
>>> from machinegnostics.magcal import IntervalAnalysisELDF
|
|
210
|
+
>>>
|
|
211
|
+
>>> # Sample data for extrema analysis
|
|
212
|
+
>>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
|
|
213
|
+
>>>
|
|
214
|
+
>>> # Perform ELDF interval analysis
|
|
215
|
+
>>> ia = IntervalAnalysisELDF(
|
|
216
|
+
... data=data,
|
|
217
|
+
... n_points_per_direction=2000,
|
|
218
|
+
... convergence_threshold=1e-8,
|
|
219
|
+
... verbose=True,
|
|
220
|
+
... get_clusters=True
|
|
221
|
+
... )
|
|
222
|
+
>>> ia.fit()
|
|
223
|
+
>>>
|
|
224
|
+
>>> # Get detected extrema and intervals
|
|
225
|
+
>>> intervals = ia.get_intervals(decimals=4)
|
|
226
|
+
>>> print("Location parameter extrema:", (intervals['Z0L'], intervals['Z0U']))
|
|
227
|
+
>>> print("Search bounds:", (intervals['ZL'], intervals['ZU']))
|
|
228
|
+
>>> print("Baseline location:", intervals['Z0'])
|
|
229
|
+
>>>
|
|
230
|
+
>>> # Plot extrema analysis results
|
|
231
|
+
>>> ia.plot()
|
|
232
|
+
|
|
233
|
+
Advanced usage with clustering:
|
|
234
|
+
|
|
235
|
+
>>> # Data with potential outliers
|
|
236
|
+
>>> mixed_data = np.concatenate([
|
|
237
|
+
... np.random.normal(5, 1, 50), # main cluster
|
|
238
|
+
... np.random.normal(15, 0.5, 5) # outlier cluster
|
|
239
|
+
... ])
|
|
240
|
+
>>>
|
|
241
|
+
>>> # Configure for heterogeneous data
|
|
242
|
+
>>> ia_mixed = IntervalAnalysisELDF(
|
|
243
|
+
... data=mixed_data,
|
|
244
|
+
... homogeneous=False,
|
|
245
|
+
... get_clusters=True,
|
|
246
|
+
... cluster_threshold=0.03,
|
|
247
|
+
... estimate_cluster_bounds=True,
|
|
248
|
+
... verbose=True
|
|
249
|
+
... )
|
|
250
|
+
>>> ia_mixed.fit(plot=True)
|
|
251
|
+
>>>
|
|
252
|
+
>>> # Compare extrema with and without clustering
|
|
253
|
+
>>> extrema_mixed = ia_mixed.get_intervals()
|
|
254
|
+
>>> sensitivity = extrema_mixed['Z0U'] - extrema_mixed['Z0L']
|
|
255
|
+
>>> print(f"Parameter sensitivity: {sensitivity:.4f}")
|
|
256
|
+
|
|
257
|
+
### Methods
|
|
258
|
+
|
|
259
|
+
fit(plot=False)
|
|
260
|
+
Fit the ELDF Interval Analysis model to the data and detect extrema bounds.
|
|
261
|
+
|
|
262
|
+
get_intervals(decimals=4)
|
|
263
|
+
Return dictionary containing all detected intervals and extrema bounds
|
|
264
|
+
with specified precision.
|
|
265
|
+
|
|
266
|
+
plot(figsize=(12, 8))
|
|
267
|
+
Plot the ELDF analysis results with extrema visualization and
|
|
268
|
+
interval engine output.
|
|
269
|
+
|
|
270
|
+
### Notes
|
|
271
|
+
|
|
272
|
+
- ELDF interval analysis focuses on local distribution function-based extrema detection
|
|
273
|
+
- The algorithm uses iterative search with dense zone sampling for efficiency
|
|
274
|
+
- Extrema bounds quantify parameter uncertainty under data variations
|
|
275
|
+
- Setting get_clusters=True enables robust analysis of heterogeneous data
|
|
276
|
+
- The interval engine performs bidirectional search for comprehensive coverage
|
|
277
|
+
- Convergence parameters significantly affect precision vs. computation tradeoffs
|
|
278
|
+
- For large datasets, consider increasing max_data_size or reducing n_points_per_direction
|
|
279
|
+
|
|
280
|
+
### Raises
|
|
281
|
+
|
|
282
|
+
ValueError
|
|
283
|
+
If data array is empty, contains only NaN values, or has invalid dimensions.
|
|
284
|
+
If weights array is provided but has different length than data array.
|
|
285
|
+
If numerical parameters (tolerance, convergence_threshold, etc.) are invalid.
|
|
286
|
+
If n_points_per_direction is too small for reliable extrema detection.
|
|
287
|
+
|
|
288
|
+
RuntimeError
|
|
289
|
+
If ELDF fitting fails or interval engine fails to converge.
|
|
290
|
+
If extrema detection cannot find valid bounds within the search parameters.
|
|
291
|
+
If clustering analysis fails when get_clusters=True.
|
|
292
|
+
|
|
293
|
+
OptimizationError
|
|
294
|
+
If the underlying distribution optimization encounters numerical issues during
|
|
295
|
+
extrema search or parameter estimation.
|
|
296
|
+
|
|
297
|
+
ConvergenceError
|
|
298
|
+
If the interval engine fails to converge within the specified tolerance
|
|
299
|
+
and maximum iterations. Consider adjusting convergence parameters.
|
|
300
|
+
"""
|
|
301
|
+
|
|
302
|
+
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             varS: bool = False,
             z0_optimize: bool = True,
             tolerance: float = 1e-6,
             data_form: str = 'a',
             n_points: int = 1000,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True,
             early_stopping_steps: int = 10,
             cluster_threshold: float = 0.05,
             estimate_cluster_bounds: bool = True,
             get_clusters: bool = True,
             n_points_per_direction: int = 1000,
             dense_zone_fraction: float = 0.4,
             dense_points_fraction: float = 0.7,
             convergence_window: int = 15,
             convergence_threshold: float = 1e-7,
             min_search_points: int = 30,
             boundary_margin_factor: float = 0.001,
             extrema_search_tolerance: float = 1e-6):
    """
    Create an ELDF interval-analysis object.

    This constructor performs no work of its own: every argument is handed,
    unchanged and by keyword, to the base-class initializer. The meaning and
    accepted values of each parameter are described in the class docstring.
    """
    # NOTE(review): the base class is imported from
    # machinegnostics.magcal.gdf.base_el_intv, but no such file appears in the
    # wheel's file listing -- confirm that module actually ships.

    # Options describing the data bounds and the distribution fit.
    distribution_options = {
        'DLB': DLB,
        'DUB': DUB,
        'LB': LB,
        'UB': UB,
        'S': S,
        'varS': varS,
        'z0_optimize': z0_optimize,
        'tolerance': tolerance,
        'data_form': data_form,
        'n_points': n_points,
        'homogeneous': homogeneous,
        'catch': catch,
        'weights': weights,
        'wedf': wedf,
        'opt_method': opt_method,
        'verbose': verbose,
        'max_data_size': max_data_size,
        'flush': flush,
    }

    # Early-stopping and clustering switches.
    analysis_options = {
        'early_stopping_steps': early_stopping_steps,
        'cluster_threshold': cluster_threshold,
        'estimate_cluster_bounds': estimate_cluster_bounds,
        'get_clusters': get_clusters,
    }

    # Options specific to the interval engine.
    engine_options = {
        'n_points_per_direction': n_points_per_direction,
        'dense_zone_fraction': dense_zone_fraction,
        'dense_points_fraction': dense_points_fraction,
        'convergence_window': convergence_window,
        'convergence_threshold': convergence_threshold,
        'min_search_points': min_search_points,
        'boundary_margin_factor': boundary_margin_factor,
        'extrema_search_tolerance': extrema_search_tolerance,
    }

    # Keyword order matches the signature order.
    super().__init__(data=data,
                     **distribution_options,
                     **analysis_options,
                     **engine_options)
|
|
366
|
+
|
|
367
|
+
def fit(self, plot: bool = False):
    """
    Run the ELDF interval analysis on the data given at construction time.

    The method is a thin public entry point: it delegates the whole
    computation to the inherited ``_fit_eldf_intv`` and returns nothing.
    As documented for this class, the analysis is expected to

    - fit the Estimating Local Distribution Function (ELDF) to the data,
    - determine the baseline location parameter ``Z0``,
    - search for the location-parameter extrema ``Z0L`` / ``Z0U`` and the
      bounds ``ZL`` / ``ZU`` of the region that was searched,
    - optionally use cluster information when ``get_clusters=True``.

    Parameters
    ----------
    plot : bool, default=False
        Passed through unchanged to the inherited implementation. When True
        the results are expected to be plotted right after fitting; leave it
        False for programmatic use.

    Returns
    -------
    None
        Results are kept on the instance; read them with ``get_intervals()``.

    Raises
    ------
    ValueError, RuntimeError
        Propagated from the inherited implementation for invalid data or
        parameters, or when fitting / extrema detection fails.
        NOTE(review): the exact exception types (the earlier documentation
        also named a ``ConvergenceError``) are not visible in this file --
        confirm against the base class.

    Examples
    --------
    >>> import numpy as np
    >>> from machinegnostics.magcal import IntervalAnalysisELDF
    >>>
    >>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
    >>> ia = IntervalAnalysisELDF(data=data, verbose=True)
    >>> ia.fit(plot=False)
    >>> extrema = ia.get_intervals()
    >>> print(f"Parameter extrema: [{extrema['Z0L']:.4f}, {extrema['Z0U']:.4f}]")

    Notes
    -----
    - Call this method before ``get_intervals()`` or ``plot()``.
    - Run time is expected to grow with ``n_points_per_direction`` and with
      stricter convergence settings.
    """
    self._fit_eldf_intv(plot=plot)
|
|
447
|
+
|
|
448
|
+
def plot(self, figsize=(12, 8)):
    """
    Draw the results of a previously fitted ELDF interval analysis.

    The method is a thin public entry point: it forwards ``figsize`` to the
    inherited ``_plot_eldf_intv`` and returns nothing. As documented for this
    class, the figure is expected to show

    - the fitted ELDF curve,
    - the location-parameter extrema ``Z0L`` / ``Z0U``,
    - the search bounds ``ZL`` / ``ZU``,
    - the baseline location parameter ``Z0``,
    - traces produced by the interval engine.

    Parameters
    ----------
    figsize : tuple, default=(12, 8)
        Figure size as ``(width, height)`` in inches, passed through
        unchanged to the inherited implementation.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        Expected when the model has not been fitted yet; call ``fit()``
        first.
    ValueError
        Expected when ``figsize`` is malformed or non-positive.
        NOTE(review): both checks live in the inherited implementation,
        which is not visible in this file -- confirm.

    Examples
    --------
    >>> import numpy as np
    >>> from machinegnostics.magcal import IntervalAnalysisELDF
    >>>
    >>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
    >>> ia = IntervalAnalysisELDF(data=data)
    >>> ia.fit(plot=False)
    >>> ia.plot()                  # default size
    >>> ia.plot(figsize=(15, 10))  # larger figure for presentations
    >>> ia.plot(figsize=(10, 6))   # compact figure for reports
    """
    self._plot_eldf_intv(figsize=figsize)
|
|
509
|
+
|
|
510
|
+
def get_intervals(self, decimals: int = 4):
    """
    Retrieve all detected intervals and extrema bounds from the fitted ELDF Interval Analysis model.

    This method validates its input and then delegates to the interval engine stored on
    ``self.intv``, returning the engine's dictionary unchanged. The dictionary contains the
    extrema bounds and intervals identified during the fitting process, including location
    parameter extrema, search bounds, baseline estimates, and optionally cluster-based bounds
    for heterogeneous data analysis.

    Parameters
    ----------
    decimals : int, default=4
        Number of decimal places to round the interval boundaries and extrema bounds.
        Higher values provide more precision but may include numerical noise from
        optimization processes. Must be non-negative. Typical range is 2-8 depending
        on data scale and precision requirements for local distribution-based estimates.

    Returns
    -------
    dict
        Dictionary produced by the interval engine. It is documented to contain the
        following keys:

        - 'Z0' : float
            Baseline location parameter from local distribution function estimation
        - 'Z0L' : float
            Lower extrema bound for location parameter under data variations
        - 'Z0U' : float
            Upper extrema bound for location parameter under data variations
        - 'ZL' : float
            Lower search bound where extrema detection was performed
        - 'ZU' : float
            Upper search bound where extrema detection was performed
        - 'tolerance_interval' : tuple
            (Z0L, Z0U) - Parameter extrema bounds as interval
        - 'search_bounds' : tuple
            (ZL, ZU) - Bounds where extrema search was conducted
        - 'parameter_sensitivity' : float
            Width of extrema interval (Z0U - Z0L) indicating parameter robustness
        - 'data_bounds' : tuple
            (DLB, DUB) - Theoretical data bounds used in analysis
        - 'probable_bounds' : tuple
            (LB, UB) - Practical probable bounds from ELDF fitting
        - 'main_cluster' : np.ndarray, optional
            Main cluster data points if get_clusters=True was used

    Raises
    ------
    RuntimeError
        If the model has not been fitted yet, i.e. no interval engine is available
        on ``self.intv``. Call fit() method first.

    ValueError
        If decimals parameter is negative or not an integer.

    Examples
    --------
    >>> import numpy as np
    >>> from machinegnostics.magcal import IntervalAnalysisELDF
    >>>
    >>> # Basic extrema retrieval
    >>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
    >>> ia = IntervalAnalysisELDF(data=data)
    >>> ia.fit(plot=False)
    >>> intervals = ia.get_intervals(decimals=4)
    >>>
    >>> # Access core extrema values
    >>> print("Baseline location:", intervals['Z0'])
    >>> print("Extrema bounds:", (intervals['Z0L'], intervals['Z0U']))
    >>> print("Search region:", (intervals['ZL'], intervals['ZU']))
    >>>
    >>> # Analyze parameter sensitivity
    >>> sensitivity = intervals['Z0U'] - intervals['Z0L']
    >>> print(f"Parameter sensitivity: {sensitivity:.4f}")
    >>>
    >>> # Compare with baseline uncertainty
    >>> baseline_position = (intervals['Z0'] - intervals['ZL']) / (intervals['ZU'] - intervals['ZL'])
    >>> print(f"Baseline position in search region: {baseline_position:.3f}")
    >>>
    >>> # High precision for detailed analysis
    >>> precise_intervals = ia.get_intervals(decimals=6)
    >>> extrema_width = precise_intervals['Z0U'] - precise_intervals['Z0L']
    >>> search_width = precise_intervals['ZU'] - precise_intervals['ZL']
    >>> coverage_ratio = extrema_width / search_width
    >>> print(f"Extrema coverage ratio: {coverage_ratio:.4f}")
    >>>
    >>> # Access interval engine specific results
    >>> if hasattr(ia, 'intv'):
    ...     engine_results = ia.intv.get_intervals(decimals=4)
    ...     print("Interval engine results:", engine_results)

    Notes
    -----
    - Extrema bounds (Z0L, Z0U) quantify location parameter sensitivity to data changes
    - Search bounds (ZL, ZU) define the region where extrema detection was performed
    - The baseline location parameter (Z0) serves as the local distribution function estimate
    - Parameter sensitivity (Z0U - Z0L) indicates robustness of the location estimate
    - All extrema are computed through local distribution function optimization processes
    - For heterogeneous data, main cluster information may be included in results
    - Precision depends on convergence parameters used during fitting
    - Missing bounds (when estimation fails) return None values
    """
    # Function-scope import so the check below also accepts integer types that
    # register with numbers.Integral (e.g. NumPy integers), not just built-in int.
    import numbers

    # A missing (or None) engine means there is nothing to delegate to; raise the
    # documented RuntimeError instead of leaking an AttributeError to the caller.
    engine = getattr(self, 'intv', None)
    if engine is None:
        raise RuntimeError("Model has not been fitted yet. Call fit() method first.")

    # Enforce the documented "non-negative integer" contract before delegating.
    if not isinstance(decimals, numbers.Integral) or decimals < 0:
        raise ValueError(
            f"decimals must be a non-negative integer, got {decimals!r}."
        )

    return engine.get_intervals(decimals=decimals)
|