machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Marginal Analysis ELDF
|
|
3
|
+
|
|
4
|
+
Author: Nirmal Parmar
|
|
5
|
+
Machine Gnostics
|
|
6
|
+
'''
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from machinegnostics.magcal.gdf.base_el_ma import BaseMarginalAnalysisELDF
|
|
10
|
+
|
|
11
|
+
class MarginalAnalysisELDF(BaseMarginalAnalysisELDF):
|
|
12
|
+
"""
|
|
13
|
+
Marginal Analysis for Estimating Local Distribution Function (ELDF) with advanced clustering capabilities.
|
|
14
|
+
|
|
15
|
+
This class performs comprehensive marginal cluster analysis on data samples to identify critical boundaries
|
|
16
|
+
and intervals that characterize the underlying local data distribution. It provides a complete toolkit
|
|
17
|
+
for local density analysis, boundary detection, and cluster identification with intuitive methods
|
|
18
|
+
for fitting and visualization.
|
|
19
|
+
|
|
20
|
+
### Key Features:
|
|
21
|
+
|
|
22
|
+
**Local Boundary Analysis:**
|
|
23
|
+
|
|
24
|
+
1. **LB and UB (Lower/Upper Bounds)**: Statistical boundaries of the local distribution optimized during fitting.
|
|
25
|
+
These define the effective support where significant local probability density exists.
|
|
26
|
+
|
|
27
|
+
2. **DLB and DUB (Data Lower/Upper Bounds)**: Actual minimum and maximum values in the data sample.
|
|
28
|
+
These represent the observed range and serve as hard constraints for local analysis.
|
|
29
|
+
|
|
30
|
+
3. **CLB and CUB (Cluster Lower/Upper Bounds)**: Boundaries of the main local data cluster.
|
|
31
|
+
Critical for ELDF analysis as they capture local concentration patterns and identify
|
|
32
|
+
the primary region of interest in the local distribution.
|
|
33
|
+
|
|
34
|
+
4. **Z0 (Mode)**: Point where local PDF reaches its global maximum. This is the most important
|
|
35
|
+
parameter for local distribution analysis, representing the peak of local probability density.
|
|
36
|
+
|
|
37
|
+
**ELDF-Specific Characteristics:**
|
|
38
|
+
- **Local density focus**: All analysis is based on local probability density rather than cumulative probability
|
|
39
|
+
- **No sample bounds**: Unlike EGDF, ELDF doesn't compute LSB/USB as they're not relevant for local analysis
|
|
40
|
+
- **Enhanced clustering**: CLB/CUB bounds are primary outputs for understanding local data structure
|
|
41
|
+
- **Mode-centric analysis**: Z0 is the central parameter for local distribution characterization
|
|
42
|
+
|
|
43
|
+
### Primary Use Cases:
|
|
44
|
+
|
|
45
|
+
- **Local Quality Control**: Setting control limits based on local process characteristics
|
|
46
|
+
- **Anomaly Detection**: Identifying outliers relative to local data patterns
|
|
47
|
+
- **Peak Analysis**: Understanding modes and local maxima in data distributions
|
|
48
|
+
- **Density-Based Clustering**: Segmenting data based on local concentration patterns
|
|
49
|
+
- **Process Monitoring**: Real-time assessment of local process behavior
|
|
50
|
+
- **Hotspot Analysis**: Detecting regions of high activity or concentration
|
|
51
|
+
|
|
52
|
+
### Parameters:
|
|
53
|
+
|
|
54
|
+
data : np.ndarray
|
|
55
|
+
Input data array for local marginal analysis. Must be a 1D numpy array with numerical values.
|
|
56
|
+
Empty arrays or arrays containing only NaN values will raise an error.
|
|
57
|
+
|
|
58
|
+
early_stopping_steps : int, default=10
|
|
59
|
+
Number of consecutive optimization steps without improvement before stopping.
|
|
60
|
+
Higher values allow more thorough optimization but increase computation time.
|
|
61
|
+
Must be a positive integer.
|
|
62
|
+
|
|
63
|
+
cluster_threshold : float, default=0.05
|
|
64
|
+
Threshold for cluster detection as fraction of maximum PDF value.
|
|
65
|
+
Lower values (0.01-0.02) detect more subtle clusters; higher values (0.1-0.2)
|
|
66
|
+
focus on prominent clusters. Critical parameter for CLB/CUB estimation.
|
|
67
|
+
|
|
68
|
+
get_clusters : bool, default=True
|
|
69
|
+
Whether to perform cluster analysis and compute CLB/CUB bounds.
|
|
70
|
+
Highly recommended for ELDF analysis. Set to False only for basic local fitting.
|
|
71
|
+
|
|
72
|
+
DLB : float, optional
|
|
73
|
+
Data Lower Bound override. If None, inferred from data minimum.
|
|
74
|
+
Use to set theoretical minimum for the local distribution.
|
|
75
|
+
|
|
76
|
+
DUB : float, optional
|
|
77
|
+
Data Upper Bound override. If None, inferred from data maximum.
|
|
78
|
+
Use to set theoretical maximum for the local distribution.
|
|
79
|
+
|
|
80
|
+
LB : float, optional
|
|
81
|
+
Lower Probable Bound override for local distribution support.
|
|
82
|
+
Defines the practical lower limit where significant local density exists.
|
|
83
|
+
|
|
84
|
+
UB : float, optional
|
|
85
|
+
Upper Probable Bound override for local distribution support.
|
|
86
|
+
Defines the practical upper limit where significant local density exists.
|
|
87
|
+
|
|
88
|
+
S : float or 'auto', default='auto'
|
|
89
|
+
Scale parameter for local distribution. When 'auto', estimated from data.
|
|
90
|
+
When float, used as fixed scale. Critical for local density estimation quality.
|
|
91
|
+
|
|
92
|
+
varS : bool, default=False
|
|
93
|
+
Whether to allow variable scale (S) during optimization.
|
|
94
|
+
If True, S is optimized; if False, S remains fixed. S must be left as 'auto' when varS is True.
|
|
95
|
+
|
|
96
|
+
z0_optimize : bool, default=True
|
|
97
|
+
Whether to use advanced optimization for Z0 (mode) estimation.
|
|
98
|
+
Provides sub-point precision for mode location, important for local analysis.
|
|
99
|
+
|
|
100
|
+
tolerance : float, default=1e-6
|
|
101
|
+
Numerical tolerance for optimization convergence. Smaller values provide
|
|
102
|
+
higher precision but may require more computation time.
|
|
103
|
+
|
|
104
|
+
data_form : str, default='a'
|
|
105
|
+
Data processing form:
|
|
106
|
+
- 'a': Additive (linear) - standard for most local analysis
|
|
107
|
+
- 'm': Multiplicative (log-transformed) - for multiplicative processes
|
|
108
|
+
|
|
109
|
+
n_points : int, default=1000
|
|
110
|
+
Number of points for smooth curve generation. Higher values provide
|
|
111
|
+
smoother visualizations but require more computation. Must be positive.
|
|
112
|
+
|
|
113
|
+
homogeneous : bool, default=True
|
|
114
|
+
Whether to assume data homogeneity. Affects optimization strategy.
|
|
115
|
+
Set to False for data with multiple distinct local patterns.
|
|
116
|
+
|
|
117
|
+
catch : bool, default=True
|
|
118
|
+
Whether to store detailed results and enable plotting capabilities.
|
|
119
|
+
Must be True for accessing .params, .plot(), and detailed analysis results.
|
|
120
|
+
|
|
121
|
+
weights : np.ndarray, optional
|
|
122
|
+
Sample weights for weighted local analysis. Must match data length.
|
|
123
|
+
Use to emphasize specific regions in local density estimation.
|
|
124
|
+
|
|
125
|
+
wedf : bool, default=True
|
|
126
|
+
Whether to compute Weighted Empirical Distribution Function alongside ELDF.
|
|
127
|
+
Enhances local analysis when weights are provided.
|
|
128
|
+
|
|
129
|
+
opt_method : str, default='L-BFGS-B'
|
|
130
|
+
Optimization algorithm for parameter estimation. Default works well for
|
|
131
|
+
most local optimization problems. Other options: 'TNC', 'Powell', 'SLSQP'.
|
|
132
|
+
|
|
133
|
+
verbose : bool, default=False
|
|
134
|
+
Whether to print detailed progress information during fitting.
|
|
135
|
+
Useful for diagnostics and understanding optimization behavior.
|
|
136
|
+
|
|
137
|
+
max_data_size : int, default=1000
|
|
138
|
+
Safety limit for data size to prevent memory issues during processing.
|
|
139
|
+
|
|
140
|
+
flush : bool, default=True
|
|
141
|
+
Whether to flush output streams for real-time progress display.
|
|
142
|
+
|
|
143
|
+
### Attributes (Available After fit()):
|
|
144
|
+
|
|
145
|
+
CLB : float
|
|
146
|
+
Cluster Lower Bound - lower boundary of the main local cluster
|
|
147
|
+
|
|
148
|
+
CUB : float
|
|
149
|
+
Cluster Upper Bound - upper boundary of the main local cluster
|
|
150
|
+
|
|
151
|
+
z0 : float
|
|
152
|
+
Mode of the local distribution (point of maximum PDF)
|
|
153
|
+
|
|
154
|
+
main_cluster : np.ndarray
|
|
155
|
+
Data points in the main cluster (between CLB and CUB)
|
|
156
|
+
|
|
157
|
+
lower_cluster : np.ndarray
|
|
158
|
+
Data points below the main cluster (< CLB)
|
|
159
|
+
|
|
160
|
+
upper_cluster : np.ndarray
|
|
161
|
+
Data points above the main cluster (> CUB)
|
|
162
|
+
|
|
163
|
+
is_homogeneous : bool
|
|
164
|
+
Whether the data was determined to be homogeneous
|
|
165
|
+
|
|
166
|
+
params : dict
|
|
167
|
+
Complete dictionary of all computed parameters and results (when catch=True)
|
|
168
|
+
|
|
169
|
+
init_eldf : ELDF
|
|
170
|
+
The underlying fitted ELDF object with detailed local distribution information
|
|
171
|
+
|
|
172
|
+
### Examples:
|
|
173
|
+
|
|
174
|
+
**Basic Local Analysis:**
|
|
175
|
+
|
|
176
|
+
>>> import numpy as np
|
|
177
|
+
>>> from machinegnostics.magcal import MarginalAnalysisELDF
|
|
178
|
+
>>>
|
|
179
|
+
>>> # Data with local concentration patterns
|
|
180
|
+
>>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
|
|
181
|
+
>>>
|
|
182
|
+
>>> # Perform marginal analysis
|
|
183
|
+
>>> ma = MarginalAnalysisELDF(data=data, verbose=True)
|
|
184
|
+
>>> ma.fit()
|
|
185
|
+
>>>
|
|
186
|
+
>>> # Access key results
|
|
187
|
+
>>> print(f"Local mode (Z0): {ma.z0:.3f}")
|
|
188
|
+
>>> print(f"Main cluster: [{ma.CLB:.3f}, {ma.CUB:.3f}]")
|
|
189
|
+
>>> print(f"Cluster size: {len(ma.main_cluster)} points")
|
|
190
|
+
>>> print(f"Data homogeneous: {ma.is_homogeneous}")
|
|
191
|
+
|
|
192
|
+
**Advanced Clustering Analysis:**
|
|
193
|
+
|
|
194
|
+
>>> # Sensitive cluster detection with visualization
|
|
195
|
+
>>> ma = MarginalAnalysisELDF(
|
|
196
|
+
... data=data,
|
|
197
|
+
... cluster_threshold=0.02, # Detect subtle clusters
|
|
198
|
+
... z0_optimize=True, # Precise mode location
|
|
199
|
+
... n_points=2000, # Smooth curves
|
|
200
|
+
... verbose=True
|
|
201
|
+
... )
|
|
202
|
+
>>>
|
|
203
|
+
>>> # Fit and visualize
|
|
204
|
+
>>> ma.fit(plot=True)
|
|
205
|
+
>>>
|
|
206
|
+
>>> # Detailed cluster analysis
|
|
207
|
+
>>> print(f"Lower cluster: {len(ma.lower_cluster)} points")
|
|
208
|
+
>>> print(f"Main cluster: {len(ma.main_cluster)} points")
|
|
209
|
+
>>> print(f"Upper cluster: {len(ma.upper_cluster)} points")
|
|
210
|
+
>>>
|
|
211
|
+
>>> # Additional visualizations
|
|
212
|
+
>>> ma.plot(plot_type='both', bounds=True) # ELDF + PDF
|
|
213
|
+
>>> ma.plot(derivatives=True) # Derivative analysis
|
|
214
|
+
|
|
215
|
+
**Quality Control Application:**
|
|
216
|
+
|
|
217
|
+
>>> # Process monitoring with specification limits
|
|
218
|
+
>>> ma = MarginalAnalysisELDF(
|
|
219
|
+
... data=process_measurements,
|
|
220
|
+
... LB=lower_spec_limit,
|
|
221
|
+
... UB=upper_spec_limit,
|
|
222
|
+
... tolerance=1e-8, # High precision
|
|
223
|
+
... verbose=True
|
|
224
|
+
... )
|
|
225
|
+
>>> ma.fit()
|
|
226
|
+
>>>
|
|
227
|
+
>>> # Check process capability
|
|
228
|
+
>>> process_capable = (ma.CLB >= ma.init_eldf.LB and
|
|
229
|
+
... ma.CUB <= ma.init_eldf.UB)
|
|
230
|
+
>>> print(f"Process within specs: {process_capable}")
|
|
231
|
+
>>>
|
|
232
|
+
>>> # Monitor process centering
|
|
233
|
+
>>> target_center = (ma.init_eldf.LB + ma.init_eldf.UB) / 2
|
|
234
|
+
>>> centering_error = abs(ma.z0 - target_center)
|
|
235
|
+
>>> print(f"Process centering error: {centering_error:.4f}")
|
|
236
|
+
|
|
237
|
+
### Methods:
|
|
238
|
+
|
|
239
|
+
fit(plot=False)
|
|
240
|
+
Fit the ELDF marginal analysis model to the data.
|
|
241
|
+
|
|
242
|
+
Parameters:
|
|
243
|
+
- plot (bool): Whether to display visualization after fitting
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
- None (sets all analysis attributes)
|
|
247
|
+
|
|
248
|
+
plot(plot_type='marginal', plot_smooth=True, bounds=True, derivatives=False, figsize=(12, 8))
|
|
249
|
+
Generate comprehensive visualizations of the local analysis results.
|
|
250
|
+
|
|
251
|
+
Parameters:
|
|
252
|
+
- plot_type (str): 'marginal', 'eldf', 'pdf', or 'both'
|
|
253
|
+
- plot_smooth (bool): Whether to show smooth interpolated curves
|
|
254
|
+
- bounds (bool): Whether to display boundary lines (LB, UB, CLB, CUB, etc.)
|
|
255
|
+
- derivatives (bool): Whether to show derivative analysis plots
|
|
256
|
+
- figsize (tuple): Figure dimensions (width, height)
|
|
257
|
+
|
|
258
|
+
### Notes:
|
|
259
|
+
|
|
260
|
+
- **Always call fit() before accessing analysis results or plotting**
|
|
261
|
+
- **Set catch=True (default) to enable result storage and plotting**
|
|
262
|
+
- **CLB/CUB bounds are the primary outputs for local cluster analysis**
|
|
263
|
+
- **Z0 represents the local mode and is the most critical single parameter**
|
|
264
|
+
- **cluster_threshold significantly affects clustering sensitivity**
|
|
265
|
+
- **For heterogeneous data, set homogeneous=False**
|
|
266
|
+
- **Large n_points values provide smoother visualizations but use more memory**
|
|
267
|
+
- **Weighted analysis (wedf=True) enhances results when weights are meaningful**
|
|
268
|
+
|
|
269
|
+
### Raises:
|
|
270
|
+
|
|
271
|
+
ValueError
|
|
272
|
+
- Empty or invalid data array
|
|
273
|
+
- Invalid parameter values (negative tolerances, invalid bounds, etc.)
|
|
274
|
+
- Mismatched weights array length
|
|
275
|
+
- Invalid plot_type or other method parameters
|
|
276
|
+
|
|
277
|
+
RuntimeError
|
|
278
|
+
- ELDF fitting fails to converge
|
|
279
|
+
- Cluster boundary estimation fails
|
|
280
|
+
- Plotting attempted before fitting or with catch=False
|
|
281
|
+
|
|
282
|
+
OptimizationError
|
|
283
|
+
- Underlying optimization algorithm fails
|
|
284
|
+
- Numerical issues in local density estimation
|
|
285
|
+
|
|
286
|
+
### See Also:
|
|
287
|
+
|
|
288
|
+
ELDF : Core Estimating Local Distribution Function class
|
|
289
|
+
DataHomogeneity : Homogeneity testing and cluster boundary estimation
|
|
290
|
+
MarginalAnalysisEGDF : Equivalent analysis for global cumulative distributions
|
|
291
|
+
"""
|
|
292
|
+
def __init__(self,
             data: np.ndarray,
             early_stopping_steps: int = 10,
             cluster_threshold: float = 0.05,
             get_clusters: bool = True,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             varS: bool = False,
             z0_optimize: bool = True,
             tolerance: float = 1e-6,
             data_form: str = 'a',
             n_points: int = 1000,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Configure the analysis by handing every argument to the base class.

    This constructor does no work of its own: each argument is forwarded,
    by keyword and unchanged, to the parent ``__init__``. Any validation
    or state set-up therefore happens in the base class. The meaning of
    each parameter is described in the class docstring.
    """
    # Gather the configuration once so the delegation below is a single call.
    forwarded = {
        'data': data,
        'early_stopping_steps': early_stopping_steps,
        'cluster_threshold': cluster_threshold,
        'get_clusters': get_clusters,
        'DLB': DLB,
        'DUB': DUB,
        'LB': LB,
        'UB': UB,
        'S': S,
        'varS': varS,
        'z0_optimize': z0_optimize,
        'tolerance': tolerance,
        'data_form': data_form,
        'n_points': n_points,
        'homogeneous': homogeneous,
        'catch': catch,
        'weights': weights,
        'wedf': wedf,
        'opt_method': opt_method,
        'verbose': verbose,
        'max_data_size': max_data_size,
        'flush': flush,
    }
    super().__init__(**forwarded)
|
|
337
|
+
|
|
338
|
+
def fit(self, plot: bool = False):
    """
    Run the ELDF marginal analysis on the data given at construction.

    This public entry point is a thin wrapper: it delegates to the
    base-class routine ``_fit_eldf`` and forwards ``plot`` unchanged.
    The workflow carried out by that routine is documented as:

    1. ELDF model fitting with parameter optimization
    2. Local boundary estimation (LB, UB, DLB, DUB)
    3. Homogeneity testing and cluster detection
    4. Cluster boundary estimation (CLB, CUB) when data is heterogeneous
    5. Z0 (mode) identification with optional sub-point precision
    6. Main cluster extraction and outlier identification

    After a successful run the results are exposed through instance
    attributes (CLB, CUB, z0, main_cluster, etc.) and, when catch=True,
    through the params dictionary.

    Parameters:
    -----------
    plot : bool, default=False
        When True, the marginal analysis plot (ELDF and PDF curves with the
        detected boundaries and clusters) is shown once fitting finishes,
        i.e. the same outcome as calling fit() and then plot().

        Note: Plotting requires catch=True (default) during initialization.

    Returns:
    --------
    None
        The instance is updated in place; read the results from its
        attributes or from the params dictionary.

    Raises:
    -------
    The exceptions below are raised by the underlying fitting routine and
    propagate through this method unchanged.

    RuntimeError
        If ELDF optimization fails to converge within the specified tolerance and iterations.
        If cluster boundary estimation fails (when get_clusters=True).
        If Z0 optimization fails to locate the local mode.
        If plotting is requested but catch=False was set during initialization.

    ValueError
        If the input data is invalid (empty, all NaN, wrong dimensions).
        If any parameter bounds are inconsistent (e.g., DLB > DUB, LB > UB).
        If weights array doesn't match data length.

    OptimizationError
        If the underlying optimization algorithm encounters numerical issues.
        If local density estimation fails due to poor data conditioning.

    Examples:
    ---------
    **Basic Fitting:**

    >>> import numpy as np
    >>> from machinegnostics.magcal import MarginalAnalysisELDF
    >>>
    >>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
    >>> ma = MarginalAnalysisELDF(data=data, verbose=True)
    >>> ma.fit()                      # no plot
    >>>
    >>> print(f"Mode (Z0): {ma.z0:.3f}")
    >>> print(f"Cluster bounds: [{ma.CLB:.3f}, {ma.CUB:.3f}]")
    >>> print(f"Main cluster size: {len(ma.main_cluster)}")
    >>> print(f"Homogeneous: {ma.is_homogeneous}")

    **Fitting with Automatic Visualization:**

    >>> ma = MarginalAnalysisELDF(data=data, cluster_threshold=0.02, verbose=True)
    >>> ma.fit(plot=True)             # plot is shown right after fitting
    >>> print(f"Analysis complete. Found {len(ma.main_cluster)} points in main cluster.")

    **Error Handling:**

    >>> try:
    ...     problematic_data = np.array([np.nan, np.nan, 1, 2])
    ...     ma = MarginalAnalysisELDF(data=problematic_data)
    ...     ma.fit()
    ... except ValueError as e:
    ...     print(f"Data validation error: {e}")
    ... except RuntimeError as e:
    ...     print(f"Fitting failed: {e}")

    **Batch Processing Workflow:**

    >>> datasets = [data1, data2, data3]   # user-supplied arrays
    >>> results = []
    >>>
    >>> for i, dataset in enumerate(datasets):
    ...     ma = MarginalAnalysisELDF(data=dataset, verbose=False)
    ...     try:
    ...         ma.fit()
    ...         results.append({
    ...             'dataset_id': i,
    ...             'z0': ma.z0,
    ...             'cluster_bounds': (ma.CLB, ma.CUB),
    ...             'main_cluster_size': len(ma.main_cluster),
    ...             'homogeneous': ma.is_homogeneous
    ...         })
    ...     except Exception as e:
    ...         print(f"Dataset {i} failed: {e}")

    Notes:
    ------
    - **Call this method before reading any analysis result**
    - **The _fitted flag is set to True by the fitting routine on success**
    - **Instance attributes (CLB, CUB, z0, etc.) are populated during fitting**
    - **The params dictionary is refreshed with the full results when catch=True**
    - **Calling fit() again re-runs the entire analysis**
    - **For large datasets, verbose=True helps to monitor progress**
    - **cluster_threshold strongly influences CLB/CUB detection sensitivity**

    See Also:
    ---------
    plot() : Generate detailed visualizations of fitting results
    """
    # All fitting logic lives in the base class; this method only delegates.
    self._fit_eldf(plot=plot)
|
|
461
|
+
|
|
462
|
+
def plot(self,
         plot_type: str = 'marginal',
         plot_smooth: bool = True,
         bounds: bool = True,
         figsize: tuple = (12, 8),
         *,
         derivatives: bool = False):
    """
    Generate comprehensive visualizations of the ELDF marginal analysis results.

    This method is a thin wrapper around the base-class routine ``_plot_eldf``;
    every argument is forwarded to it by keyword. The rendering described
    below (curves, colors, boundaries) is performed by that routine.

    The visualization uses a dual y-axis approach with ELDF on the primary axis (blue)
    and PDF on the secondary axis (red), with all boundaries and marginal points
    labeled and color-coded for easy interpretation.

    Parameters:
    -----------
    plot_type : str, default='marginal'
        Type of visualization to generate:

        - 'marginal': Complete marginal analysis view showing both ELDF and PDF with
          all boundaries, cluster bounds, and marginal points. Recommended for most users.
        - 'eldf': Focus on ELDF curve only with boundaries and marginal points.
          Useful for understanding local distribution characteristics.
        - 'pdf': Focus on PDF curve only with boundaries and marginal points.
          Useful for density analysis and peak detection.
        - 'both': Equivalent to 'marginal' - shows both ELDF and PDF curves.

    plot_smooth : bool, default=True
        Whether to display smooth interpolated curves alongside discrete points.

        - True: Shows both discrete points (circles/squares) and smooth curves.
          Uses n_points resolution for smoothness.
        - False: Shows only discrete points connected by lines. Faster rendering
          but less smooth appearance.

    bounds : bool, default=True
        Whether to display boundary lines and shaded regions.

        - True: Shows all detected boundaries (DLB, DUB, LB, UB, CLB, CUB) as
          vertical lines with labels, plus shaded regions for outlier areas.
        - False: Shows only the ELDF/PDF curves and Z0 marginal point.

    figsize : tuple, default=(12, 8)
        Figure dimensions as (width, height) in inches.
        Recommended range: (8, 6) to (16, 12).

    derivatives : bool, default=False
        Keyword-only. Whether to show derivative analysis plots. The value is
        forwarded unchanged to the base-class plotting routine; the default
        reproduces the previous behaviour of this method, which always
        passed False.

    Returns:
    --------
    None
        Displays the plot using matplotlib. No return value.

    Raises:
    -------
    The exceptions below originate in the underlying plotting routine and
    propagate through this method unchanged.

    RuntimeError
        If fit() has not been called yet (no analysis results available).
        If catch=False was set during initialization (plotting disabled).
        If the underlying ELDF object is not properly fitted.

    ValueError
        If plot_type is not one of the valid options.
        If figsize is not a tuple of two positive numbers.
        If derivatives=True but derivative calculation fails.

    AttributeError
        If required plotting data is missing from the fitted ELDF object.

    Examples:
    ---------
    **Basic Marginal Analysis Plot:**

    >>> import numpy as np
    >>> from machinegnostics.magcal import MarginalAnalysisELDF
    >>>
    >>> data = np.array([-10,-9,-8,-0.2,-0.1,0,0.1,0.2,8,9,10])
    >>> ma = MarginalAnalysisELDF(data=data, verbose=True)
    >>> ma.fit()
    >>>
    >>> # Standard marginal analysis visualization
    >>> ma.plot()  # Shows ELDF + PDF with all boundaries

    **Customized Visualizations:**

    >>> # Focus on PDF for density analysis
    >>> ma.plot(plot_type='pdf', bounds=True, figsize=(10, 6))
    >>>
    >>> # Clean ELDF view without boundaries
    >>> ma.plot(plot_type='eldf', bounds=False, plot_smooth=True)
    >>>
    >>> # Derivative analysis (keyword-only argument)
    >>> ma.plot(derivatives=True)
    >>>
    >>> # High-resolution smooth curves
    >>> ma_hires = MarginalAnalysisELDF(data=data, n_points=2000)
    >>> ma_hires.fit()
    >>> ma_hires.plot(plot_smooth=True, figsize=(14, 8))

    **Presentation-Ready Plots:**

    >>> ma = MarginalAnalysisELDF(
    ...     data=data,
    ...     n_points=2000,           # High resolution
    ...     cluster_threshold=0.02,  # Sensitive clustering
    ...     verbose=False            # Clean output
    ... )
    >>> ma.fit()
    >>> ma.plot(
    ...     plot_type='marginal',
    ...     plot_smooth=True,
    ...     bounds=True,
    ...     figsize=(16, 10)         # Large size
    ... )

    **Batch Visualization:**

    >>> import matplotlib.pyplot as plt
    >>> datasets = [data1, data2, data3]   # user-supplied arrays
    >>>
    >>> for i, dataset in enumerate(datasets):
    ...     ma = MarginalAnalysisELDF(data=dataset, verbose=False)
    ...     ma.fit()
    ...     ma.plot(plot_type='marginal', bounds=True,
    ...             figsize=(12, 8))
    ...     plt.title(f'Dataset {i+1} - Local Marginal Analysis')
    ...     plt.show()

    Plot Elements:
    --------------
    **Colors and Lines:**
    - Blue: ELDF curve and points
    - Red: PDF curve and points
    - Light Blue: WEDF points (when available)
    - Green: DLB (Data Lower Bound)
    - Orange: DUB (Data Upper Bound) and CLB/CUB (Cluster bounds)
    - Purple: LB (Lower Bound)
    - Brown: UB (Upper Bound)
    - Magenta: Z0 (Mode) - dash-dot line

    **Shaded Regions:**
    - Light purple: Lower outlier region (DLB to LB)
    - Light brown: Upper outlier region (UB to DUB)

    **Line Styles:**
    - Solid: Primary boundaries (DLB, DUB)
    - Dashed: Probable boundaries (LB, UB) and cluster bounds (CLB, CUB)
    - Dash-dot: Z0 mode line

    Notes:
    ------
    - **Requires successful fit() call before plotting**
    - **catch=True must be set during initialization for plotting to work**
    - **Larger n_points values create smoother curves but use more memory**
    - **All boundary lines include value labels for easy interpretation**
    - **Plot automatically sets appropriate axis limits based on data range**
    - **Grid is enabled for easy value reading**
    - **Legend includes all visible elements with their values**

    See Also:
    ---------
    fit() : Perform the marginal analysis before plotting
    """
    # Delegate to the base-class renderer. `derivatives` used to be pinned to
    # False here, which made the documented `plot(derivatives=True)` call fail.
    self._plot_eldf(plot_type=plot_type,
                    plot_smooth=plot_smooth,
                    bounds=bounds,
                    derivatives=derivatives,
                    figsize=figsize)
|