machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1218 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import warnings
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
from scipy.signal import find_peaks
|
|
5
|
+
from scipy.ndimage import gaussian_filter1d
|
|
6
|
+
from typing import Union, Dict, Any, Optional, Tuple, List
|
|
7
|
+
from machinegnostics.magcal import EGDF
|
|
8
|
+
import logging
|
|
9
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
10
|
+
|
|
11
|
+
class DataHomogeneity:
|
|
12
|
+
"""
|
|
13
|
+
Analyze data homogeneity for EGDF objects using probability density function analysis.
|
|
14
|
+
|
|
15
|
+
This class provides comprehensive homogeneity analysis for Estimating Global Distribution Functions (EGDF)
|
|
16
|
+
by examining the shape and characteristics of their probability density functions (PDF). The
|
|
17
|
+
homogeneity criterion is based on the mathematical properties and expected PDF behavior of EGDF
|
|
18
|
+
according to gnostic theory principles.
|
|
19
|
+
|
|
20
|
+
**Gnostic Theory Foundation:**
|
|
21
|
+
|
|
22
|
+
The EGDF is uniquely determined by the data sample and finds the optimal scale parameter automatically.
|
|
23
|
+
Unlike local distribution functions, EGDF has limited flexibility and provides a unique representation
|
|
24
|
+
for each homogeneous data sample. The key principle is that homogeneous data should produce a
|
|
25
|
+
distribution with a single density maximum, while non-homogeneous data will exhibit multiple maxima
|
|
26
|
+
or negative density values.
|
|
27
|
+
|
|
28
|
+
**Homogeneity Criteria:**
|
|
29
|
+
|
|
30
|
+
- **EGDF (Estimating Global Distribution Function)**: Data is considered homogeneous if:
|
|
31
|
+
1. PDF has exactly one global maximum (single peak)
|
|
32
|
+
2. PDF contains no negative values
|
|
33
|
+
|
|
34
|
+
**EGDF Characteristics:**
|
|
35
|
+
|
|
36
|
+
- **Uniqueness**: EGDF finds the best scale parameter automatically, providing a unique model
|
|
37
|
+
- **Robustness**: EGDF is robust with respect to outliers
|
|
38
|
+
- **Homogeneity Testing**: Particularly suitable for reliable data homogeneity testing
|
|
39
|
+
- **Global Nature**: Uses normalized weights resulting in limited flexibility controlled by optimal scale
|
|
40
|
+
- **Data-Driven**: Primary parameters are the data themselves, following gnostic "let data speak" principle
|
|
41
|
+
|
|
42
|
+
**Non-Homogeneity Detection:**
|
|
43
|
+
|
|
44
|
+
EGDF can sensitively detect two main causes of non-homogeneity:
|
|
45
|
+
1. **Outliers**: Individual data points significantly different from others, creating local maxima
|
|
46
|
+
2. **Clusters**: Separate groups in the data, resulting in multiple density peaks
|
|
47
|
+
|
|
48
|
+
**Key Features:**
|
|
49
|
+
|
|
50
|
+
- Automatic EGDF validation
|
|
51
|
+
- Robust peak detection with configurable smoothing
|
|
52
|
+
- Comprehensive error and warning tracking
|
|
53
|
+
- Memory management with optional data flushing
|
|
54
|
+
- Detailed visualization of analysis results
|
|
55
|
+
- Integration with existing GDF parameter systems
|
|
56
|
+
|
|
57
|
+
**Analysis Pipeline:**
|
|
58
|
+
|
|
59
|
+
1. **Validation**: Ensures input is EGDF only (rejects QGDF/ELDF/QLDF)
|
|
60
|
+
2. **PDF Extraction**: Retrieves PDF points from fitted EGDF object
|
|
61
|
+
3. **Smoothing**: Applies Gaussian filtering for noise reduction
|
|
62
|
+
4. **Maxima Detection**: Identifies peaks in the smoothed PDF
|
|
63
|
+
5. **Homogeneity Assessment**: Evaluates based on peak count and PDF negativity
|
|
64
|
+
6. **Result Storage**: Comprehensive parameter collection and storage
|
|
65
|
+
|
|
66
|
+
Parameters
|
|
67
|
+
----------
|
|
68
|
+
gdf : EGDF
|
|
69
|
+
A fitted Estimating Global Distribution Function object. Must be EGDF
|
|
70
|
+
(QGDF, ELDF and QLDF are not supported). The object must:
|
|
71
|
+
- Be fitted (gdf._fitted == True)
|
|
72
|
+
- Have catch=True to generate required pdf_points and di_points_n
|
|
73
|
+
- Contain valid data and PDF information
|
|
74
|
+
- Have optimized scale parameter S_opt from EGDF fitting process
|
|
75
|
+
|
|
76
|
+
verbose : bool, default=True
|
|
77
|
+
Controls output verbosity during analysis.
|
|
78
|
+
- True: Prints detailed progress, warnings, and results
|
|
79
|
+
- False: Silent operation (errors still raise exceptions)
|
|
80
|
+
|
|
81
|
+
catch : bool, default=True
|
|
82
|
+
Enables comprehensive result storage in params dictionary.
|
|
83
|
+
- True: Stores all analysis results, parameters, and metadata
|
|
84
|
+
- False: Minimal storage (not recommended for most use cases)
|
|
85
|
+
|
|
86
|
+
flush : bool, default=False
|
|
87
|
+
Controls memory management of large arrays after analysis.
|
|
88
|
+
- True: Clears pdf_points and di_points_n from GDF object to save memory
|
|
89
|
+
- False: Preserves all data arrays (recommended for further analysis)
|
|
90
|
+
|
|
91
|
+
smoothing_sigma : float, default=1.0
|
|
92
|
+
Gaussian smoothing parameter for PDF preprocessing before peak detection.
|
|
93
|
+
- Larger values: More aggressive smoothing, may merge distinct features
|
|
94
|
+
- Smaller values: Less smoothing, may detect noise as features
|
|
95
|
+
- Range: 0.1 to 5.0 (typical), must be positive
|
|
96
|
+
- Important for numerical sensitivity beyond visual inspection
|
|
97
|
+
|
|
98
|
+
min_height_ratio : float, default=0.01
|
|
99
|
+
Minimum relative height threshold for peak detection.
|
|
100
|
+
- Expressed as fraction of global maximum height
|
|
101
|
+
- Range: 0.001 to 0.1 (typical)
|
|
102
|
+
- Higher values: More selective, fewer detected peaks
|
|
103
|
+
- Lower values: More sensitive, may include noise
|
|
104
|
+
|
|
105
|
+
min_distance : Optional[int], default=None
|
|
106
|
+
Minimum separation between detected peaks in array indices.
|
|
107
|
+
- None: Automatically calculated as len(pdf_data) // 20
|
|
108
|
+
- Integer: Explicit minimum distance constraint
|
|
109
|
+
- Prevents detection of closely spaced spurious peaks
|
|
110
|
+
|
|
111
|
+
Attributes
|
|
112
|
+
----------
|
|
113
|
+
is_homogeneous : bool or None
|
|
114
|
+
Primary analysis result. None before fit(), True/False after analysis
|
|
115
|
+
|
|
116
|
+
picks : List[Dict]
|
|
117
|
+
Detected maxima with detailed information:
|
|
118
|
+
- index: Array index of maximum
|
|
119
|
+
- position: Data value at maximum
|
|
120
|
+
- pdf_value: Original PDF value at maximum
|
|
121
|
+
- smoothed_pdf_value: Smoothed PDF value at maximum
|
|
122
|
+
- is_global: Boolean indicating global maximum
|
|
123
|
+
|
|
124
|
+
z0 : float or None
|
|
125
|
+
Global optimum value from EGDF object or detected from PDF
|
|
126
|
+
|
|
127
|
+
global_extremum_idx : int or None
|
|
128
|
+
Array index of the global maximum
|
|
129
|
+
|
|
130
|
+
fitted : bool
|
|
131
|
+
Read-only property indicating if analysis has been completed
|
|
132
|
+
|
|
133
|
+
Raises
|
|
134
|
+
------
|
|
135
|
+
ValueError
|
|
136
|
+
- If input is not EGDF object
|
|
137
|
+
- If GDF object is not fitted
|
|
138
|
+
- If required attributes are missing
|
|
139
|
+
|
|
140
|
+
AttributeError
|
|
141
|
+
- If EGDF object lacks pdf_points (catch=False during EGDF fitting)
|
|
142
|
+
- If required EGDF attributes are not accessible
|
|
143
|
+
|
|
144
|
+
RuntimeError
|
|
145
|
+
- If fit() method fails due to numerical issues
|
|
146
|
+
- If plot() or results() called before fit()
|
|
147
|
+
|
|
148
|
+
Examples
|
|
149
|
+
--------
|
|
150
|
+
**Basic Homogeneity Analysis with EGDF:**
|
|
151
|
+
|
|
152
|
+
>>> import numpy as np
|
|
153
|
+
>>> from machinegnostics.magcal import EGDF
|
|
154
|
+
>>> from machinegnostics.magcal import DataHomogeneity
|
|
155
|
+
>>>
|
|
156
|
+
>>> # Prepare homogeneous data (single cluster)
|
|
157
|
+
>>> data = np.array([1.0, 1.1, 1.2, 0.9, 1.0, 1.1])
|
|
158
|
+
>>>
|
|
159
|
+
>>> # Fit EGDF with catch=True (required for homogeneity analysis)
|
|
160
|
+
>>> egdf = EGDF(data=data, catch=True, verbose=False)
|
|
161
|
+
>>> egdf.fit() # Automatically finds optimal scale parameter
|
|
162
|
+
>>>
|
|
163
|
+
>>> # Analyze homogeneity
|
|
164
|
+
>>> homogeneity = DataHomogeneity(egdf, verbose=True)
|
|
165
|
+
>>> is_homogeneous = homogeneity.fit()
|
|
166
|
+
>>> print(f"Data is homogeneous: {is_homogeneous}")
|
|
167
|
+
>>>
|
|
168
|
+
>>> # Visualize results
|
|
169
|
+
>>> homogeneity.plot()
|
|
170
|
+
>>>
|
|
171
|
+
>>> # Get detailed results
|
|
172
|
+
>>> results = homogeneity.results()
|
|
173
|
+
>>> print(f"Number of maxima detected: {len(results['picks'])}")
|
|
174
|
+
|
|
175
|
+
**EGDF Analysis with Multiple Clusters:**
|
|
176
|
+
|
|
177
|
+
>>> # Heterogeneous data (multiple clusters)
|
|
178
|
+
>>> data = np.array([1, 2, 3, 10, 11, 12, 20, 21, 22])
|
|
179
|
+
>>>
|
|
180
|
+
>>> # Fit EGDF (will find optimal S automatically)
|
|
181
|
+
>>> egdf = EGDF(data=data, catch=True)
|
|
182
|
+
>>> egdf.fit()
|
|
183
|
+
>>>
|
|
184
|
+
>>> # Analyze with custom smoothing for numerical sensitivity
|
|
185
|
+
>>> homogeneity = DataHomogeneity(
|
|
186
|
+
... egdf,
|
|
187
|
+
... verbose=True,
|
|
188
|
+
... smoothing_sigma=2.0, # More aggressive smoothing
|
|
189
|
+
... min_height_ratio=0.05, # Higher threshold
|
|
190
|
+
... flush=True # Save memory
|
|
191
|
+
... )
|
|
192
|
+
>>>
|
|
193
|
+
>>> is_homogeneous = homogeneity.fit()
|
|
194
|
+
>>> # Expected: False due to multiple clusters creating multiple maxima
|
|
195
|
+
|
|
196
|
+
**Outlier Detection Example:**
|
|
197
|
+
|
|
198
|
+
>>> # Data with outlier
|
|
199
|
+
>>> data = np.array([5, 5.1, 5.2, 4.9, 5.0, 15.0]) # 15.0 is outlier
|
|
200
|
+
>>>
|
|
201
|
+
>>> # Fit EGDF
|
|
202
|
+
>>> egdf = EGDF(data=data, catch=True)
|
|
203
|
+
>>> egdf.fit()
|
|
204
|
+
>>>
|
|
205
|
+
>>> # Analyze homogeneity
|
|
206
|
+
>>> homogeneity = DataHomogeneity(egdf, verbose=True)
|
|
207
|
+
>>> is_homogeneous = homogeneity.fit()
|
|
208
|
+
>>> # Expected: False due to outlier creating additional local maximum
|
|
209
|
+
|
|
210
|
+
**Error Handling and Parameter Access:**
|
|
211
|
+
|
|
212
|
+
>>> # Access comprehensive results
|
|
213
|
+
>>> results = homogeneity.results()
|
|
214
|
+
>>>
|
|
215
|
+
>>> # Check for analysis errors
|
|
216
|
+
>>> if 'errors' in results:
|
|
217
|
+
... print("Analysis errors:", results['errors'])
|
|
218
|
+
>>>
|
|
219
|
+
>>> # Check for warnings
|
|
220
|
+
>>> if 'warnings' in results:
|
|
221
|
+
... print("Analysis warnings:", results['warnings'])
|
|
222
|
+
>>>
|
|
223
|
+
>>> # Access EGDF parameters
|
|
224
|
+
>>> gdf_params = results['gdf_parameters']
|
|
225
|
+
>>> print(f"Optimal scale parameter: {gdf_params.get('S_opt', 'Not found')}")
|
|
226
|
+
>>> print(f"Global optimum Z0: {gdf_params.get('z0', 'Not found')}")
|
|
227
|
+
|
|
228
|
+
**Memory Management:**
|
|
229
|
+
|
|
230
|
+
>>> # For large datasets, use flush=True to save memory
|
|
231
|
+
>>> large_data = np.random.normal(0, 1, 10000)
|
|
232
|
+
>>> egdf_large = EGDF(data=large_data, catch=True)
|
|
233
|
+
>>> egdf_large.fit()
|
|
234
|
+
>>>
|
|
235
|
+
>>> # Analysis with memory cleanup
|
|
236
|
+
>>> homogeneity = DataHomogeneity(egdf_large, flush=True)
|
|
237
|
+
>>> homogeneity.fit() # pdf_points and di_points_n cleared after analysis
|
|
238
|
+
|
|
239
|
+
Notes
|
|
240
|
+
-----
|
|
241
|
+
**Mathematical Background:**
|
|
242
|
+
|
|
243
|
+
The gnostic homogeneity analysis is based on the principle that homogeneous data should
|
|
244
|
+
produce a unimodal PDF with specific characteristics for EGDF:
|
|
245
|
+
|
|
246
|
+
- **EGDF Uniqueness**: Each data sample has exactly one optimal EGDF representation
|
|
247
|
+
- **Scale Optimization**: EGDF automatically finds the best scale parameter S_opt
|
|
248
|
+
- **Density Properties**: Homogeneous data produces single maximum, non-negative density
|
|
249
|
+
- **Numerical Sensitivity**: Analysis must be numerical, not based on visual inspection
|
|
250
|
+
|
|
251
|
+
**Why Only EGDF:**
|
|
252
|
+
|
|
253
|
+
Homogeneity testing is only applicable to EGDF because:
|
|
254
|
+
- EGDF provides unique representation for each data sample
|
|
255
|
+
- Automatic scale parameter optimization enables reliable homogeneity testing
|
|
256
|
+
- Global nature with normalized weights makes it suitable for detecting data structure
|
|
257
|
+
- Robustness against outliers while maintaining sensitivity to detect them
|
|
258
|
+
- QGDF, ELDF, and QLDF have different mathematical properties unsuitable for this analysis
|
|
259
|
+
|
|
260
|
+
**Gnostic Principles Applied:**
|
|
261
|
+
|
|
262
|
+
- **Data Primacy**: Data are the primary parameters determining the distribution
|
|
263
|
+
- **Let Data Speak**: Analysis relies on data-driven optimal parameters
|
|
264
|
+
- **Unique Representation**: EGDF provides the one and only best representation
|
|
265
|
+
- **Numerical Decision Making**: Homogeneity decisions must be numerical, not visual
|
|
266
|
+
|
|
267
|
+
**Parameter Tuning Guidelines:**
|
|
268
|
+
|
|
269
|
+
- **smoothing_sigma**: Start with 1.0, increase for noisy data to improve numerical stability
|
|
270
|
+
- **min_height_ratio**: Start with 0.01, increase to reduce false positives from noise
|
|
271
|
+
- **min_distance**: Usually auto-calculated, manually set for specific data characteristics
|
|
272
|
+
- Remember: Visual inspection can be misleading, rely on numerical analysis
|
|
273
|
+
|
|
274
|
+
**Performance Considerations:**
|
|
275
|
+
|
|
276
|
+
- Memory usage scales with data size due to PDF point storage
|
|
277
|
+
- Use flush=True for large datasets if PDF data not needed afterward
|
|
278
|
+
- Smoothing adds computational cost but improves numerical robustness
|
|
279
|
+
- EGDF fitting provides optimal parameters, reducing computational overhead
|
|
280
|
+
|
|
281
|
+
**Integration with Existing Workflows:**
|
|
282
|
+
|
|
283
|
+
This class integrates seamlessly with existing EGDF workflows:
|
|
284
|
+
- Reads parameters from fitted EGDF objects including S_opt
|
|
285
|
+
- Appends errors/warnings to existing EGDF parameter collections
|
|
286
|
+
- Updates EGDF objects with homogeneity results
|
|
287
|
+
- Preserves all original EGDF functionality and gnostic principles
|
|
288
|
+
- Works with EGDF's automatic parameter optimization
|
|
289
|
+
|
|
290
|
+
**Theoretical Foundation:**
|
|
291
|
+
|
|
292
|
+
Based on gnostic theory where:
|
|
293
|
+
- Global distribution functions assume data sample homogeneity
|
|
294
|
+
- Non-homogeneous samples exhibit multiple density maxima or negative densities
|
|
295
|
+
- EGDF's unique scale parameter enables reliable homogeneity hypothesis testing
|
|
296
|
+
- Robustness properties make EGDF particularly suitable for small, widely spread samples
|
|
297
|
+
|
|
298
|
+
See Also
|
|
299
|
+
--------
|
|
300
|
+
EGDF : Estimating Global Distribution Function
|
|
301
|
+
"""
|
|
302
|
+
|
|
303
|
+
def __init__(self, gdf: EGDF, verbose=True, catch=True, flush=False,
             smoothing_sigma=1.0, min_height_ratio=0.01, min_distance=None):
    """
    Store the analysis configuration and validate the supplied EGDF object.

    No analysis is performed here; the constructor only records its
    arguments, resets the result placeholders, creates the logger and
    runs the two validation helpers.

    Parameters
    ----------
    gdf : EGDF
        Distribution object to analyse. It is checked by
        ``_gdf_obj_validation`` and ``_validate_egdf_only``.
    verbose : bool, default=True
        Selects the logger level: DEBUG when true, ERROR otherwise.
    catch : bool, default=True
        Stored as ``self.catch``.
    flush : bool, default=False
        Stored as ``self.flush``; read by ``_flush_memory``.
    smoothing_sigma : float, default=1.0
        Stored unchanged (no range check is done in the constructor).
    min_height_ratio : float, default=0.01
        Stored unchanged (no range check is done in the constructor).
    min_distance : int or None, default=None
        Stored unchanged.

    Raises
    ------
    ValueError, AttributeError
        Propagated from the validation helpers when ``gdf`` is rejected.
    """
    # Inputs supplied by the caller.
    self.gdf = gdf
    self.verbose, self.catch, self.flush = verbose, catch, flush

    # Peak-detection settings.
    self.smoothing_sigma = smoothing_sigma
    self.min_height_ratio = min_height_ratio
    self.min_distance = min_distance

    # Bookkeeping and result placeholders, filled in later.
    self.params = {}
    self._fitted = False
    self.z0 = None
    self.picks = []
    self.is_homogeneous = None
    self.global_extremum_idx = None

    # The logger must exist before validation because the helpers log through it.
    owner = self.__class__.__name__
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.ERROR
    self.logger = get_logger(owner, level=log_level)
    self.logger.debug(f"{owner} initialized: ")

    # Attribute checks run first, then the EGDF-only type check.
    self._gdf_obj_validation()
    self._validate_egdf_only()
|
|
329
|
+
|
|
330
|
+
def _validate_egdf_only(self):
    """
    Reject any distribution object that is not an EGDF.

    The decision is based on the class name of ``self.gdf``: names
    containing ``QGDF``, ``ELDF`` or ``QLDF`` are refused outright. A name
    that does not contain ``EGDF`` is accepted only when the object exposes
    a ``_fit_egdf`` attribute.

    Raises
    ------
    ValueError
        If the object is recognised as a non-EGDF type, or cannot be
        identified as an EGDF. The same text is logged at error level
        before raising.
    """
    self.logger.info("Validating GDF object for DataHomogeneity analysis")
    kind = self.gdf.__class__.__name__

    # Pick the explanation for the first rule that fails; fall through when all pass.
    if 'QGDF' in kind:
        reason = f"Received {kind}. QGDF is not supported for homogeneity analysis."
    elif 'ELDF' in kind or 'QLDF' in kind:
        reason = (
            f"Received {kind}. Local distribution functions (ELDF, QLDF) are not supported "
            f"for homogeneity analysis."
        )
    elif 'EGDF' not in kind and not hasattr(self.gdf, '_fit_egdf'):
        # Name gives no hint and the method-based fallback also failed.
        reason = (
            f"Cannot determine if {kind} is EGDF. "
            f"Object must be EGDF for homogeneity analysis."
        )
    else:
        return

    message = "DataHomogeneity only supports EGDF objects. " + reason
    self.logger.error(message)
    raise ValueError(message)
|
|
364
|
+
|
|
365
|
+
def _gdf_obj_validation(self):
    """
    Check that ``self.gdf`` carries everything the analysis reads.

    The checks run in this order and stop at the first failure:
    presence of ``_fitted``, ``_fitted`` being truthy, presence of
    ``data``, and finally a non-``None`` ``pdf_points``.

    Raises
    ------
    ValueError
        If ``_fitted`` is missing or falsy, or ``data`` is missing.
    AttributeError
        If ``pdf_points`` is missing or ``None``. The message differs
        depending on whether the object has a falsy ``catch`` attribute.
    """
    self.logger.debug("Validating EGDF object attributes for homogeneity analysis")
    target = self.gdf

    if not hasattr(target, '_fitted'):
        problem = "EGDF object must have _fitted attribute"
        self.logger.error(problem)
        raise ValueError(problem)

    if not target._fitted:
        problem = "EGDF object must be fitted before homogeneity analysis"
        self.logger.error(problem)
        raise ValueError(problem)

    for name in ('data',):
        if hasattr(target, name):
            continue
        problem = f"EGDF object missing required attribute: {name}"
        self.logger.error(problem)
        raise ValueError(problem)

    # Nothing more to check once PDF points are available.
    if getattr(target, 'pdf_points', None) is not None:
        return

    # Distinguish "catch was explicitly off" from "points are simply absent".
    catch_disabled = hasattr(target, 'catch') and not target.catch
    if catch_disabled:
        problem = ("EGDF object must have catch=True to generate "
                   "pdf_points required for homogeneity analysis.")
    else:
        problem = ("EGDF object is missing 'pdf_points'. "
                   "Please ensure catch=True when fitting EGDF.")
    self.logger.error(problem)
    raise AttributeError(problem)
|
|
397
|
+
|
|
398
|
+
def _prepare_params_from_gdf(self):
    """
    Collect the EGDF object's parameters into a new dictionary.

    The result starts from a copy of ``self.gdf.params`` (when that
    attribute exists and is non-empty). Each of ``S``, ``S_opt``, ``z0``,
    ``data``, ``pdf_points`` and ``di_points_n`` found directly on the
    object with a non-``None`` value is then written on top, overriding a
    same-named entry from ``params``.

    Returns
    -------
    dict
        A new dictionary; the values themselves are not copied.
    """
    self.logger.debug("Extracting parameters from EGDF object")
    source = self.gdf
    collected = {}

    # Seed with whatever the EGDF already recorded in its params.
    recorded = getattr(source, 'params', None)
    if recorded:
        collected.update(recorded)

    # Direct attributes take precedence over the recorded entries.
    for name in ('S', 'S_opt', 'z0', 'data', 'pdf_points', 'di_points_n'):
        current = getattr(source, name, None)
        if current is not None:
            collected[name] = current

    return collected
|
|
416
|
+
|
|
417
|
+
def _append_error(self, error_message, exception_type=None):
    """
    Log an error and record it on both the EGDF object and this analysis.

    Parameters
    ----------
    error_message : str
        Text logged at error level and stored under the ``'error'`` key.
    exception_type : str, optional
        Label stored under ``'exception_type'``; falls back to
        ``'DataHomogeneityError'`` when omitted or falsy.

    Notes
    -----
    The entry is appended to ``self.gdf.params['errors']`` and to
    ``self.params['errors']``, creating either list on first use. Both
    lists receive the same dictionary object. When the EGDF object has no
    ``params`` attribute, or it is ``None``, only the local record is
    written instead of failing on the membership test.
    """
    self.logger.error(error_message)
    error_entry = {
        'method': 'DataHomogeneity',
        'error': error_message,
        'exception_type': exception_type or 'DataHomogeneityError'
    }

    # Mirror the entry into the EGDF's own params when it has a usable container.
    gdf_params = getattr(self.gdf, 'params', None)
    if gdf_params is not None:
        gdf_params.setdefault('errors', []).append(error_entry)

    # Always keep a local record.
    self.params.setdefault('errors', []).append(error_entry)
|
|
436
|
+
|
|
437
|
+
def _append_warning(self, warning_message):
    """
    Log a warning and record it on both the EGDF object and this analysis.

    Parameters
    ----------
    warning_message : str
        Text logged at warning level and stored under the ``'warning'`` key.

    Notes
    -----
    The entry is appended to ``self.gdf.params['warnings']`` and to
    ``self.params['warnings']``, creating either list on first use. Both
    lists receive the same dictionary object. When the EGDF object has no
    ``params`` attribute, or it is ``None``, only the local record is
    written instead of failing on the membership test.
    """
    self.logger.warning(warning_message)
    warning_entry = {
        'method': 'DataHomogeneity',
        'warning': warning_message
    }

    # Mirror the entry into the EGDF's own params when it has a usable container.
    gdf_params = getattr(self.gdf, 'params', None)
    if gdf_params is not None:
        gdf_params.setdefault('warnings', []).append(warning_entry)

    # Always keep a local record.
    self.params.setdefault('warnings', []).append(warning_entry)
|
|
455
|
+
|
|
456
|
+
def _flush_memory(self):
    """
    Drop the bulky point arrays from the params dictionaries when ``flush`` is set.

    When ``self.flush`` is truthy, the keys ``di_points_n`` and
    ``pdf_points`` are removed (if present) from ``self.gdf.params`` and
    from ``self.params['gdf_parameters']``. Each removal is logged at info
    level. When ``self.flush`` is falsy, only the opening log line is emitted.

    Only dictionary entries are touched here; the ``di_points_n`` and
    ``pdf_points`` attributes on the EGDF object itself are left in place.
    """
    self.logger.info("Flushing memory if flush=True")
    if not self.flush:
        return

    bulky_keys = ('di_points_n', 'pdf_points')

    # First the EGDF object's own params dictionary ...
    egdf_store = getattr(self.gdf, 'params', None)
    if egdf_store:
        for key in bulky_keys:
            if key in egdf_store:
                del egdf_store[key]
                self.logger.info(f"Removed {key} from EGDF params dictionary to save memory.")

    # ... then the copy kept in this object's results.
    local_store = self.params.get('gdf_parameters')
    if local_store:
        for key in bulky_keys:
            if key in local_store:
                del local_store[key]
                self.logger.info(f"Removed {key} from local gdf_parameters to save memory.")
|
|
490
|
+
|
|
491
|
+
def fit(self, plot: bool = False) -> bool:
    """
    Run the homogeneity analysis on the wrapped EGDF object.

    Steps, in order:

    1. Collect the EGDF parameters via ``_prepare_params_from_gdf()``.
    2. If ``self.min_distance`` is None, set it to
       ``max(1, len(pdf) // 20)`` where ``pdf`` comes from ``_get_pdf_data()``.
    3. Run ``_test_homogeneity()`` and store its verdict in
       ``self.is_homogeneous``.
    4. Store ``_get_z0()`` in ``self.z0``.
    5. If ``self.catch`` is truthy, write the results, the analysis settings
       and the EGDF parameters into ``self.params``.
    6. If the EGDF object has a truthy ``catch`` and a ``params`` attribute,
       write the homogeneity flags into ``self.gdf.params``.
    7. Mark the object as fitted, optionally call ``self.plot()``, then call
       ``_flush_memory()``.

    Parameters
    ----------
    plot : bool, optional
        When True, ``self.plot()`` is called after the results are stored.

    Returns
    -------
    bool
        The value stored in ``self.is_homogeneous``.

    Raises
    ------
    Exception
        Any exception raised by one of the steps is recorded through
        ``_append_error`` and then re-raised unchanged.
    """
    self.logger.info("Starting homogeneity analysis fit() method")
    try:
        # Prepare parameters from EGDF
        self.logger.debug("Preparing parameters from EGDF object")
        gdf_params = self._prepare_params_from_gdf()

        # Derive a default peak separation from the PDF length when none was given
        if self.min_distance is None:
            self.logger.debug("Minimum distance not provided, calculating...")
            pdf_length = len(self._get_pdf_data())
            self.min_distance = max(1, pdf_length // 20)

        # Homogeneity verdict
        self.logger.info("Testing homogeneity")
        self.is_homogeneous = self._test_homogeneity()

        # Global optimum
        self.logger.info("Extracting global optimum Z0")
        self.z0 = self._get_z0()

        # Persist results locally
        if self.catch:
            analysis_settings = {
                'smoothing_sigma': self.smoothing_sigma,
                'min_height_ratio': self.min_height_ratio,
                'min_distance': self.min_distance,
                'flush': self.flush
            }
            summary = {
                'gdf_type': 'egdf',
                'is_homogeneous': self.is_homogeneous,
                'picks': self.picks,
                'z0': self.z0,
                'global_extremum_idx': self.global_extremum_idx,
                'analysis_parameters': analysis_settings,
                'homogeneity_fitted': True
            }
            self.params.update(summary)
            self.params['gdf_parameters'] = gdf_params

        # Mirror the verdict onto the EGDF object when it keeps a params dict
        gdf_stores_params = (
            hasattr(self.gdf, 'catch') and self.gdf.catch and hasattr(self.gdf, 'params')
        )
        if gdf_stores_params:
            self.gdf.params.update({
                'is_homogeneous': self.is_homogeneous,
                'homogeneity_checked': True,
                'homogeneity_fitted': True
            })
            self.logger.info("Homogeneity results written to EGDF params dictionary.")

        self._fitted = True

        if plot:
            self.logger.info("Plotting results as requested")
            self.plot()

        # Flushing happens after plotting so the plot still sees the data
        self.logger.info("Handling memory flush if requested")
        self._flush_memory()

        verdict = 'homogeneous' if self.is_homogeneous else 'not homogeneous'
        self.logger.info("Homogeneity analysis completed for EGDF.")
        self.logger.info(f"Data is {verdict}")
        self.logger.info(f"Number of maxima detected: {len(self.picks)}")

        return self.is_homogeneous

    except Exception as e:
        self._append_error(f"Error during homogeneity analysis: {str(e)}", type(e).__name__)
        raise
|
|
690
|
+
|
|
691
|
+
def _test_homogeneity(self):
|
|
692
|
+
"""
|
|
693
|
+
Test data homogeneity for EGDF.
|
|
694
|
+
|
|
695
|
+
Returns
|
|
696
|
+
-------
|
|
697
|
+
bool
|
|
698
|
+
True if homogeneous, False otherwise.
|
|
699
|
+
"""
|
|
700
|
+
self.logger.info("Starting homogeneity test for EGDF")
|
|
701
|
+
try:
|
|
702
|
+
pdf_data = self._get_pdf_data()
|
|
703
|
+
has_negative_pdf = np.any(pdf_data < 0)
|
|
704
|
+
|
|
705
|
+
# EGDF: Look for single global maximum
|
|
706
|
+
self.picks = self._detect_maxima()
|
|
707
|
+
extrema_type = "maxima"
|
|
708
|
+
num_extrema = len(self.picks)
|
|
709
|
+
is_homogeneous = not has_negative_pdf and num_extrema == 1
|
|
710
|
+
|
|
711
|
+
if self.verbose:
|
|
712
|
+
if not is_homogeneous:
|
|
713
|
+
reasons = []
|
|
714
|
+
if has_negative_pdf:
|
|
715
|
+
reasons.append("PDF has negative values")
|
|
716
|
+
self._append_warning("PDF contains negative values - may indicate numerical issues")
|
|
717
|
+
if num_extrema > 1:
|
|
718
|
+
reasons.append(f"multiple {extrema_type} [{num_extrema}] detected")
|
|
719
|
+
self._append_warning(f"Multiple {extrema_type} detected - data may not be homogeneous")
|
|
720
|
+
elif num_extrema == 0:
|
|
721
|
+
reasons.append(f"no significant {extrema_type} detected")
|
|
722
|
+
self._append_warning(f"No significant {extrema_type} detected - check smoothing parameters")
|
|
723
|
+
self.logger.info(f"EGDF data is not homogeneous: {', '.join(reasons)}.")
|
|
724
|
+
else:
|
|
725
|
+
self.logger.info(f"EGDF data is homogeneous: PDF has no negative values "
|
|
726
|
+
f"and exactly one {extrema_type[:-1]} detected.")
|
|
727
|
+
|
|
728
|
+
# Store additional info in params
|
|
729
|
+
if self.catch:
|
|
730
|
+
self.params.update({
|
|
731
|
+
'has_negative_pdf': has_negative_pdf,
|
|
732
|
+
f'num_{extrema_type}': num_extrema,
|
|
733
|
+
'extrema_type': extrema_type
|
|
734
|
+
})
|
|
735
|
+
|
|
736
|
+
return is_homogeneous
|
|
737
|
+
|
|
738
|
+
except Exception as e:
|
|
739
|
+
error_msg = f"Error in homogeneity test: {str(e)}"
|
|
740
|
+
self._append_error(error_msg, type(e).__name__)
|
|
741
|
+
raise
|
|
742
|
+
|
|
743
|
+
def _detect_maxima(self):
|
|
744
|
+
"""Detect maxima for EGDF analysis."""
|
|
745
|
+
self.logger.info("Detecting maxima in the PDF")
|
|
746
|
+
try:
|
|
747
|
+
pdf_data = self._get_pdf_data()
|
|
748
|
+
data_points = self._get_data_points()
|
|
749
|
+
smoothed_pdf = self._smooth_pdf()
|
|
750
|
+
|
|
751
|
+
min_height = np.max(smoothed_pdf) * self.min_height_ratio
|
|
752
|
+
maxima_idx, _ = find_peaks(smoothed_pdf,
|
|
753
|
+
height=min_height,
|
|
754
|
+
distance=self.min_distance)
|
|
755
|
+
|
|
756
|
+
picks = []
|
|
757
|
+
global_max_value = -np.inf
|
|
758
|
+
|
|
759
|
+
for idx in maxima_idx:
|
|
760
|
+
pick_info = {
|
|
761
|
+
'index': int(idx),
|
|
762
|
+
'position': float(data_points[idx]),
|
|
763
|
+
'pdf_value': float(pdf_data[idx]),
|
|
764
|
+
'smoothed_pdf_value': float(smoothed_pdf[idx]),
|
|
765
|
+
'is_global': False
|
|
766
|
+
}
|
|
767
|
+
picks.append(pick_info)
|
|
768
|
+
|
|
769
|
+
if smoothed_pdf[idx] > global_max_value:
|
|
770
|
+
global_max_value = smoothed_pdf[idx]
|
|
771
|
+
self.global_extremum_idx = idx
|
|
772
|
+
|
|
773
|
+
# Mark global maximum
|
|
774
|
+
for pick in picks:
|
|
775
|
+
if pick['index'] == self.global_extremum_idx:
|
|
776
|
+
pick['is_global'] = True
|
|
777
|
+
break
|
|
778
|
+
|
|
779
|
+
# Sort by importance (global first, then by height)
|
|
780
|
+
picks.sort(key=lambda x: (not x['is_global'], -x['smoothed_pdf_value']))
|
|
781
|
+
|
|
782
|
+
return picks
|
|
783
|
+
|
|
784
|
+
except Exception as e:
|
|
785
|
+
error_msg = f"Error detecting maxima: {str(e)}"
|
|
786
|
+
self._append_error(error_msg, type(e).__name__)
|
|
787
|
+
return []
|
|
788
|
+
|
|
789
|
+
def _smooth_pdf(self):
|
|
790
|
+
"""Apply Gaussian smoothing to PDF."""
|
|
791
|
+
self.logger.info("Smoothing PDF with Gaussian filter")
|
|
792
|
+
try:
|
|
793
|
+
pdf_data = self._get_pdf_data()
|
|
794
|
+
return gaussian_filter1d(pdf_data, sigma=self.smoothing_sigma)
|
|
795
|
+
except Exception as e:
|
|
796
|
+
error_msg = f"Error smoothing PDF: {str(e)}"
|
|
797
|
+
self._append_error(error_msg, type(e).__name__)
|
|
798
|
+
return pdf_data # Return unsmoothed data as fallback
|
|
799
|
+
|
|
800
|
+
def _get_pdf_data(self):
|
|
801
|
+
"""Get PDF values from the EGDF object."""
|
|
802
|
+
self.logger.info("Retrieving PDF data from EGDF object")
|
|
803
|
+
return self.gdf.pdf_points
|
|
804
|
+
|
|
805
|
+
def _get_data_points(self):
|
|
806
|
+
"""Get data point positions from the EGDF object."""
|
|
807
|
+
self.logger.info("Retrieving data point positions from EGDF object")
|
|
808
|
+
return self.gdf.di_points_n
|
|
809
|
+
|
|
810
|
+
def _get_z0(self):
|
|
811
|
+
"""Get Z0 (global optimum) value from the EGDF object."""
|
|
812
|
+
self.logger.info("Retrieving Z0 (global optimum) from EGDF object")
|
|
813
|
+
if hasattr(self.gdf, 'z0') and self.gdf.z0 is not None:
|
|
814
|
+
return self.gdf.z0
|
|
815
|
+
elif hasattr(self.gdf, 'params') and 'z0' in self.gdf.params:
|
|
816
|
+
return self.gdf.params['z0']
|
|
817
|
+
else:
|
|
818
|
+
# Fallback: use global extremum from PDF
|
|
819
|
+
if self.global_extremum_idx is not None:
|
|
820
|
+
data_points = self._get_data_points()
|
|
821
|
+
if self.verbose:
|
|
822
|
+
self._append_warning("Z0 not found in EGDF object. Using PDF global extremum as Z0.")
|
|
823
|
+
return data_points[self.global_extremum_idx]
|
|
824
|
+
return None
|
|
825
|
+
|
|
826
|
+
def plot(self, figsize=(12, 8), title=None):
    """
    Draw the PDF, its smoothed version and the detected maxima.

    The figure contains:

    - the PDF (blue solid line) and the smoothed PDF (orange dashed line);
    - a red vertical line and marker for the global maximum, grey ones for
      every other detected maximum;
    - a cyan dotted line at ``self.z0`` when it differs from the position of
      the global maximum by more than 0.001;
    - a status box ("Homogeneous" in green / "Not Homogeneous" in red);
    - an info box with the number of maxima and, when available in
      ``self.params``, whether the PDF has negative values.

    The figure is shown with ``plt.show()``.

    Parameters
    ----------
    figsize : tuple, default=(12, 8)
        Figure size passed to ``plt.subplots``.
    title : str, optional
        Plot title. If None, a title is generated from the homogeneity result.

    Raises
    ------
    RuntimeError
        If called before ``fit()``.
    Exception
        Any error raised while building the figure is recorded through
        ``_append_error`` and re-raised.
    """
    self.logger.info("Generating homogeneity analysis plot")
    if not self._fitted:
        not_fitted_msg = "Must call fit() before plotting. Run fit() method first."
        self.logger.error(not_fitted_msg)
        raise RuntimeError(not_fitted_msg)

    try:
        fig, ax = plt.subplots(figsize=figsize)

        raw_pdf = self._get_pdf_data()
        x_values = self._get_data_points()
        smooth_pdf = self._smooth_pdf()

        # Raw and smoothed curves
        ax.plot(x_values, raw_pdf, 'b-', linewidth=2, label='PDF', alpha=0.7)
        ax.plot(x_values, smooth_pdf, 'orange', linestyle='--', linewidth=1.5,
                label='Smoothed PDF', alpha=0.8)

        # Detected maxima: the global one is emphasised, the rest are greyed out
        for peak in self.picks:
            x_peak = peak['position']
            y_peak = peak['pdf_value']
            if peak['is_global']:
                ax.axvline(x_peak, color='red', linestyle='-', linewidth=2, alpha=0.8)
                ax.plot(x_peak, y_peak, 'o', color='red', markersize=10,
                        label=f'Global maximum (Z0={x_peak:.3f})')
                continue
            ax.axvline(x_peak, color='grey', linestyle='-', linewidth=1, alpha=0.6)
            ax.plot(x_peak, y_peak, 'o', color='grey', markersize=6, alpha=0.7)

        # Separate Z0 marker only when it does not coincide with the global maximum
        if self.z0 is not None and self.global_extremum_idx is not None:
            peak_position = x_values[self.global_extremum_idx]
            if abs(self.z0 - peak_position) > 0.001:
                ax.axvline(self.z0, color='cyan', linestyle=':', linewidth=2, alpha=0.8,
                           label=f'Original Z0={self.z0:.3f}')

        # Homogeneity status box (top-left)
        if self.is_homogeneous:
            status_text, status_color = "Homogeneous", 'green'
        else:
            status_text, status_color = "Not Homogeneous", 'red'

        ax.text(0.02, 0.98, status_text, transform=ax.transAxes,
                fontsize=12, fontweight='bold', color=status_color,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor=status_color))

        # Analysis summary box (bottom-left)
        negative_line = ""
        if hasattr(self, 'params') and 'has_negative_pdf' in self.params:
            negative_line = f"Negative PDF: {'Yes' if self.params['has_negative_pdf'] else 'No'}"
        info_text = "\n".join(["Type: EGDF", f"Maxima: {len(self.picks)}", negative_line])

        ax.text(0.02, 0.02, info_text, transform=ax.transAxes,
                fontsize=10, verticalalignment='bottom',
                bbox=dict(boxstyle='round', facecolor='lightgrey', alpha=0.7))

        ax.set_xlabel('Data Points')
        ax.set_ylabel('PDF Values')

        if title is None:
            homogeneous_str = "Homogeneous" if self.is_homogeneous else "Non-Homogeneous"
            title = f"EGDF {homogeneous_str} Data Analysis"
        ax.set_title(title)

        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    except Exception as e:
        self._append_error(f"Error creating plot: {str(e)}", type(e).__name__)
        raise
|
|
1033
|
+
|
|
1034
|
+
def results(self) -> Dict[str, Any]:
    """
    Return a deep copy of the stored homogeneity analysis results.

    The returned dictionary is an independent copy of ``self.params``:
    changing it, including nested entries such as ``'picks'`` or
    ``'analysis_parameters'``, does not alter the state kept by this object.

    Keys written by ``fit()`` when ``catch`` is truthy:

    - ``'gdf_type'``: always ``'egdf'``.
    - ``'is_homogeneous'``: the homogeneity verdict.
    - ``'picks'``: list of detected maxima, each a dict with ``'index'``,
      ``'position'``, ``'pdf_value'``, ``'smoothed_pdf_value'`` and
      ``'is_global'``.
    - ``'z0'``: the Z0 value taken from the EGDF object or the detected
      global maximum.
    - ``'global_extremum_idx'``: index of the global maximum.
    - ``'has_negative_pdf'``, ``'num_maxima'``, ``'extrema_type'``.
    - ``'analysis_parameters'``: ``'smoothing_sigma'``,
      ``'min_height_ratio'``, ``'min_distance'`` and ``'flush'``.
    - ``'gdf_parameters'``: parameters collected from the EGDF object.
    - ``'homogeneity_fitted'``: True.

    ``'errors'`` and ``'warnings'`` lists may also be present if any were
    recorded.

    Returns
    -------
    dict
        Deep copy of ``self.params``.

    Raises
    ------
    RuntimeError
        If ``fit()`` has not been run, or if ``self.params`` is empty
        (nothing was stored, e.g. because ``catch`` was False).

    Notes
    -----
    Every call copies the whole dictionary, including any large arrays it
    holds.
    """
    # Function-scope import keeps this method self-contained.
    import copy

    self.logger.info("Retrieving homogeneity analysis results")
    if not self._fitted:
        self.logger.error("No analysis results available. Call fit() method first.")
        raise RuntimeError("No analysis results available. Call fit() method first.")

    if not self.params:
        self.logger.error("No results stored. Ensure catch=True during initialization.")
        raise RuntimeError("No results stored. Ensure catch=True during initialization.")

    # A shallow copy would still share the nested dicts/lists with self.params.
    return copy.deepcopy(self.params)
|
|
1211
|
+
|
|
1212
|
+
@property
def fitted(self):
    """bool: The internal ``_fitted`` flag, which ``fit()`` sets to True once results are stored."""
    state = self._fitted
    return state
|
|
1216
|
+
|
|
1217
|
+
def __repr__(self):
|
|
1218
|
+
return f"DataHomogeneity(gdf_type='egdf', fitted={self._fitted}, is_homogeneous={self.is_homogeneous})"
|