machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1047 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Z0 Estimator - Universal class for estimating Z0 point for GDF distributions
|
|
3
|
+
|
|
4
|
+
Z0 is the point where:
|
|
5
|
+
- For EGDF/ELDF: PDF reaches its global maximum
|
|
6
|
+
- For QLDF/QGDF: Distribution function equals 0.5 (median/50th percentile)
|
|
7
|
+
|
|
8
|
+
Author: Nirmal Parmar
|
|
9
|
+
Machine Gnostics
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import logging
|
|
14
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
15
|
+
from typing import Union, Dict, Any, Optional
|
|
16
|
+
|
|
17
|
+
class Z0Estimator:
|
|
18
|
+
"""
|
|
19
|
+
Universal Z0 estimator for all GDF (Gnostics Distribution Function) types.
|
|
20
|
+
|
|
21
|
+
This class automatically detects the distribution type and finds the appropriate Z0 point:
|
|
22
|
+
- For EGDF/ELDF: Finds the point where PDF reaches its global maximum
|
|
23
|
+
- For QLDF/QGDF: Finds the point where the distribution function equals 0.5
|
|
24
|
+
|
|
25
|
+
The estimator uses multiple advanced methods including spline optimization, polynomial fitting,
|
|
26
|
+
refined interpolation, and parabolic interpolation to achieve high accuracy.
|
|
27
|
+
|
|
28
|
+
Key Features:
|
|
29
|
+
- Automatic distribution type detection (EGDF, ELDF, QLDF, QGDF)
|
|
30
|
+
- Multiple estimation strategies (simple discrete vs advanced optimization)
|
|
31
|
+
- Robust handling of flat regions and edge cases
|
|
32
|
+
- Comprehensive diagnostic information
|
|
33
|
+
- Built-in visualization capabilities
|
|
34
|
+
- Automatic Z0 assignment back to GDF object
|
|
35
|
+
- Estimate Z0 gnostic error properties (Residual Entropy, RRE)
|
|
36
|
+
|
|
37
|
+
Attributes:
|
|
38
|
+
gdf: The fitted GDF object (EGDF, ELDF, QLDF, or QGDF)
|
|
39
|
+
gdf_type (str): Detected distribution type ('egdf', 'eldf', 'qldf', 'qgdf')
|
|
40
|
+
optimize (bool): Whether to use advanced optimization methods
|
|
41
|
+
verbose (bool): Whether to print detailed progress information
|
|
42
|
+
find_median (bool): True for QLDF/QGDF (find 0.5 point), False for EGDF/ELDF (find PDF max)
|
|
43
|
+
z0 (float): Estimated Z0 value (None until fit() is called)
|
|
44
|
+
estimation_info (dict): Detailed information about the estimation process
|
|
45
|
+
|
|
46
|
+
Usage Patterns:
|
|
47
|
+
|
|
48
|
+
1. Basic Usage (E-distributions - finds PDF maximum):
|
|
49
|
+
```python
|
|
50
|
+
from machinegnostics.magcal import EGDF
|
|
51
|
+
from machinegnostics.magcal import Z0Estimator
|
|
52
|
+
|
|
53
|
+
# Fit your distribution
|
|
54
|
+
egdf = EGDF(data=your_data)
|
|
55
|
+
egdf.fit()
|
|
56
|
+
|
|
57
|
+
# Estimate Z0
|
|
58
|
+
estimator = Z0Estimator(egdf, verbose=True)
|
|
59
|
+
z0 = estimator.fit()
|
|
60
|
+
print(f"Z0 at PDF maximum: {z0}")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
2. Q-distributions Usage:
|
|
64
|
+
```python
|
|
65
|
+
from machinegnostics.magcal import QLDF
|
|
66
|
+
from machinegnostics.magcal import Z0Estimator
|
|
67
|
+
|
|
68
|
+
# Fit your Q-distribution
|
|
69
|
+
qldf = QLDF(data=your_data)
|
|
70
|
+
qldf.fit()
|
|
71
|
+
|
|
72
|
+
# Estimate Z0 at median (0.5)
|
|
73
|
+
estimator = Z0Estimator(qldf, optimize=True, verbose=True)
|
|
74
|
+
z0 = estimator.fit()
|
|
75
|
+
print(f"Z0 at median (0.5): {z0}")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
3. Simple vs Advanced Estimation:
|
|
79
|
+
```python
|
|
80
|
+
# Fast discrete estimation (good for quick analysis)
|
|
81
|
+
estimator_simple = Z0Estimator(gdf_object, optimize=False)
|
|
82
|
+
z0_simple = estimator_simple.fit()
|
|
83
|
+
|
|
84
|
+
# Advanced optimization (higher accuracy, slower)
|
|
85
|
+
estimator_advanced = Z0Estimator(gdf_object, optimize=True, verbose=True)
|
|
86
|
+
z0_advanced = estimator_advanced.fit()
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
4. Getting Detailed Information:
|
|
90
|
+
```python
|
|
91
|
+
# Get estimation details
|
|
92
|
+
info = estimator.get_estimation_info()
|
|
93
|
+
print(f"Method used: {info['z0_method']}")
|
|
94
|
+
print(f"Target type: {info['target_type']}")
|
|
95
|
+
print(f"Distribution type: {info['gdf_type']}")
|
|
96
|
+
|
|
97
|
+
# Check what the estimator is looking for
|
|
98
|
+
if estimator.find_median:
|
|
99
|
+
print("Looking for median (0.5 point)")
|
|
100
|
+
else:
|
|
101
|
+
print("Looking for PDF maximum")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
5. Visualization:
|
|
105
|
+
```python
|
|
106
|
+
# Create diagnostic plots
|
|
107
|
+
estimator.plot_z0_analysis()
|
|
108
|
+
# Shows PDF with Z0 point + distribution function/CDF
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Advanced Methods (when optimize=True):
|
|
112
|
+
|
|
113
|
+
For Q-distributions (median finding):
|
|
114
|
+
- Spline interpolation with root finding
|
|
115
|
+
- Linear interpolation between bracketing points
|
|
116
|
+
- Polynomial fitting with root solving
|
|
117
|
+
|
|
118
|
+
For E-distributions (PDF maximum finding):
|
|
119
|
+
- Spline optimization over entire domain
|
|
120
|
+
- Polynomial fitting with critical point analysis
|
|
121
|
+
- Refined interpolation with fine grid search
|
|
122
|
+
- Parabolic interpolation using three-point method
|
|
123
|
+
|
|
124
|
+
Error Handling:
|
|
125
|
+
- Validates GDF object is properly fitted
|
|
126
|
+
- Checks for required data based on distribution type
|
|
127
|
+
- Graceful fallback to discrete methods if advanced methods fail
|
|
128
|
+
- Clear error messages for common issues
|
|
129
|
+
|
|
130
|
+
Performance Notes:
|
|
131
|
+
- Simple mode (optimize=False): Very fast, good accuracy for most cases
|
|
132
|
+
- Advanced mode (optimize=True): Higher accuracy, ~2-10x slower depending on data size
|
|
133
|
+
- Memory usage scales linearly with number of evaluation points
|
|
134
|
+
- Recommended to use verbose=True for diagnostic purposes
|
|
135
|
+
|
|
136
|
+
Notes:
|
|
137
|
+
- The GDF object must be fitted before passing to Z0Estimator
|
|
138
|
+
- For Q-distributions: finds where distribution function = 0.5 (median/50th percentile)
|
|
139
|
+
- For E-distributions: finds where PDF reaches its global maximum
|
|
140
|
+
- Advanced methods are tried in order of sophistication and reliability
|
|
141
|
+
- The estimated Z0 is automatically assigned back to the GDF object
|
|
142
|
+
- All methods handle flat regions by finding the middle point
|
|
143
|
+
- Works with any GDF subclass that follows the standard interface
|
|
144
|
+
|
|
145
|
+
Examples:
|
|
146
|
+
|
|
147
|
+
Complete workflow example:
|
|
148
|
+
```python
|
|
149
|
+
import numpy as np
|
|
150
|
+
from machinegnostics.magcal import EGDF
|
|
151
|
+
from machinegnostics.magcal import Z0Estimator
|
|
152
|
+
|
|
153
|
+
# Generate some sample data
|
|
154
|
+
data = np.random.normal(0, 1, 1000)
|
|
155
|
+
|
|
156
|
+
# Fit EGDF
|
|
157
|
+
egdf = EGDF(data=data)
|
|
158
|
+
egdf.fit()
|
|
159
|
+
|
|
160
|
+
# Estimate Z0 with detailed output
|
|
161
|
+
estimator = Z0Estimator(egdf, optimize=True, verbose=True)
|
|
162
|
+
z0 = estimator.fit()
|
|
163
|
+
|
|
164
|
+
# Check results
|
|
165
|
+
print(f"\\nEstimated Z0: {z0:.6f}")
|
|
166
|
+
print(f"Original GDF Z0: {egdf.z0:.6f}") # Automatically updated
|
|
167
|
+
|
|
168
|
+
# Get detailed info
|
|
169
|
+
info = estimator.get_estimation_info()
|
|
170
|
+
print(f"Method used: {info['z0_method']}")
|
|
171
|
+
|
|
172
|
+
# Visualize results
|
|
173
|
+
estimator.plot_z0_analysis()
|
|
174
|
+
```
|
|
175
|
+
"""
|
|
176
|
+
|
|
177
|
+
def __init__(self,
             gdf_object: object,
             optimize: bool = True,
             verbose: bool = False):
    """
    Initialize the Z0 estimator.

    Args:
        gdf_object: A fitted GDF object (EGDF, ELDF, QLDF, or QGDF).
            Must have been fitted (gdf_object.fit() called) before passing here.
        optimize (bool, optional): Whether to use advanced optimization methods.
            If True, uses spline optimization, polynomial fitting, etc.
            If False, uses simple discrete search.
            Defaults to True.
        verbose (bool, optional): Whether to emit detailed progress information
            during the estimation process (logger level DEBUG instead of WARNING).
            Defaults to False.

    Raises:
        ValueError: If gdf_object is not fitted or doesn't contain required data

    Examples:
        >>> # With advanced optimization (recommended)
        >>> estimator = Z0Estimator(fitted_gdf, optimize=True, verbose=True)

        >>> # Simple discrete estimation (faster)
        >>> estimator = Z0Estimator(fitted_gdf, optimize=False)
    """
    # The logger has to exist before validation runs: _validate_gdf_object
    # reports every failure through self.logger before raising ValueError, so
    # creating it afterwards turned each validation failure into AttributeError.
    self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)

    self._validate_gdf_object(gdf_object)

    self.gdf = gdf_object
    self.gdf_type = self._detect_gdf_type()
    self.optimize = optimize
    self.verbose = verbose

    # Q-distributions are searched for the 0.5 crossing; every other detected
    # type (including 'unknown') is searched for the PDF maximum.
    self.find_median = self.gdf_type.lower() in ['qldf', 'qgdf']

    # Results storage, filled in by fit()
    self.z0 = None
    self.estimation_info = {}

    self.logger.debug(f"{self.__class__.__name__} initialized:")
|
|
222
|
+
|
|
223
|
+
def _compute_error_properties_for_mean(self, z0):
|
|
224
|
+
"""
|
|
225
|
+
Compute error properties at the given Z0 point.
|
|
226
|
+
"""
|
|
227
|
+
self.logger.info("Computing error properties at Z0.")
|
|
228
|
+
# estimate q and q1
|
|
229
|
+
gc, q, q1 = self.gdf._calculate_gcq_at_given_zi(z0)
|
|
230
|
+
|
|
231
|
+
# fi and fj
|
|
232
|
+
fi_z0 = gc._fi(q, q1) # GME Gnostic Mean Estimating
|
|
233
|
+
fj_z0 = gc._fj(q, q1) # GMQ Gnostic Mean Quantifying
|
|
234
|
+
|
|
235
|
+
# entropy
|
|
236
|
+
i_e = np.mean(gc._ientropy(fj_z0))
|
|
237
|
+
j_e = np.mean(gc._jentropy(fj_z0))
|
|
238
|
+
self.residual_entropy = np.mean(gc._rentropy(i_e, j_e))
|
|
239
|
+
|
|
240
|
+
# RRE Relative Residual Entropy
|
|
241
|
+
self.RRE = np.mean((fj_z0 - fi_z0) / (fj_z0 + fi_z0))
|
|
242
|
+
|
|
243
|
+
# store to given gdf params
|
|
244
|
+
if hasattr(self.gdf, 'params'):
|
|
245
|
+
self.gdf.params['residual_entropy'] = float(self.residual_entropy)
|
|
246
|
+
self.gdf.params['RRE'] = float(self.RRE)
|
|
247
|
+
self.logger.info(f"Computed Residual Entropy: {self.residual_entropy}, RRE: {self.RRE}")
|
|
248
|
+
|
|
249
|
+
def fit(self) -> float:
|
|
250
|
+
"""
|
|
251
|
+
Estimate the Z0 point.
|
|
252
|
+
|
|
253
|
+
For EGDF/ELDF distributions, finds the point where PDF reaches its global maximum.
|
|
254
|
+
For QLDF/QGDF distributions, finds the point where the distribution function equals 0.5.
|
|
255
|
+
|
|
256
|
+
Returns:
|
|
257
|
+
float: The estimated Z0 value
|
|
258
|
+
|
|
259
|
+
Raises:
|
|
260
|
+
ValueError: If required data is not available for estimation
|
|
261
|
+
|
|
262
|
+
Examples:
|
|
263
|
+
>>> z0 = estimator.fit()
|
|
264
|
+
>>> print(f"Estimated Z0: {z0:.6f}")
|
|
265
|
+
|
|
266
|
+
>>> # The Z0 is automatically assigned to the GDF object
|
|
267
|
+
>>> print(f"GDF Z0: {estimator.gdf.z0:.6f}")
|
|
268
|
+
|
|
269
|
+
Notes:
|
|
270
|
+
- For Q-distributions: finds closest point to 0.5 in distribution function
|
|
271
|
+
- For E-distributions: finds PDF maximum (existing logic)
|
|
272
|
+
- Advanced methods adapt to the target type automatically
|
|
273
|
+
- The estimated Z0 is automatically assigned to the original GDF object
|
|
274
|
+
"""
|
|
275
|
+
# Add the safe Z0 estimating trick here
|
|
276
|
+
if np.all(self.gdf.data == self.gdf.data[0]):
|
|
277
|
+
self.logger.info("All data values are the same. Returning the mean value as Z0.")
|
|
278
|
+
self.z0 = np.mean(self.gdf.data)
|
|
279
|
+
self.gdf.z0 = self.z0 # Assign Z0 back to the GDF object
|
|
280
|
+
return self.z0
|
|
281
|
+
|
|
282
|
+
self.logger.info("Fitting Z0 estimator.")
|
|
283
|
+
if self.find_median:
|
|
284
|
+
self.logger.info(f"Finding Z0 where {self.gdf_type.upper()} = 0.5 (median)")
|
|
285
|
+
self.z0 = self._fit_median()
|
|
286
|
+
else:
|
|
287
|
+
self.logger.info(f"Finding Z0 where {self.gdf_type.upper()} PDF reaches maximum")
|
|
288
|
+
self.z0 = self._fit_pdf_maximum()
|
|
289
|
+
|
|
290
|
+
# error in mean
|
|
291
|
+
self.logger.info("Computing error properties at estimated Z0.")
|
|
292
|
+
self._compute_error_properties_for_mean(self.z0)
|
|
293
|
+
self.logger.info(f"Estimated Z0.")
|
|
294
|
+
return self.z0
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _fit_median(self) -> float:
|
|
298
|
+
"""Find Z0 where Q-distribution equals 0.5 (median)."""
|
|
299
|
+
self.logger.info(f"Finding Z0 where {self.gdf_type.upper()} = 0.5 (median)")
|
|
300
|
+
|
|
301
|
+
# Get distribution function points and data points
|
|
302
|
+
dist_points = self._get_distribution_points()
|
|
303
|
+
di_points = self._get_di_points()
|
|
304
|
+
|
|
305
|
+
if len(dist_points) == 0:
|
|
306
|
+
self.logger.error("No distribution function data available for Z0 estimation")
|
|
307
|
+
raise ValueError("No distribution function data available for Z0 estimation")
|
|
308
|
+
|
|
309
|
+
# Find the point closest to 0.5
|
|
310
|
+
target_value = 0.5
|
|
311
|
+
diff_from_target = np.abs(dist_points - target_value)
|
|
312
|
+
closest_idx = np.argmin(diff_from_target)
|
|
313
|
+
closest_value = dist_points[closest_idx]
|
|
314
|
+
closest_location = di_points[closest_idx]
|
|
315
|
+
|
|
316
|
+
self.logger.info(f"Discrete closest to 0.5: {self.gdf_type.upper()}={closest_value:.6f} at x={closest_location:.6f} (index {closest_idx})")
|
|
317
|
+
|
|
318
|
+
if self.optimize:
|
|
319
|
+
self.z0 = self._find_z0_advanced_median(closest_idx, di_points, dist_points, target_value)
|
|
320
|
+
|
|
321
|
+
method_used = self._get_last_method_used()
|
|
322
|
+
self.logger.info(f"Advanced estimation complete. Method: {method_used}")
|
|
323
|
+
else:
|
|
324
|
+
self.z0 = closest_location
|
|
325
|
+
|
|
326
|
+
# Store simple estimation info
|
|
327
|
+
self.estimation_info = {
|
|
328
|
+
'z0': self.z0,
|
|
329
|
+
'z0_method': 'discrete_closest_to_median',
|
|
330
|
+
'z0_target_value': target_value,
|
|
331
|
+
'z0_actual_value': closest_value,
|
|
332
|
+
'z0_target_index': closest_idx,
|
|
333
|
+
'gdf_type': self.gdf_type,
|
|
334
|
+
'target_type': 'median (0.5)'
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
self.logger.info(f"Simple estimation: Using discrete closest to 0.5")
|
|
338
|
+
|
|
339
|
+
# Update GDF object with Z0
|
|
340
|
+
self.gdf.z0 = self.z0
|
|
341
|
+
if hasattr(self.gdf, 'catch') and self.gdf.catch and hasattr(self.gdf, 'params'):
|
|
342
|
+
self.gdf.params['z0'] = float(self.z0)
|
|
343
|
+
|
|
344
|
+
return self.z0
|
|
345
|
+
|
|
346
|
+
def _fit_pdf_maximum(self) -> float:
|
|
347
|
+
"""Find Z0 where PDF reaches maximum (existing logic for EGDF/ELDF)."""
|
|
348
|
+
self.logger.info(f"Finding Z0 where {self.gdf_type.upper()} PDF reaches maximum")
|
|
349
|
+
|
|
350
|
+
# Get PDF and data points
|
|
351
|
+
pdf_points = self._get_pdf_points()
|
|
352
|
+
di_points = self._get_di_points()
|
|
353
|
+
|
|
354
|
+
if len(pdf_points) == 0:
|
|
355
|
+
self.logger.error("No PDF data available for Z0 estimation")
|
|
356
|
+
raise ValueError("No PDF data available for Z0 estimation")
|
|
357
|
+
|
|
358
|
+
# Find the global maximum in the discrete data
|
|
359
|
+
global_max_idx = np.argmax(pdf_points)
|
|
360
|
+
# Handle flat top case - find middle of maximum region
|
|
361
|
+
global_max_idx = self._find_middle_of_flat_region(pdf_points, global_max_idx, find_min=False)
|
|
362
|
+
|
|
363
|
+
global_max_value = pdf_points[global_max_idx]
|
|
364
|
+
global_max_location = di_points[global_max_idx]
|
|
365
|
+
|
|
366
|
+
self.logger.info(f"Discrete global maximum: PDF={global_max_value:.6f} at x={global_max_location:.6f} (index {global_max_idx})")
|
|
367
|
+
|
|
368
|
+
if self.optimize:
|
|
369
|
+
z0_candidate = self._find_z0_advanced_pdf_max(global_max_idx, di_points, pdf_points)
|
|
370
|
+
# Check if advanced method is close to discrete maximum
|
|
371
|
+
if abs(z0_candidate - global_max_location) > 1e-6:
|
|
372
|
+
self.logger.info(f"Advanced method z0 ({z0_candidate}) differs from discrete max ({global_max_location}), using discrete max.")
|
|
373
|
+
self.z0 = global_max_location
|
|
374
|
+
self.estimation_info['z0_method'] = 'discrete_pdf_maximum'
|
|
375
|
+
else:
|
|
376
|
+
self.z0 = z0_candidate
|
|
377
|
+
|
|
378
|
+
# Store simple estimation info
|
|
379
|
+
self.estimation_info = {
|
|
380
|
+
'z0': self.z0,
|
|
381
|
+
'z0_method': 'discrete_pdf_maximum',
|
|
382
|
+
'z0_extremum_pdf_value': global_max_value,
|
|
383
|
+
'z0_extremum_pdf_index': global_max_idx,
|
|
384
|
+
'gdf_type': self.gdf_type,
|
|
385
|
+
'target_type': 'pdf_maximum'
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
self.logger.info(f"Simple estimation: Using discrete PDF maximum at Z0")
|
|
389
|
+
|
|
390
|
+
# Update GDF object with Z0
|
|
391
|
+
self.gdf.z0 = self.z0
|
|
392
|
+
if hasattr(self.gdf, 'catch') and self.gdf.catch and hasattr(self.gdf, 'params'):
|
|
393
|
+
self.gdf.params['z0'] = float(self.z0)
|
|
394
|
+
|
|
395
|
+
return self.z0
|
|
396
|
+
|
|
397
|
+
def get_estimation_info(self) -> Dict[str, Any]:
|
|
398
|
+
"""
|
|
399
|
+
Get detailed information about the Z0 estimation process.
|
|
400
|
+
|
|
401
|
+
Returns comprehensive information about how the Z0 value was estimated,
|
|
402
|
+
including the method used, target type, and various diagnostic values.
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
Dict[str, Any]: Dictionary containing estimation details:
|
|
406
|
+
- z0 (float): The estimated Z0 value
|
|
407
|
+
- z0_method (str): Method used for estimation
|
|
408
|
+
- gdf_type (str): Type of distribution ('egdf', 'eldf', 'qldf', 'qgdf')
|
|
409
|
+
- target_type (str): What was being optimized ('median (0.5)' or 'pdf_maximum')
|
|
410
|
+
- Additional fields depending on the target type
|
|
411
|
+
|
|
412
|
+
Examples:
|
|
413
|
+
>>> estimator.fit()
|
|
414
|
+
>>> info = estimator.get_estimation_info()
|
|
415
|
+
>>> print(f"Z0: {info['z0']:.6f}")
|
|
416
|
+
>>> print(f"Method: {info['z0_method']}")
|
|
417
|
+
>>> print(f"Target: {info['target_type']}")
|
|
418
|
+
"""
|
|
419
|
+
self.logger.info("Retrieving estimation information.")
|
|
420
|
+
if not self.estimation_info:
|
|
421
|
+
return {"error": "No estimation performed yet. Call fit() first."}
|
|
422
|
+
return self.estimation_info.copy()
|
|
423
|
+
|
|
424
|
+
def plot_z0_analysis(self, figsize: tuple = (12, 6)) -> None:
|
|
425
|
+
"""
|
|
426
|
+
Create visualization plots showing the Z0 estimation results.
|
|
427
|
+
|
|
428
|
+
Generates a two-panel plot showing:
|
|
429
|
+
1. PDF curve with the estimated Z0 point marked
|
|
430
|
+
2. Distribution function curve (for Q-distributions) or CDF (for E-distributions)
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
figsize (tuple, optional): Figure size as (width, height) in inches.
|
|
434
|
+
Defaults to (12, 6).
|
|
435
|
+
"""
|
|
436
|
+
self.logger.info("Creating Z0 analysis plots.")
|
|
437
|
+
try:
|
|
438
|
+
import matplotlib.pyplot as plt
|
|
439
|
+
except ImportError:
|
|
440
|
+
self.logger.error("Matplotlib not available. Cannot create plots.")
|
|
441
|
+
return
|
|
442
|
+
|
|
443
|
+
if self.z0 is None:
|
|
444
|
+
self.logger.error("No Z0 estimation available. Call fit() first.")
|
|
445
|
+
return
|
|
446
|
+
|
|
447
|
+
pdf_points = self._get_pdf_points()
|
|
448
|
+
di_points = self._get_di_points()
|
|
449
|
+
|
|
450
|
+
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)
|
|
451
|
+
|
|
452
|
+
# Plot 1: PDF with Z0 point
|
|
453
|
+
ax1.plot(di_points, pdf_points, 'b-', linewidth=2, label='PDF')
|
|
454
|
+
target_desc = "Median (0.5)" if self.find_median else "PDF Maximum"
|
|
455
|
+
ax1.axvline(self.z0, color='red', linestyle='--', linewidth=2,
|
|
456
|
+
label=f'Z0 ({target_desc}): {self.z0:.4f}')
|
|
457
|
+
ax1.scatter([self.z0], [np.interp(self.z0, di_points, pdf_points)],
|
|
458
|
+
color='red', s=100, zorder=5)
|
|
459
|
+
ax1.set_xlabel('Value')
|
|
460
|
+
ax1.set_ylabel('PDF')
|
|
461
|
+
ax1.set_title(f'{self.gdf_type.upper()} PDF with Z0 Point')
|
|
462
|
+
ax1.legend()
|
|
463
|
+
ax1.grid(True, alpha=0.3)
|
|
464
|
+
|
|
465
|
+
# Plot 2: Distribution function or CDF
|
|
466
|
+
if self.find_median:
|
|
467
|
+
# Plot Q-distribution function
|
|
468
|
+
dist_points = self._get_distribution_points()
|
|
469
|
+
if len(dist_points) > 0:
|
|
470
|
+
ax2.plot(di_points, dist_points, 'g-', linewidth=2, label=f'{self.gdf_type.upper()}')
|
|
471
|
+
ax2.axhline(0.5, color='orange', linestyle=':', linewidth=2, label='Target (0.5)')
|
|
472
|
+
ax2.axvline(self.z0, color='red', linestyle='--', linewidth=2,
|
|
473
|
+
label=f'Z0: {self.z0:.4f}')
|
|
474
|
+
ax2.scatter([self.z0], [0.5], color='red', s=100, zorder=5)
|
|
475
|
+
ax2.set_xlabel('Value')
|
|
476
|
+
ax2.set_ylabel(f'{self.gdf_type.upper()}')
|
|
477
|
+
ax2.set_title(f'{self.gdf_type.upper()} with Z0 at Median')
|
|
478
|
+
ax2.legend()
|
|
479
|
+
ax2.grid(True, alpha=0.3)
|
|
480
|
+
else:
|
|
481
|
+
self._plot_info_panel(ax2)
|
|
482
|
+
else:
|
|
483
|
+
# Plot CDF if available for E-distributions
|
|
484
|
+
if hasattr(self.gdf, 'cdf_points') and self.gdf.cdf_points is not None:
|
|
485
|
+
ax2.plot(di_points, self.gdf.cdf_points, 'g-', linewidth=2, label='CDF')
|
|
486
|
+
ax2.axvline(self.z0, color='red', linestyle='--', linewidth=2,
|
|
487
|
+
label=f'Z0: {self.z0:.4f}')
|
|
488
|
+
ax2.set_xlabel('Value')
|
|
489
|
+
ax2.set_ylabel('CDF')
|
|
490
|
+
ax2.set_title(f'{self.gdf_type.upper()} CDF with Z0 Point')
|
|
491
|
+
ax2.legend()
|
|
492
|
+
ax2.grid(True, alpha=0.3)
|
|
493
|
+
else:
|
|
494
|
+
self._plot_info_panel(ax2)
|
|
495
|
+
|
|
496
|
+
plt.tight_layout()
|
|
497
|
+
plt.show()
|
|
498
|
+
|
|
499
|
+
def _plot_info_panel(self, ax):
|
|
500
|
+
"""Plot estimation information panel."""
|
|
501
|
+
self.logger.info("Creating Z0 estimation information panel.")
|
|
502
|
+
target_desc = "Median (0.5)" if self.find_median else "PDF Maximum"
|
|
503
|
+
info_text = f"Z0 Estimation Info:\n"
|
|
504
|
+
info_text += f"Value: {self.z0:.6f}\n"
|
|
505
|
+
info_text += f"Method: {self.estimation_info.get('z0_method', 'unknown')}\n"
|
|
506
|
+
info_text += f"Target: {target_desc}\n"
|
|
507
|
+
info_text += f"Distribution: {self.gdf_type.upper()}"
|
|
508
|
+
ax.text(0.1, 0.5, info_text, transform=ax.transAxes, fontsize=12,
|
|
509
|
+
verticalalignment='center', bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))
|
|
510
|
+
ax.set_xlim([0, 1])
|
|
511
|
+
ax.set_ylim([0, 1])
|
|
512
|
+
ax.set_title('Z0 Estimation Information')
|
|
513
|
+
|
|
514
|
+
def _find_middle_of_flat_region(self, values, extremum_idx, find_min=True):
|
|
515
|
+
"""Find the middle point of a flat region (for PDF maximum finding)."""
|
|
516
|
+
self.logger.info("Checking for flat region around extremum index.")
|
|
517
|
+
|
|
518
|
+
n_points = len(values)
|
|
519
|
+
extremum_value = values[extremum_idx]
|
|
520
|
+
|
|
521
|
+
# Define tolerance for "flatness"
|
|
522
|
+
tolerance = np.std(values) * 0.01 # 1% of standard deviation
|
|
523
|
+
tolerance = max(tolerance, 1e-10) # Minimum tolerance
|
|
524
|
+
|
|
525
|
+
# Find the range of indices with similar values
|
|
526
|
+
similar_mask = np.abs(values - extremum_value) <= tolerance
|
|
527
|
+
similar_indices = np.where(similar_mask)[0]
|
|
528
|
+
|
|
529
|
+
if len(similar_indices) > 1:
|
|
530
|
+
# Find continuous regions
|
|
531
|
+
diff_indices = np.diff(similar_indices)
|
|
532
|
+
break_points = np.where(diff_indices > 1)[0]
|
|
533
|
+
|
|
534
|
+
if len(break_points) == 0:
|
|
535
|
+
# Single continuous region
|
|
536
|
+
middle_idx = similar_indices[len(similar_indices) // 2]
|
|
537
|
+
if self.verbose:
|
|
538
|
+
region_type = "minimum" if find_min else "maximum"
|
|
539
|
+
self.logger.info(f"Flat {region_type} region detected. Using middle point at index {middle_idx}")
|
|
540
|
+
return middle_idx
|
|
541
|
+
else:
|
|
542
|
+
# Multiple regions - find the one containing original extremum_idx
|
|
543
|
+
start_idx = 0
|
|
544
|
+
for break_point in break_points:
|
|
545
|
+
region_indices = similar_indices[start_idx:break_point + 1]
|
|
546
|
+
if extremum_idx in region_indices:
|
|
547
|
+
middle_idx = region_indices[len(region_indices) // 2]
|
|
548
|
+
return middle_idx
|
|
549
|
+
start_idx = break_point + 1
|
|
550
|
+
|
|
551
|
+
# Check last region
|
|
552
|
+
region_indices = similar_indices[start_idx:]
|
|
553
|
+
if extremum_idx in region_indices:
|
|
554
|
+
middle_idx = region_indices[len(region_indices) // 2]
|
|
555
|
+
return middle_idx
|
|
556
|
+
|
|
557
|
+
# If no flat region found or single point, return original index
|
|
558
|
+
return extremum_idx
|
|
559
|
+
|
|
560
|
+
def _validate_gdf_object(self, gdf_object):
|
|
561
|
+
if not hasattr(gdf_object, '_fitted'):
|
|
562
|
+
self.logger.error("GDF object must have '_fitted' attribute")
|
|
563
|
+
raise ValueError("GDF object must have '_fitted' attribute")
|
|
564
|
+
|
|
565
|
+
if not gdf_object._fitted:
|
|
566
|
+
self.logger.error("GDF object must be fitted before Z0 estimation")
|
|
567
|
+
raise ValueError("GDF object must be fitted before Z0 estimation")
|
|
568
|
+
|
|
569
|
+
# Check for required data based on distribution type
|
|
570
|
+
temp_gdf_type = self._detect_gdf_type_from_object(gdf_object)
|
|
571
|
+
|
|
572
|
+
if temp_gdf_type.lower() in ['qldf', 'qgdf']:
|
|
573
|
+
# For Q-distributions, need distribution function data
|
|
574
|
+
has_dist_data = (hasattr(gdf_object, 'cdf_points') and gdf_object.cdf_points is not None) or \
|
|
575
|
+
(hasattr(gdf_object, 'qgdf_points') and gdf_object.qgdf_points is not None) or \
|
|
576
|
+
(hasattr(gdf_object, 'qldf_points') and gdf_object.qldf_points is not None)
|
|
577
|
+
if not has_dist_data:
|
|
578
|
+
self.logger.error("Q-distribution object must contain distribution function data")
|
|
579
|
+
raise ValueError("Q-distribution object must contain distribution function data")
|
|
580
|
+
else:
|
|
581
|
+
# For E-distributions, need PDF data
|
|
582
|
+
has_pdf_points = hasattr(gdf_object, 'pdf_points') and gdf_object.pdf_points is not None
|
|
583
|
+
has_pdf = hasattr(gdf_object, 'pdf') and gdf_object.pdf is not None
|
|
584
|
+
if not (has_pdf_points or has_pdf):
|
|
585
|
+
self.logger.error("E-distribution object must contain PDF data")
|
|
586
|
+
raise ValueError("E-distribution object must contain PDF data")
|
|
587
|
+
|
|
588
|
+
# Check for data points
|
|
589
|
+
has_di_points = hasattr(gdf_object, 'di_points_n') and gdf_object.di_points_n is not None
|
|
590
|
+
has_data = hasattr(gdf_object, 'data') and gdf_object.data is not None
|
|
591
|
+
|
|
592
|
+
if not (has_di_points or has_data):
|
|
593
|
+
self.logger.error("GDF object must contain data points (di_points_n or data attribute)")
|
|
594
|
+
raise ValueError("GDF object must contain data points (di_points_n or data attribute)")
|
|
595
|
+
|
|
596
|
+
def _detect_gdf_type(self):
    """Return the distribution-type label for the object stored in ``self.gdf``.

    Thin wrapper that forwards ``self.gdf`` to
    ``_detect_gdf_type_from_object`` and returns its result unchanged.
    """
    detect = self._detect_gdf_type_from_object
    return detect(self.gdf)
|
|
598
|
+
|
|
599
|
+
def _detect_gdf_type_from_object(self, gdf_object):
    """Infer the distribution family from the class name of ``gdf_object``.

    The lower-cased class name is searched for the substrings ``'egdf'``,
    ``'eldf'``, ``'qgdf'`` and ``'qldf'``, in that order, and the first one
    found is returned.

    Returns:
        str: one of ``'egdf'``, ``'eldf'``, ``'qgdf'``, ``'qldf'``, or
        ``'unknown'`` when the class name contains none of them.
    """
    lowered_name = gdf_object.__class__.__name__.lower()

    # Order matters: the first matching marker wins.
    for family in ('egdf', 'eldf', 'qgdf', 'qldf'):
        if family in lowered_name:
            return family

    # No recognised marker in the class name.
    return 'unknown'
|
|
614
|
+
|
|
615
|
+
def _find_z0_advanced_median(self, closest_idx, di_points, dist_points, target_value):
    """Locate Z0 as the point where the distribution reaches ``target_value``.

    ``self.estimation_info`` is first reset to a baseline record describing
    the discrete solution at ``closest_idx``. The spline, linear-interpolation
    and polynomial strategies are then tried in that order; the first one
    that returns a non-``None`` value wins. If every strategy returns
    ``None`` or raises, the discrete location ``di_points[closest_idx]`` is
    used.

    Args:
        closest_idx: index of the sample whose distribution value is nearest
            to the target.
        di_points: evaluation locations.
        dist_points: distribution values at ``di_points``.
        target_value: distribution level to locate.

    Returns:
        The estimated Z0 (also stored under ``estimation_info['z0']``).
    """
    nearest_value = dist_points[closest_idx]
    nearest_location = di_points[closest_idx]

    # Baseline record; 'z0' and possibly 'z0_method' are overwritten later.
    self.estimation_info = dict(
        z0=None,
        z0_method='discrete_closest_to_median',
        z0_target_value=target_value,
        z0_actual_value=nearest_value,
        gdf_type=self.gdf_type,
        target_type='median (0.5)',
        closest_idx=closest_idx,
        closest_location=nearest_location,
        z0_interpolation_points=len(di_points),
    )

    # Strategies in order of preference.
    strategies = (
        self._try_spline_median_finding,
        self._try_linear_interpolation_median,
        self._try_polynomial_median_finding,
    )

    for strategy in strategies:
        try:
            estimate = strategy(di_points, dist_points, target_value)
            if estimate is None:
                continue
            self.estimation_info['z0'] = estimate
            return estimate
        except Exception as exc:
            # A failing strategy must not abort the search; move on.
            if self.verbose:
                self.logger.error(f"Method {strategy.__name__} failed: {exc}")

    # Nothing better was found: keep the discrete solution.
    self.logger.info("All advanced methods failed. Using discrete closest to 0.5.")

    self.estimation_info['z0'] = nearest_location
    return nearest_location
|
|
657
|
+
|
|
658
|
+
def _find_z0_advanced_pdf_max(self, global_max_idx, di_points, pdf_points):
    """Locate Z0 as the position of the PDF maximum.

    ``self.estimation_info`` is reset to a baseline record describing the
    discrete maximum at ``global_max_idx``. Spline optimisation, polynomial
    fitting, refined interpolation and parabolic interpolation are then
    tried in that order; the first non-``None`` result is returned. If all
    of them return ``None`` or raise, ``di_points[global_max_idx]`` is used.

    Args:
        global_max_idx: index of the largest sampled PDF value.
        di_points: evaluation locations.
        pdf_points: PDF values at ``di_points``.

    Returns:
        The estimated Z0 (also stored under ``estimation_info['z0']``).
    """
    peak_value = pdf_points[global_max_idx]
    peak_location = di_points[global_max_idx]

    # Baseline record; 'z0' and possibly 'z0_method' are overwritten later.
    self.estimation_info = dict(
        z0=None,
        z0_method='discrete_pdf_maximum',
        z0_extremum_pdf_value=peak_value,
        gdf_type=self.gdf_type,
        target_type='pdf_maximum',
        global_extremum_idx=global_max_idx,
        global_extremum_location=peak_location,
        z0_interpolation_points=len(di_points),
    )

    # Strategies in order of preference.
    strategies = (
        self._try_spline_optimization_pdf,
        self._try_polynomial_fitting_pdf,
        self._try_refined_interpolation_pdf,
        self._try_parabolic_interpolation_pdf,
    )

    for strategy in strategies:
        try:
            estimate = strategy(di_points, pdf_points, global_max_idx)
            if estimate is None:
                continue
            self.estimation_info['z0'] = estimate
            return estimate
        except Exception as exc:
            # A failing strategy must not abort the search; move on.
            if self.verbose:
                self.logger.error(f"Method {strategy.__name__} failed: {exc}")

    # Nothing better was found: keep the discrete maximum.
    if self.verbose:
        self.logger.info("All advanced methods failed. Using discrete PDF maximum.")

    self.estimation_info['z0'] = peak_location
    return peak_location
|
|
701
|
+
|
|
702
|
+
def _try_spline_median_finding(self, di_points, dist_points, target_value):
    """Find where an interpolating cubic spline of the distribution equals ``target_value``.

    A ``UnivariateSpline`` (``s=0``, ``k=3``) is fitted through the samples
    and ``brentq`` is run over the full data domain. If ``brentq`` raises
    ``ValueError``, the spline is instead sampled on a 10000-point grid and
    the grid point whose value is nearest the target is returned.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'spline_median_finding'`` or ``'spline_median_search'``.

    Returns:
        The located position, or ``None`` when SciPy is unavailable, the
        target lies outside the sampled value range, or any step fails.
    """
    try:
        from scipy.interpolate import UnivariateSpline
        from scipy.optimize import brentq
    except ImportError:
        if self.verbose:
            self.logger.error("SciPy not available for spline median finding")
        return None

    try:
        # s=0 makes the spline pass through every sample.
        fitted = UnivariateSpline(di_points, dist_points, s=0, k=3)

        # Root of this residual is the sought location.
        residual = lambda x: fitted(x) - target_value

        x_lo, x_hi = np.min(di_points), np.max(di_points)

        # The target must lie inside the sampled value range.
        y_lo, y_hi = np.min(dist_points), np.max(dist_points)
        target_in_range = y_lo <= target_value <= y_hi
        if not target_in_range:
            return None

        try:
            z0_candidate = brentq(residual, x_lo, x_hi)

            if x_lo <= z0_candidate <= x_hi:
                self.estimation_info['z0_method'] = 'spline_median_finding'
                if self.verbose:
                    self.logger.info(f"Spline median finding successful: Z0={z0_candidate:.8f} (target={target_value})")
                return z0_candidate
        except ValueError:
            # Root bracketing failed: fall back to a dense grid search.
            grid_x = np.linspace(x_lo, x_hi, 10000)
            grid_y = fitted(grid_x)
            nearest = np.argmin(np.abs(grid_y - target_value))
            z0_candidate = grid_x[nearest]

            self.estimation_info['z0_method'] = 'spline_median_search'
            if self.verbose:
                self.logger.info(f"Spline median search successful: Z0={z0_candidate:.8f} (target={target_value})")
            return z0_candidate

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Spline median finding failed: {exc}")

    return None
|
|
754
|
+
|
|
755
|
+
def _try_linear_interpolation_median(self, di_points, dist_points, target_value):
    """Find where the piecewise-linear distribution equals ``target_value``.

    Consecutive sample pairs are scanned from the start. In the first pair
    whose values bracket the target (in either direction), the crossing is
    computed by linear interpolation; if the pair is flat (difference below
    ``1e-15``) the midpoint of the two locations is used.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'linear_interpolation_median'``.

    Returns:
        The interpolated location, or ``None`` if no pair brackets the
        target or an error occurs.
    """
    try:
        last_left = len(dist_points) - 1
        for left in range(last_left):
            right = left + 1
            y_a, y_b = dist_points[left], dist_points[right]

            rising = y_a <= target_value <= y_b
            falling = y_b <= target_value <= y_a
            if not (rising or falling):
                continue

            x_a, x_b = di_points[left], di_points[right]

            if abs(y_b - y_a) < 1e-15:
                # Flat segment: no unique crossing, take the midpoint.
                z0_candidate = (x_a + x_b) / 2
            else:
                fraction = (target_value - y_a) / (y_b - y_a)
                z0_candidate = x_a + fraction * (x_b - x_a)

            self.estimation_info['z0_method'] = 'linear_interpolation_median'
            if self.verbose:
                self.logger.info(f"Linear interpolation median successful: Z0={z0_candidate:.8f} (target={target_value})")
            return z0_candidate

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Linear interpolation median failed: {exc}")

    return None
|
|
784
|
+
|
|
785
|
+
def _try_polynomial_median_finding(self, di_points, dist_points, target_value):
    """Find where a polynomial fit of the distribution equals ``target_value``.

    A cubic and then a quadratic least-squares fit over all samples are
    attempted (each only when there are more than ``degree + 1`` samples).
    For each fit, the real roots of ``poly - target_value`` lying inside the
    data domain are collected, and the one nearest to the discrete sample
    closest to the target is returned.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'polynomial_median_degree_<degree>'``.

    Returns:
        The selected root, or ``None`` if no fit yields a root inside the
        domain or an error occurs.
    """
    try:
        for degree in (3, 2):
            if len(di_points) <= degree + 1:
                # Not enough samples for this degree.
                continue
            try:
                fitted = np.poly1d(np.polyfit(di_points, dist_points, degree))

                # Roots of the shifted polynomial are the level crossings.
                shifted = fitted - target_value
                all_roots = np.roots(shifted)

                real_roots = all_roots[np.isreal(all_roots)].real
                x_lo, x_hi = np.min(di_points), np.max(di_points)
                inside = (real_roots >= x_lo) & (real_roots <= x_hi)
                valid_roots = real_roots[inside]

                if len(valid_roots) == 0:
                    continue

                # Prefer the root nearest to the discrete solution.
                nearest_sample = np.argmin(np.abs(dist_points - target_value))
                reference = di_points[nearest_sample]
                z0_candidate = valid_roots[np.argmin(np.abs(valid_roots - reference))]

                self.estimation_info['z0_method'] = f'polynomial_median_degree_{degree}'
                if self.verbose:
                    self.logger.info(f"Polynomial median finding (degree {degree}) successful: Z0={z0_candidate:.8f} (target={target_value})")
                return z0_candidate

            except (np.linalg.LinAlgError, ValueError):
                continue

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Polynomial median finding failed: {exc}")

    return None
|
|
826
|
+
|
|
827
|
+
# Keep existing PDF optimization methods for E-distributions
|
|
828
|
+
def _try_spline_optimization_pdf(self, di_points, pdf_points, global_extremum_idx):
    """Locate the PDF maximum by optimising an interpolating cubic spline.

    A ``UnivariateSpline`` (``s=0``, ``k=3``) is fitted through the samples
    and its negative is minimised with ``minimize_scalar`` (bounded method)
    over the full data domain. ``global_extremum_idx`` is accepted for
    signature parity with the other PDF strategies but is not used here.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'global_spline_optimization'``.

    Returns:
        The optimiser's location, or ``None`` when SciPy is unavailable, the
        optimiser does not report success, or any step fails.
    """
    try:
        from scipy.interpolate import UnivariateSpline
        from scipy.optimize import minimize_scalar
    except ImportError:
        if self.verbose:
            self.logger.error("SciPy not available for spline optimization")
        return None

    try:
        # s=0 makes the spline pass through every sample.
        fitted = UnivariateSpline(di_points, pdf_points, s=0, k=3)

        def negated_pdf(x):
            # Minimising the negative maximises the PDF.
            return -fitted(x)

        x_lo, x_hi = np.min(di_points), np.max(di_points)
        outcome = minimize_scalar(negated_pdf, bounds=(x_lo, x_hi), method='bounded')

        if outcome.success:
            z0_candidate = outcome.x

            # Accept only a location inside the data domain.
            if x_lo <= z0_candidate <= x_hi:
                self.estimation_info['z0_method'] = 'global_spline_optimization'
                if self.verbose:
                    self.logger.info(f"Spline optimization successful: Z0={z0_candidate:.8f} (PDF maximum)")
                return z0_candidate

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Spline optimization failed: {exc}")

    return None
|
|
863
|
+
|
|
864
|
+
def _try_polynomial_fitting_pdf(self, di_points, pdf_points, global_extremum_idx):
    """Locate the PDF maximum from a polynomial fitted around the discrete peak.

    A window of ``min(max(n // 4, 5), n)`` samples around
    ``global_extremum_idx`` is selected (shifted inward near the ends).
    Polynomials of degree 4, 3 and 2 are fitted in turn (each only when the
    window has more than ``degree + 1`` samples). For each fit, the real
    critical points inside the window are evaluated, the one with the
    largest fitted value is chosen, and it is accepted only if the second
    derivative there is negative.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'global_polynomial_fitting_degree_<degree>'``.

    Returns:
        The accepted critical point, or ``None`` if the window has fewer
        than 5 samples, no fit yields a maximum, or an error occurs.
    """
    total = len(di_points)

    # Window around the extremum (larger than for plain interpolation).
    width = min(max(total // 4, 5), total)
    first = max(0, global_extremum_idx - width // 2)
    stop = min(total, first + width)
    first = max(0, stop - width)  # pull the window back if it hit the end

    window_x = di_points[first:stop]
    window_y = pdf_points[first:stop]

    if len(window_x) < 5:
        return None

    try:
        for degree in (4, 3, 2):
            if len(window_x) <= degree + 1:
                # Not enough samples for this degree.
                continue
            try:
                fitted = np.poly1d(np.polyfit(window_x, window_y, degree))

                # Critical points are the roots of the first derivative.
                first_derivative = np.polyder(fitted)
                stationary = np.roots(first_derivative)

                real_stationary = stationary[np.isreal(stationary)].real
                inside = (real_stationary >= window_x[0]) & (real_stationary <= window_x[-1])
                valid_criticals = real_stationary[inside]

                if len(valid_criticals) == 0:
                    continue

                # Pick the critical point with the highest fitted value.
                z0_candidate = valid_criticals[np.argmax(fitted(valid_criticals))]

                # Second-derivative test: negative curvature means a maximum.
                second_derivative = np.polyder(first_derivative)
                if second_derivative(z0_candidate) < 0:
                    self.estimation_info['z0_method'] = f'global_polynomial_fitting_degree_{degree}'
                    if self.verbose:
                        self.logger.info(f"Polynomial fitting (degree {degree}) successful: Z0={z0_candidate:.8f} (PDF maximum)")
                    return z0_candidate

            except (np.linalg.LinAlgError, ValueError):
                continue

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Polynomial fitting failed: {exc}")

    return None
|
|
924
|
+
|
|
925
|
+
def _try_refined_interpolation_pdf(self, di_points, pdf_points, global_extremum_idx):
    """Locate the PDF maximum on a dense grid of a cubic interpolant.

    A window of ``min(max(n // 6, 3), n)`` samples around
    ``global_extremum_idx`` is selected (shifted inward near the ends), a
    cubic ``interp1d`` is built over it, and the interpolant is sampled at
    50 times the window's sample count. The grid point with the largest
    value is returned.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'global_refined_interpolation'``.

    Returns:
        The grid location of the maximum, or ``None`` when SciPy is
        unavailable, the window has fewer than 4 samples, or an error occurs.
    """
    try:
        from scipy.interpolate import interp1d
    except ImportError:
        return None

    total = len(di_points)

    # Window around the extremum.
    width = min(max(total // 6, 3), total)
    first = max(0, global_extremum_idx - width // 2)
    stop = min(total, first + width)
    first = max(0, stop - width)  # pull the window back if it hit the end

    window_x = di_points[first:stop]
    window_y = pdf_points[first:stop]

    if len(window_x) < 4:
        return None

    try:
        cubic = interp1d(window_x, window_y, kind='cubic')

        # Dense grid spanning exactly the window.
        dense_x = np.linspace(window_x[0], window_x[-1], len(window_x) * 50)
        dense_y = cubic(dense_x)

        z0_candidate = dense_x[np.argmax(dense_y)]

        self.estimation_info['z0_method'] = 'global_refined_interpolation'
        if self.verbose:
            self.logger.info(f"Refined interpolation successful: Z0={z0_candidate:.8f} (PDF maximum)")
        return z0_candidate

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Refined interpolation failed: {exc}")

    return None
|
|
967
|
+
|
|
968
|
+
def _try_parabolic_interpolation_pdf(self, di_points, pdf_points, global_extremum_idx):
    """Locate the PDF maximum as the vertex of a parabola through three samples.

    The parabola ``A*x**2 + B*x + C`` through the sample at
    ``global_extremum_idx`` and its two neighbours is determined, and its
    vertex ``-B / (2*A)`` is returned when the parabola opens downward
    (``A < 0``) and the vertex lies between the two outer samples.

    On success ``self.estimation_info['z0_method']`` is set to
    ``'global_parabolic_interpolation'``.

    Returns:
        The vertex location, or ``None`` when the extremum is the first or
        last sample, the three locations are (nearly) coincident, the
        quadratic coefficient is (nearly) zero, the vertex is not an
        in-range maximum, or an error occurs.
    """
    total = len(di_points)

    # A boundary sample has no neighbour on one side.
    if global_extremum_idx in (0, total - 1):
        return None

    # The extremum and its immediate neighbours.
    triple = slice(global_extremum_idx - 1, global_extremum_idx + 2)
    x1, x2, x3 = di_points[triple]
    y1, y2, y3 = pdf_points[triple]

    try:
        denominator = (x1 - x2) * (x1 - x3) * (x2 - x3)
        if abs(denominator) < 1e-15:
            return None

        # Quadratic (A) and linear (B) coefficients of the parabola.
        quad = (x3 * (y2 - y1) + x2 * (y1 - y3) + x1 * (y3 - y2)) / denominator
        lin = (x3*x3 * (y1 - y2) + x2*x2 * (y3 - y1) + x1*x1 * (y2 - y3)) / denominator

        if abs(quad) < 1e-15:
            return None  # degenerate: effectively a straight line

        z0_candidate = -lin / (2 * quad)

        opens_downward = quad < 0
        if opens_downward and x1 <= z0_candidate <= x3:
            self.estimation_info['z0_method'] = 'global_parabolic_interpolation'
            if self.verbose:
                self.logger.info(f"Parabolic interpolation successful: Z0={z0_candidate:.8f} (PDF maximum)")
            return z0_candidate

    except Exception as exc:
        if self.verbose:
            self.logger.error(f"Parabolic interpolation failed: {exc}")

    return None
|
|
1007
|
+
|
|
1008
|
+
def _get_last_method_used(self):
    """Return the ``'z0_method'`` entry of ``self.estimation_info``.

    Falls back to ``'discrete_fallback'`` when that key is absent.
    """
    info = self.estimation_info
    return info.get('z0_method', 'discrete_fallback')
|
|
1010
|
+
|
|
1011
|
+
def _get_pdf_points(self):
    """Return the PDF samples of ``self.gdf`` as a NumPy array.

    ``pdf_points`` is preferred, then ``pdf``; an attribute counts only if
    it exists and is not ``None``. An empty array is returned when neither
    is available.
    """
    self.logger.debug("Retrieving PDF points.")
    for attr_name in ('pdf_points', 'pdf'):
        samples = getattr(self.gdf, attr_name, None)
        if samples is not None:
            return np.array(samples)
    return np.array([])
|
|
1020
|
+
|
|
1021
|
+
def _get_distribution_points(self):
    """Return the distribution-function samples of ``self.gdf`` as a NumPy array.

    Attributes are tried in the order ``cdf_points``, ``qgdf_points``,
    ``qldf_points``; an attribute counts only if it exists and is not
    ``None``. An empty array is returned when none is available.
    """
    self.logger.debug("Retrieving distribution points.")
    for attr_name in ('cdf_points', 'qgdf_points', 'qldf_points'):
        samples = getattr(self.gdf, attr_name, None)
        if samples is not None:
            return np.array(samples)
    return np.array([])
|
|
1032
|
+
|
|
1033
|
+
def _get_di_points(self):
    """Return the evaluation locations of ``self.gdf`` as a NumPy array.

    ``di_points_n`` is returned as-is when it exists and is not ``None``.
    Otherwise the ``data`` attribute, if present and not ``None``, is
    returned sorted in ascending order. An empty array is returned when
    neither is available.
    """
    self.logger.debug("Retrieving data points.")

    grid = getattr(self.gdf, 'di_points_n', None)
    if grid is not None:
        return np.array(grid)

    raw = getattr(self.gdf, 'data', None)
    if raw is not None:
        # No evaluation grid: fall back to the sorted raw data.
        return np.sort(np.array(raw))

    return np.array([])
|
|
1043
|
+
|
|
1044
|
+
def __repr__(self):
    """Return a one-line summary: GDF type, estimation target and fit status.

    The status shows ``Z0`` to six decimals once ``self.z0`` is set, and
    ``not fitted`` while it is ``None``.
    """
    if self.find_median:
        target_type = "median (0.5)"
    else:
        target_type = "PDF maximum"

    if self.z0 is None:
        status = "not fitted"
    else:
        status = f"fitted (Z0={self.z0:.6f})"

    return f"Z0Estimator(gdf_type='{self.gdf_type}', target='{target_type}', {status})"
|