machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1234 @@
|
|
|
1
|
+
'''
|
|
2
|
+
QGDF: Quantifying Global Distribution Functions
|
|
3
|
+
|
|
4
|
+
Author: Nirmal Parmar
|
|
5
|
+
Machine Gnostics
|
|
6
|
+
'''
|
|
7
|
+
import numpy as np
|
|
8
|
+
import warnings
|
|
9
|
+
import logging
|
|
10
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
11
|
+
from typing import Dict, Any
|
|
12
|
+
from machinegnostics.magcal.gdf.base_distfunc import BaseDistFuncCompute
|
|
13
|
+
from machinegnostics.magcal.data_conversion import DataConversion
|
|
14
|
+
from machinegnostics.magcal.characteristics import GnosticsCharacteristics
|
|
15
|
+
from machinegnostics.magcal.gdf.z0_estimator import Z0Estimator
|
|
16
|
+
|
|
17
|
+
class BaseQGDF(BaseDistFuncCompute):
|
|
18
|
+
"""
|
|
19
|
+
Base class for Quantifying Global Distribution Functions (QGDF).
|
|
20
|
+
|
|
21
|
+
This class provides foundational methods and attributes for computing
|
|
22
|
+
and analyzing global distribution functions using various techniques.
|
|
23
|
+
|
|
24
|
+
Attributes:
|
|
25
|
+
data (np.ndarray): Input data for distribution function computation.
|
|
26
|
+
n_points (int): Number of points for evaluation.
|
|
27
|
+
S (float): Smoothing parameter.
|
|
28
|
+
catch (bool): Flag to enable error catching.
|
|
29
|
+
verbose (bool): Flag to enable verbose output.
|
|
30
|
+
params (dict): Dictionary to store parameters and results.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             z0_optimize: bool = True,
             tolerance: float = 1e-3,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """Initialize the QGDF computation state.

    Parameters
    ----------
    data : np.ndarray
        Input sample for distribution-function computation.
    DLB, DUB : float, optional
        Data lower/upper bounds.
    LB, UB : float, optional
        Probable lower/upper bounds of the support.
    S : float or 'auto'
        Smoothing (scale) parameter, or 'auto' to optimize it.
    z0_optimize : bool
        Whether to optimize the Z0 location estimate.
    tolerance : float
        Convergence tolerance for the optimizers.
    data_form : str
        Data form flag ('a' = additive), forwarded to the base class.
    n_points : int
        Number of evaluation points for smooth curves.
    homogeneous : bool
        Whether the sample is assumed homogeneous.
    catch : bool
        If True, intermediate parameters/results are stored in ``self.params``.
    weights : np.ndarray, optional
        Per-observation weights; defaults to uniform weights.
    wedf : bool
        Use the weighted empirical distribution function.
    opt_method : str
        scipy.optimize method name used by the fitting routines.
    verbose : bool
        Enables DEBUG-level logging.
    max_data_size : int
        Guard limit on the sample size.
    flush : bool
        Whether to flush large intermediate arrays after fitting.
    """
    super().__init__(data=data,
                     DLB=DLB,
                     DUB=DUB,
                     LB=LB,
                     UB=UB,
                     S=S,
                     z0_optimize=z0_optimize,
                     varS=False,  # NOTE for QGDF varS is always False
                     tolerance=tolerance,
                     data_form=data_form,
                     n_points=n_points,
                     homogeneous=homogeneous,
                     catch=catch,
                     weights=weights,
                     wedf=wedf,
                     opt_method=opt_method,
                     verbose=verbose,
                     max_data_size=max_data_size,
                     flush=flush)

    # BUG FIX: the logger used to be created as the very last step, after
    # _store_initial_params() and _validate_inputs() had already run — any
    # logging inside those helpers would have failed with AttributeError.
    # Set it up first so every subsequent step can log safely.
    self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)

    # Store raw inputs
    self.data = data
    self.DLB = DLB
    self.DUB = DUB
    self.LB = LB
    self.UB = UB
    self.S = S
    self.z0_optimize = z0_optimize

    self.tolerance = tolerance
    self.data_form = data_form
    self.n_points = n_points
    self.homogeneous = homogeneous
    self.catch = catch
    # Uniform weights by default; ones_like inherits data's dtype.
    self.weights = weights if weights is not None else np.ones_like(data)
    self.wedf = wedf
    self.opt_method = opt_method
    self.verbose = verbose
    self.max_data_size = max_data_size
    self.flush = flush

    # Initialize state variables
    self.params = {}
    self._fitted = False
    self._derivatives_calculated = False
    self._marginal_analysis_done = False

    # Initialize computation cache
    self._computation_cache = {
        'data_converter': None,
        'characteristics_computer': None,
        'weights_normalized': None,
        'smooth_curves_generated': False
    }

    # Store initial parameters if catching
    if self.catch:
        self._store_initial_params()

    # Validate all inputs
    self._validate_inputs()

    self.logger.debug(f"{self.__class__.__name__} initialized:")
|
|
117
|
+
|
|
118
|
+
def _compute_qgdf_core(self, S, LB, UB, zi_data=None, zi_eval=None):
    """Compute the QGDF curve and its per-point gnostic moments.

    Parameters
    ----------
    S : float
        Smoothing (scale) parameter.
    LB, UB : float
        Support bounds used for the finite-to-infinite domain transform.
    zi_data : np.ndarray, optional
        Sample points; defaults to ``self.z``.
    zi_eval : np.ndarray, optional
        Evaluation points; defaults to ``zi_data``.

    Returns
    -------
    tuple
        ``(qgdf_values, fj, hj)`` — the distribution values plus the
        fidelity and irrelevance matrices they were derived from.
    """
    self.logger.info("Computing QGDF core.")
    # Fall back to the instance's transformed sample when not supplied.
    zi_data = self.z if zi_data is None else zi_data
    zi_eval = zi_data if zi_eval is None else zi_eval

    # Map both point sets onto the infinite support.
    eval_inf = DataConversion._convert_fininf(zi_eval, LB, UB)
    data_inf = DataConversion._convert_fininf(zi_data, LB, UB)

    # Pairwise ratio matrix; the epsilon keeps the division finite.
    R = eval_inf.reshape(-1, 1) / (data_inf.reshape(1, -1) + self._NUMERICAL_EPS)

    # Gnostic characteristics: q/q1 then fidelities (fj) and irrelevances (hj).
    characteristics = GnosticsCharacteristics(R=R, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=S)
    fj = characteristics._fj(q=q, q1=q1)
    hj = characteristics._hj(q=q, q1=q1)

    return self._estimate_qgdf_from_moments(fj, hj), fj, hj
|
|
144
|
+
|
|
145
|
+
def _estimate_qgdf_from_moments_complex(self, fidelities, irrelevances):
    """Estimate QGDF using complex number approach to handle all cases.

    Computes weighted first-order moments of the fidelity/irrelevance
    matrices, forms h_Z,j = h̄Q / sqrt(f̄Q² - h̄Q²) in complex arithmetic so
    negative radicands do not blow up, maps it to h_GQ = h_Z,j/sqrt(1+h_Z,j²),
    and returns QGDF = (1 + h_GQ)/2 — falling back to the direct ratio
    (1 - h̄Q/f̄Q)/2 wherever the simple path is numerically safe.

    Parameters: fidelities, irrelevances — 2-D arrays (data x eval points).
    Returns: 1-D np.ndarray of QGDF values, clipped to [0, 1] and forced
    monotone non-decreasing.
    """
    self.logger.info("Estimating QGDF using complex number approach.")

    # Weights are assumed pre-normalized by an earlier pipeline step.
    weights = self._computation_cache['weights_normalized'].reshape(-1, 1)

    # Add numerical stability for both large and small values
    max_safe_value = np.sqrt(np.finfo(float).max) / 100  # More conservative
    min_safe_value = np.sqrt(np.finfo(float).eps) * 100  # Avoid very small numbers

    # Comprehensive clipping for extreme values (both large and small)
    def safe_clip_values(values, name="values"):
        """Safely clip values to prevent both overflow and underflow issues."""
        # Handle very small values (close to zero)
        values_magnitude = np.abs(values)
        too_small_mask = values_magnitude < min_safe_value

        # Handle very large values
        too_large_mask = values_magnitude > max_safe_value

        if np.any(too_small_mask) and self.verbose:
            small_count = np.sum(too_small_mask)
            self.logger.info(f"Warning: {small_count} very small {name} values detected (< {min_safe_value:.2e})")

        if np.any(too_large_mask) and self.verbose:
            large_count = np.sum(too_large_mask)
            self.logger.info(f"Warning: {large_count} very large {name} values detected (> {max_safe_value:.2e})")

        # Clip small values to minimum safe value (preserving sign)
        values_safe = np.where(too_small_mask,
                               np.sign(values) * min_safe_value,
                               values)

        # Clip large values to maximum safe value (preserving sign)
        values_safe = np.where(too_large_mask,
                               np.sign(values_safe) * max_safe_value,
                               values_safe)

        return values_safe

    # Apply safe clipping to both fidelities and irrelevances
    fidelities_safe = safe_clip_values(fidelities, "fidelity")
    irrelevances_safe = safe_clip_values(irrelevances, "irrelevance")

    # Calculate weighted means (f̄Q and h̄Q from equation 15.35)
    mean_fidelity = np.sum(weights * fidelities_safe, axis=0) / np.sum(weights)  # f̄Q
    mean_irrelevance = np.sum(weights * irrelevances_safe, axis=0) / np.sum(weights)  # h̄Q

    # Apply safe clipping to means as well
    mean_fidelity = safe_clip_values(mean_fidelity, "mean_fidelity")
    mean_irrelevance = safe_clip_values(mean_irrelevance, "mean_irrelevance")

    # Convert to complex for robust calculation with overflow protection
    f_complex = mean_fidelity.astype(complex)
    h_complex = mean_irrelevance.astype(complex)

    # Calculate the complex square root with comprehensive protection
    # Check magnitudes before squaring
    f_magnitude = np.abs(f_complex)
    h_magnitude = np.abs(h_complex)
    sqrt_max = np.sqrt(max_safe_value)
    sqrt_min = np.sqrt(min_safe_value)

    # Check for both very large and very small values before squaring
    f_too_large = f_magnitude > sqrt_max
    h_too_large = h_magnitude > sqrt_max
    f_too_small = f_magnitude < sqrt_min
    h_too_small = h_magnitude < sqrt_min

    if np.any(f_too_large) or np.any(h_too_large) or np.any(f_too_small) or np.any(h_too_small):
        if self.verbose:
            self.logger.info("Warning: Extreme values detected in complex calculation. Using scaled approach.")

        # Scale problematic values to safe range (rescale magnitude, keep phase).
        # NOTE(review): f_complex / f_magnitude divides by zero if a mean is
        # exactly 0 — upstream clipping makes that unlikely; confirm.
        f_scaled = np.where(f_too_large, sqrt_max * (f_complex / f_magnitude), f_complex)
        f_scaled = np.where(f_too_small, sqrt_min * (f_complex / f_magnitude), f_scaled)

        h_scaled = np.where(h_too_large, sqrt_max * (h_complex / h_magnitude), h_complex)
        h_scaled = np.where(h_too_small, sqrt_min * (h_complex / h_magnitude), h_scaled)

        diff_squared_complex = f_scaled**2 - h_scaled**2
        scale_factor = 1.0  # NOTE(review): assigned in both branches but never used
    else:
        diff_squared_complex = f_complex**2 - h_complex**2
        scale_factor = 1.0

    # Calculate denominator with protection against both zero and very small values
    denominator_magnitude = np.abs(diff_squared_complex)
    denominator_too_small = denominator_magnitude < min_safe_value

    if np.any(denominator_too_small):
        if self.verbose:
            small_denom_count = np.sum(denominator_too_small)
            self.logger.info(f"Warning: {small_denom_count} very small denominators in complex calculation.")

    # Use sqrt with protection
    denominator_complex = np.sqrt(diff_squared_complex)
    denominator_complex = np.where(denominator_magnitude < min_safe_value,
                                   min_safe_value + 0j, denominator_complex)

    # Calculate hZ,j using complex arithmetic with comprehensive protection
    h_zj_complex = h_complex / denominator_complex

    # **FIX THE OVERFLOW ISSUE HERE**
    # Check magnitude of h_zj_complex BEFORE any squaring operation
    h_zj_magnitude = np.abs(h_zj_complex)
    sqrt_max_for_square = np.sqrt(sqrt_max)  # Even more conservative for squaring

    h_zj_too_large_for_square = h_zj_magnitude > sqrt_max_for_square
    h_zj_too_small = h_zj_magnitude < sqrt_min

    if np.any(h_zj_too_large_for_square):
        if self.verbose:
            large_count = np.sum(h_zj_too_large_for_square)
            self.logger.info(f"Warning: {large_count} h_zj values too large for safe squaring. Using approximation.")

        # For very large |h_zj|, use the mathematical limit without squaring
        # When |h_zj| >> 1: h_zj / sqrt(1 + h_zj²) ≈ h_zj / |h_zj| = sign(h_zj)

        # Safe calculation for non-large values only
        h_zj_safe = np.where(h_zj_too_large_for_square, 0, h_zj_complex)  # Zero out large values
        h_zj_squared_safe = h_zj_safe**2  # Only square the safe values

        # Calculate result for safe values
        safe_result = h_zj_safe / np.sqrt(1 + h_zj_squared_safe)

        # Use approximation for large values
        large_result = h_zj_complex / h_zj_magnitude

        # Combine results
        h_gq_complex = np.where(h_zj_too_large_for_square, large_result, safe_result)

    elif np.any(h_zj_too_small):
        # NOTE(review): this warning is emitted regardless of self.verbose,
        # unlike the other branches — possibly unintentional.
        self.logger.info("Warning: Very small h_zj values in complex calculation.")

        # For very small |h_zj|: h_zj / sqrt(1 + h_zj²) ≈ h_zj (linear approximation)
        h_gq_complex = np.where(h_zj_too_small,
                                h_zj_complex,  # linear approximation - no squaring!
                                h_zj_complex / np.sqrt(1 + h_zj_complex**2))  # safe squaring only
    else:
        # All values are safe for squaring - proceed normally
        try:
            # Only square when we know it's safe
            h_zj_squared = h_zj_complex**2
            h_gq_complex = h_zj_complex / np.sqrt(1 + h_zj_squared)
        except (OverflowError, FloatingPointError, ZeroDivisionError) as e:
            # log error
            # NOTE(review): self.params['errors'] raises KeyError unless an
            # 'errors' list was created elsewhere — __init__ sets params = {}.
            # NOTE(review): 'method' records '_calculate_pdf_from_moments'
            # but this is _estimate_qgdf_from_moments_complex — likely a
            # copy-paste slip; confirm before relying on the error log.
            error_msg = f"Exception in h_gq calculation: {e}"
            self.params['errors'].append({
                'method': '_calculate_pdf_from_moments',
                'error': error_msg,
                'exception_type': type(e).__name__
            })
            if self.verbose:
                self.logger.info(f"Warning: Unexpected exception in h_gq calculation ({e}). Using approximation.")
            # Fallback to magnitude-based approach
            h_gq_complex = h_zj_complex / (h_zj_magnitude + min_safe_value)

    # Extract meaningful results from complex calculation
    h_gq_real = np.real(h_gq_complex)
    h_gq_imag = np.imag(h_gq_complex)
    h_gq_magnitude = np.abs(h_gq_complex)

    # Determine how to handle complex results with small value protection
    is_purely_real = np.abs(h_gq_imag) < min_safe_value
    is_real_dominant = np.abs(h_gq_real) >= np.abs(h_gq_imag)

    if self.verbose and not np.all(is_purely_real):
        complex_count = np.sum(~is_purely_real)
        self.logger.info(f"Info: {complex_count} points have complex intermediate results.")

    # Strategy for handling complex results with numerical stability
    h_gq_final = np.where(is_purely_real,
                          h_gq_real,  # Use real part for essentially real results
                          np.where(is_real_dominant,
                                   h_gq_real,  # Use real part when real component dominates
                                   h_gq_magnitude * np.sign(h_gq_real)))  # Use magnitude with sign

    # Clip to reasonable range to prevent further overflow/underflow
    h_gq_final = np.clip(h_gq_final, -10, 10)

    # Calculate QGDF using the processed hGQ values
    qgdf_from_hgq = (1 + h_gq_final) / 2

    # Also calculate using direct ratio as backup with small value protection
    mean_fidelity_safe = np.where(np.abs(mean_fidelity) < min_safe_value,
                                  np.sign(mean_fidelity) * min_safe_value, mean_fidelity)

    ratio = mean_irrelevance / mean_fidelity_safe

    # Handle extreme ratios (both large and small)
    ratio_magnitude = np.abs(ratio)
    ratio_too_large = ratio_magnitude > 10
    ratio_too_small = ratio_magnitude < min_safe_value

    # tanh soft-limits large ratios instead of a hard clip.
    ratio_safe = np.where(ratio_too_large, 10 * np.tanh(ratio / 10), ratio)
    ratio_safe = np.where(ratio_too_small, np.sign(ratio) * min_safe_value, ratio_safe)

    qgdf_from_ratio = (1 - ratio_safe) / 2

    # Use complex method for difficult cases, ratio method for simple cases
    use_complex_method = ~is_purely_real | ratio_too_large | ratio_too_small

    qgdf_values = np.where(use_complex_method,
                           qgdf_from_hgq,
                           qgdf_from_ratio)

    # Apply final constraints: valid probabilities, monotone non-decreasing.
    qgdf_values = np.clip(qgdf_values, 0, 1)
    qgdf_values = np.maximum.accumulate(qgdf_values)

    return qgdf_values.flatten()
|
|
357
|
+
|
|
358
|
+
def _estimate_qgdf_from_moments(self, fidelities, irrelevances):
    """Main QGDF estimation method with complex-number fallback.

    Tries the complex-arithmetic estimator first; on any failure it records
    the error and falls back to the robust real-number estimator.

    Parameters
    ----------
    fidelities, irrelevances : np.ndarray
        2-D gnostic moment matrices (data points x evaluation points).

    Returns
    -------
    np.ndarray
        1-D array of QGDF values.
    """
    self.logger.info("Estimating QGDF from moments with fallback.")
    try:
        # First try the complex number approach
        return self._estimate_qgdf_from_moments_complex(fidelities, irrelevances)
    except Exception as e:  # broad by design: any numerical failure triggers the fallback
        error_msg = f"Exception in complex QGDF estimation: {e}"
        self.logger.error(error_msg)
        if self.verbose:
            self.logger.info(f"Complex method failed: {e}. Using fallback approach.")
        # BUG FIX: params is initialized to {} in __init__, so indexing
        # params['errors'] directly raised KeyError and masked the original
        # exception; setdefault creates the list on first use.
        self.params.setdefault('errors', []).append({
            'method': '_estimate_qgdf_from_moments',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        # Fallback to the robust real-number approach
        return self._estimate_qgdf_from_moments_fallback(fidelities, irrelevances)
|
|
378
|
+
|
|
379
|
+
def _estimate_qgdf_from_moments_fallback(self, fidelities, irrelevances):
    """Robust real-valued fallback estimator for the QGDF.

    Uses only the direct ratio h̄Q/f̄Q (always mathematically valid) with a
    tanh soft-limit on extreme ratios, avoiding the complex-arithmetic path.

    Returns a 1-D array of QGDF values in [0, 1], monotone non-decreasing.
    """
    self.logger.info("Estimating QGDF using fallback real-number approach.")
    w = self._computation_cache['weights_normalized'].reshape(-1, 1)
    w_total = np.sum(w)

    # Weighted first-order moments of fidelity and irrelevance.
    f_bar = np.sum(w * fidelities, axis=0) / w_total
    h_bar = np.sum(w * irrelevances, axis=0) / w_total

    # Keep the denominator away from zero while preserving its sign.
    eps = self._NUMERICAL_EPS
    f_bar_safe = np.where(np.abs(f_bar) < eps, np.sign(f_bar) * eps, f_bar)

    ratio = h_bar / f_bar_safe
    # Soft-limit extreme ratios via tanh rather than a hard clip.
    ratio = np.where(np.abs(ratio) > 5, 5 * np.tanh(ratio / 5), ratio)

    # NOTE: the textbook hzj/hgq formulation for QGDF was found not to work
    # properly here, so the direct ratio form is used instead:
    #   hzj  = h̄ / sqrt(f̄² + h̄²)
    #   h_gq = hzj / sqrt(1 + hzj²)
    #   qgdf = (1 + h_gq / f̄) / 2

    cdf = (1 - ratio) / 2
    cdf = np.clip(cdf, 0, 1)
    # Enforce monotonicity of the distribution function.
    cdf = np.maximum.accumulate(cdf)

    return cdf.flatten()
|
|
408
|
+
|
|
409
|
+
# NOTE fi and hi derivative base logic
|
|
410
|
+
# this give little of PDF
|
|
411
|
+
# can be improved
|
|
412
|
+
# def _calculate_pdf_from_moments(self, fidelities, irrelevances):
|
|
413
|
+
# """Calculate first derivative of QGDF (which is the PDF) from stored fidelities and irrelevances."""
|
|
414
|
+
# if fidelities is None or irrelevances is None:
|
|
415
|
+
# # log error
|
|
416
|
+
# error_msg = "Fidelities and irrelevances must be calculated first"
|
|
417
|
+
# self.params['errors'].append({
|
|
418
|
+
# 'method': '_calculate_pdf_from_moments',
|
|
419
|
+
# 'error': error_msg,
|
|
420
|
+
# 'exception_type': 'ValueError'
|
|
421
|
+
# })
|
|
422
|
+
# raise ValueError("Fidelities and irrelevances must be calculated first")
|
|
423
|
+
|
|
424
|
+
# weights = self.weights.reshape(-1, 1)
|
|
425
|
+
|
|
426
|
+
# # First order moments using QGDF's fj and hj
|
|
427
|
+
# f1 = np.sum(weights * fidelities, axis=0) / np.sum(weights) # f̄Q
|
|
428
|
+
# h1 = np.sum(weights * irrelevances, axis=0) / np.sum(weights) # h̄Q
|
|
429
|
+
|
|
430
|
+
# # Second order moments (scaled by S as in EGDF pattern)
|
|
431
|
+
# f2s = np.sum(weights * (fidelities**2 / self.S_opt), axis=0) / np.sum(weights) # F2
|
|
432
|
+
# h2s = np.sum(weights * (irrelevances**2 / self.S_opt), axis=0) / np.sum(weights) # H2
|
|
433
|
+
# fhs = np.sum(weights * (fidelities * irrelevances / self.S_opt), axis=0) / np.sum(weights) # FH
|
|
434
|
+
|
|
435
|
+
# # Calculate Nj = Σ(1/f²ᵢ,ⱼ) + Σ H²ᵢ,ⱼ (from equation 10.8)
|
|
436
|
+
# eps = np.finfo(float).eps
|
|
437
|
+
# f_inv_squared = np.sum(weights * (1 / (fidelities**2 + eps)), axis=0) / np.sum(weights)
|
|
438
|
+
# h_squared = np.sum(weights * irrelevances**2, axis=0) / np.sum(weights)
|
|
439
|
+
# Nj = f_inv_squared + h_squared
|
|
440
|
+
# Nj = np.where(Nj == 0, eps, Nj)
|
|
441
|
+
|
|
442
|
+
# # Calculate denominator w = (2 * Nj)^2 for QGDF derivative
|
|
443
|
+
# w = (2 * Nj)**2
|
|
444
|
+
# w = np.where(w == 0, eps, w)
|
|
445
|
+
|
|
446
|
+
# # QGDF PDF formula: dQGDF/dZ₀ = (1/SZ₀) * (1/(2 * Nⱼ²)) * [F2 - H2 + f̄_E * h̄_E * FH]
|
|
447
|
+
# numerator = f2s - h2s + f1 * h1 * fhs
|
|
448
|
+
# first_derivative = (1 / self.S_opt) * numerator / ( Nj**2)
|
|
449
|
+
|
|
450
|
+
# return first_derivative.flatten()
|
|
451
|
+
|
|
452
|
+
def _calculate_pdf_from_moments(self, fidelities, irrelevances):
    """Calculate the PDF from fidelities and irrelevances.

    The PDF is approximated from the weighted first- and second-order
    gnostic moments: (1/S) * max(F2 - H2 + f̄Q·h̄Q·FH, eps).

    Parameters
    ----------
    fidelities, irrelevances : np.ndarray
        2-D gnostic moment matrices (data points x evaluation points).

    Returns
    -------
    np.ndarray
        1-D array of strictly positive, clipped PDF values.

    Raises
    ------
    ValueError
        If either moment matrix is None.

    Notes
    -----
    Fixes over the previous revision:
    - the docstring used to appear *after* the first statement (so it was a
      plain expression, not a docstring) — moved to the top;
    - a duplicated "Calculating PDF from moments" log call was removed;
    - a dead-code chain (h_zj / h_gq, never used by the result) was removed.
    """
    self.logger.info("Calculating PDF from moments.")
    if fidelities is None or irrelevances is None:
        self.logger.error("Fidelities and irrelevances must be calculated first.")
        raise ValueError("Fidelities and irrelevances must be calculated first")

    weights = self._computation_cache['weights_normalized'].reshape(-1, 1)

    # Numerical stability constants
    max_safe_value = np.sqrt(np.finfo(float).max) / 10
    min_safe_value = np.sqrt(np.finfo(float).eps) * 100

    def safe_clip_for_pdf(values, name="values"):
        """Safely clip magnitudes into [min_safe_value, max_safe_value], preserving sign."""
        values_magnitude = np.abs(values)
        too_small_mask = values_magnitude < min_safe_value
        too_large_mask = values_magnitude > max_safe_value

        values_safe = np.where(too_small_mask,
                               np.sign(values) * min_safe_value, values)
        values_safe = np.where(too_large_mask,
                               np.sign(values_safe) * max_safe_value, values_safe)
        return values_safe

    # Apply clipping to the raw moment matrices
    fidelities_safe = safe_clip_for_pdf(fidelities, "fidelity")
    irrelevances_safe = safe_clip_for_pdf(irrelevances, "irrelevance")

    # Weighted first-order moments
    mean_fidelity = np.sum(weights * fidelities_safe, axis=0) / np.sum(weights)  # f̄Q
    mean_irrelevance = np.sum(weights * irrelevances_safe, axis=0) / np.sum(weights)  # h̄Q

    mean_fidelity = safe_clip_for_pdf(mean_fidelity, "mean_fidelity")
    mean_irrelevance = safe_clip_for_pdf(mean_irrelevance, "mean_irrelevance")

    # S_opt exists only after scale optimization has run; default to 1.0.
    S_value = self.S_opt if hasattr(self, 'S_opt') else 1.0

    # Weighted second-order moments
    f2 = np.sum(weights * fidelities_safe**2, axis=0) / np.sum(weights)
    h2 = np.sum(weights * irrelevances_safe**2, axis=0) / np.sum(weights)
    fh = np.sum(weights * fidelities_safe * irrelevances_safe, axis=0) / np.sum(weights)

    f2 = safe_clip_for_pdf(f2, "f2")
    h2 = safe_clip_for_pdf(h2, "h2")
    fh = safe_clip_for_pdf(fh, "fh")

    # Clip values to avoid overflow in the multiplications below.
    # NOTE(review): the lower bound of 1 on mean_irrelevance forces h̄Q >= 1
    # — retained from the original; confirm this is intentional.
    mean_irrelevance = np.clip(mean_irrelevance, 1, 1e12)
    mean_fidelity = np.clip(mean_fidelity, 0, 1e12)
    fh = np.clip(fh, -1e12, 1e12)
    derivative_factor = f2 - h2 + mean_fidelity * mean_irrelevance * fh

    # Apply scaling and ensure strictly positive values
    pdf_values = (1 / S_value) * np.maximum(derivative_factor, min_safe_value)

    # Final clipping
    pdf_values = np.clip(pdf_values, min_safe_value, max_safe_value)

    return pdf_values.flatten()
|
|
554
|
+
|
|
555
|
+
def _calculate_final_results(self):
    """
    Evaluate the fitted QGDF and its PDF at the data points using the
    optimized parameters (S_opt, LB_opt, UB_opt).

    Side effects: stores ``zi``, ``fj``, ``hj``, ``qgdf`` and ``pdf`` on
    the instance and, when ``catch`` is enabled, mirrors copies of the
    result arrays into ``self.params``.
    """
    self.logger.info("Calculating final QGDF and PDF results.")

    # Map the standardized data into the infinite support domain.
    self.zi = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    # Core evaluation also yields the fidelity/irrelevance moments that
    # the PDF and the derivative estimators need later.
    distribution, fidelities, irrelevances = self._compute_qgdf_core(
        self.S_opt, self.LB_opt, self.UB_opt
    )
    self.fj = fidelities
    self.hj = irrelevances
    self.qgdf = distribution
    self.pdf = self._calculate_pdf_from_moments(fidelities, irrelevances)

    if self.catch:
        # Copies keep the cached params independent of later mutation of
        # the instance arrays.
        self.params.update({
            'qgdf': self.qgdf.copy(),
            'pdf': self.pdf.copy(),
            'zi': self.zi.copy()
        })
|
|
578
|
+
|
|
579
|
+
def _generate_smooth_curves(self):
    """Generate smooth QGDF/PDF curves over the dense evaluation grid.

    On success stores ``qgdf_points``/``pdf_points`` and ``zi_n`` on the
    instance (mirrored into ``self.params`` when ``catch`` is enabled).
    On failure the error is recorded in ``params['errors']`` and the
    per-data-point arrays are reused as fallback plotting points.
    """
    self.logger.info("Generating smooth curves for QGDF and PDF.")
    try:
        # Generate smooth QGDF and PDF over the dense grid.
        # NOTE(review): zi_data/zi_eval receive (z_points_n, z) here —
        # confirm against _compute_qgdf_core's parameter semantics.
        smooth_qgdf, self.smooth_fj, self.smooth_hj = self._compute_qgdf_core(
            self.S_opt, self.LB_opt, self.UB_opt,
            zi_data=self.z_points_n, zi_eval=self.z
        )

        # PDF derived from the smooth fidelity/irrelevance moments.
        smooth_pdf = self._calculate_pdf_from_moments(self.smooth_fj, self.smooth_hj)

        self.qgdf_points = smooth_qgdf
        self.pdf_points = smooth_pdf

        # Store zi_n (dense grid mapped into the infinite domain) for
        # later derivative calculations.
        self.zi_n = DataConversion._convert_fininf(self.z_points_n, self.LB_opt, self.UB_opt)

        # Mark as generated
        self._computation_cache['smooth_curves_generated'] = True

        if self.catch:
            # Copies keep cached params independent of later mutation.
            self.params.update({
                'qgdf_points': self.qgdf_points.copy(),
                'pdf_points': self.pdf_points.copy(),
                'zi_points': self.zi_n.copy()
            })

        self.logger.info(f"Generated smooth curves with {self.n_points} points.")

    except Exception as e:
        # Log and record the error for later inspection.
        error_msg = f"Could not generate smooth curves: {e}"
        self.logger.error(error_msg)
        self.params['errors'].append({
            'method': '_generate_smooth_curves',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        self.logger.warning(f"Could not generate smooth curves: {e}")
        # Create fallback points using original data so plotting still works.
        self.qgdf_points = self.qgdf.copy() if hasattr(self, 'qgdf') else None
        self.pdf_points = self.pdf.copy() if hasattr(self, 'pdf') else None
        self._computation_cache['smooth_curves_generated'] = False
|
|
624
|
+
|
|
625
|
+
def _get_results(self)-> dict:
|
|
626
|
+
"""Return fitting results."""
|
|
627
|
+
self.logger.info("Getting results from QGDF fitting.")
|
|
628
|
+
|
|
629
|
+
if not self._fitted:
|
|
630
|
+
error_msg = "Must fit QGDF before getting results."
|
|
631
|
+
self.logger.error(error_msg)
|
|
632
|
+
self.params['errors'].append({
|
|
633
|
+
'method': '_get_results',
|
|
634
|
+
'error': error_msg,
|
|
635
|
+
'exception_type': 'RuntimeError'
|
|
636
|
+
})
|
|
637
|
+
raise RuntimeError("Must fit QGDF before getting results.")
|
|
638
|
+
|
|
639
|
+
# selected key from params if exists
|
|
640
|
+
keys = ['DLB', 'DUB', 'LB', 'UB', 'S_opt', 'z0', 'qgdf', 'pdf',
|
|
641
|
+
'qgdf_points', 'pdf_points', 'zi', 'zi_points', 'weights']
|
|
642
|
+
results = {key: self.params.get(key) for key in keys if key in self.params}
|
|
643
|
+
return results
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """Plot the fitted QGDF and/or PDF.

    Parameters:
        plot_smooth: overlay the dense smooth curves when available.
        plot: 'gdf', 'pdf', or 'both'.
        bounds: draw DLB/DUB/LB/UB vertical lines and shaded regions.
        extra_df: overlay WEDF / KS points when cached in params.
        figsize: matplotlib figure size.

    Raises:
        RuntimeError: if called before fitting.
        ValueError: for an invalid ``plot`` value or missing cached arrays.
    """
    self.logger.info("Plotting QGDF and PDF results.")
    # Imported lazily so matplotlib is only required when plotting.
    import matplotlib.pyplot as plt

    if plot_smooth and (len(self.data) > self.max_data_size) and self.verbose:
        self.logger.warning(f"Given data size ({len(self.data)}) exceeds max_data_size ({self.max_data_size}). For optimal compute performance, set 'plot_smooth=False', or 'max_data_size' to a larger value whichever is appropriate.")

    # Plotting reads the cached arrays from self.params, so catch=True is required.
    if not self.catch:
        self.logger.warning("Plot is not available with argument catch=False")
        return

    if not self._fitted:
        self.logger.error("Must fit QGDF before plotting.")
        raise RuntimeError("Must fit QGDF before plotting.")

    # Validate plot parameter
    if plot not in ['gdf', 'pdf', 'both']:
        self.logger.error("Invalid plot parameter.")
        raise ValueError("plot parameter must be 'gdf', 'pdf', or 'both'")

    # Check data availability for the requested panels.
    if plot in ['gdf', 'both'] and self.params.get('qgdf') is None:
        self.logger.error("QGDF must be calculated before plotting GDF.")
        raise ValueError("QGDF must be calculated before plotting GDF")
    if plot in ['pdf', 'both'] and self.params.get('pdf') is None:
        self.logger.error("PDF must be calculated before plotting PDF.")
        raise ValueError("PDF must be calculated before plotting PDF.")

    # Prepare data
    x_points = self.data
    qgdf_plot = self.params.get('qgdf')
    pdf_plot = self.params.get('pdf')
    wedf = self.params.get('wedf')
    ksdf = self.params.get('ksdf')

    # Check smooth plotting availability.
    # NOTE(review): availability keys off 'di_points_n' — confirm it is set
    # by the evaluation-grid generator (smooth-curve code stores 'zi_n').
    has_smooth = (hasattr(self, 'di_points_n') and hasattr(self, 'qgdf_points')
                 and hasattr(self, 'pdf_points') and self.di_points_n is not None
                 and self.qgdf_points is not None and self.pdf_points is not None)
    plot_smooth = plot_smooth and has_smooth

    # Create figure
    fig, ax1 = plt.subplots(figsize=figsize)

    # Plot QGDF if requested
    if plot in ['gdf', 'both']:
        self._plot_qgdf(ax1, x_points, qgdf_plot, plot_smooth, extra_df, wedf, ksdf)

    # Plot PDF if requested; a twin y-axis is used only when both panels share the figure.
    if plot in ['pdf', 'both']:
        if plot == 'pdf':
            self._plot_pdf(ax1, x_points, pdf_plot, plot_smooth, is_secondary=False)
        else:
            ax2 = ax1.twinx()
            self._plot_pdf(ax2, x_points, pdf_plot, plot_smooth, is_secondary=True)

    # Add bounds and formatting
    self._add_plot_formatting(ax1, plot, bounds)

    # Add Z0 vertical line if available
    if hasattr(self, 'z0') and self.z0 is not None:
        ax1.axvline(x=self.z0, color='magenta', linestyle='-.', linewidth=1,
                   alpha=0.8, label=f'Z0={self.z0:.3f}')
        # Update legend to include Z0
        ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))

    plt.tight_layout()
    plt.show()
|
|
715
|
+
|
|
716
|
+
def _plot_qgdf(self, ax, x_points, qgdf_plot, plot_smooth, extra_df, wedf, ksdf):
|
|
717
|
+
"""Plot QGDF components."""
|
|
718
|
+
self.logger.info("Plotting QGDF components.")
|
|
719
|
+
if plot_smooth and hasattr(self, 'qgdf_points') and self.qgdf_points is not None:
|
|
720
|
+
ax.plot(x_points, qgdf_plot, 'o', color='blue', label='QGDF', markersize=4)
|
|
721
|
+
ax.plot(self.di_points_n, self.qgdf_points, color='blue',
|
|
722
|
+
linestyle='-', linewidth=2, alpha=0.8)
|
|
723
|
+
else:
|
|
724
|
+
ax.plot(x_points, qgdf_plot, 'o-', color='blue', label='QGDF',
|
|
725
|
+
markersize=4, linewidth=1, alpha=0.8)
|
|
726
|
+
|
|
727
|
+
if extra_df:
|
|
728
|
+
if wedf is not None:
|
|
729
|
+
ax.plot(x_points, wedf, 's', color='lightblue',
|
|
730
|
+
label='WEDF', markersize=3, alpha=0.8)
|
|
731
|
+
if ksdf is not None:
|
|
732
|
+
ax.plot(x_points, ksdf, 's', color='cyan',
|
|
733
|
+
label='KS Points', markersize=3, alpha=0.8)
|
|
734
|
+
|
|
735
|
+
ax.set_ylabel('QGDF', color='blue')
|
|
736
|
+
ax.tick_params(axis='y', labelcolor='blue')
|
|
737
|
+
ax.set_ylim(0, 1)
|
|
738
|
+
|
|
739
|
+
def _plot_pdf(self, ax, x_points, pdf_plot, plot_smooth, is_secondary=False):
|
|
740
|
+
"""Plot PDF components."""
|
|
741
|
+
self.logger.info("Plotting PDF components.")
|
|
742
|
+
color = 'red'
|
|
743
|
+
|
|
744
|
+
if plot_smooth and hasattr(self, 'pdf_points') and self.pdf_points is not None:
|
|
745
|
+
ax.plot(x_points, pdf_plot, 'o', color=color, label='PDF', markersize=4)
|
|
746
|
+
ax.plot(self.di_points_n, self.pdf_points, color=color,
|
|
747
|
+
linestyle='-', linewidth=2, alpha=0.8)
|
|
748
|
+
max_pdf = np.max(self.pdf_points)
|
|
749
|
+
else:
|
|
750
|
+
ax.plot(x_points, pdf_plot, 'o-', color=color, label='PDF',
|
|
751
|
+
markersize=4, linewidth=1, alpha=0.8)
|
|
752
|
+
max_pdf = np.max(pdf_plot)
|
|
753
|
+
|
|
754
|
+
ax.set_ylabel('PDF', color=color)
|
|
755
|
+
ax.tick_params(axis='y', labelcolor=color)
|
|
756
|
+
ax.set_ylim(0, max_pdf * 1.1)
|
|
757
|
+
|
|
758
|
+
if is_secondary:
|
|
759
|
+
ax.legend(loc='upper right', bbox_to_anchor=(1, 1))
|
|
760
|
+
|
|
761
|
+
def _add_plot_formatting(self, ax1, plot, bounds):
|
|
762
|
+
"""Add formatting, bounds, and legends to plot."""
|
|
763
|
+
self.logger.info("Adding plot formatting and bounds.")
|
|
764
|
+
ax1.set_xlabel('Data Points')
|
|
765
|
+
|
|
766
|
+
# Add bounds if requested
|
|
767
|
+
if bounds:
|
|
768
|
+
bound_info = [
|
|
769
|
+
(self.params.get('DLB'), 'green', '-', 'DLB'),
|
|
770
|
+
(self.params.get('DUB'), 'orange', '-', 'DUB'),
|
|
771
|
+
(self.params.get('LB'), 'purple', '--', 'LB'),
|
|
772
|
+
(self.params.get('UB'), 'brown', '--', 'UB')
|
|
773
|
+
]
|
|
774
|
+
|
|
775
|
+
for bound, color, style, name in bound_info:
|
|
776
|
+
if bound is not None:
|
|
777
|
+
ax1.axvline(x=bound, color=color, linestyle=style, linewidth=2,
|
|
778
|
+
alpha=0.8, label=f"{name}={bound:.3f}")
|
|
779
|
+
|
|
780
|
+
# Add shaded regions
|
|
781
|
+
if self.params.get('LB') is not None:
|
|
782
|
+
ax1.axvspan(self.data.min(), self.params['LB'], alpha=0.15, color='purple')
|
|
783
|
+
if self.params.get('UB') is not None:
|
|
784
|
+
ax1.axvspan(self.params['UB'], self.data.max(), alpha=0.15, color='brown')
|
|
785
|
+
|
|
786
|
+
# Set limits and add grid
|
|
787
|
+
data_range = self.params['DUB'] - self.params['DLB']
|
|
788
|
+
padding = data_range * 0.1
|
|
789
|
+
ax1.set_xlim(self.params['DLB'] - padding, self.params['DUB'] + padding)
|
|
790
|
+
|
|
791
|
+
# Set title
|
|
792
|
+
titles = {
|
|
793
|
+
'gdf': 'QGDF' + (' with Bounds' if bounds else ''),
|
|
794
|
+
'pdf': 'PDF' + (' with Bounds' if bounds else ''),
|
|
795
|
+
'both': 'QGDF and PDF' + (' with Bounds' if bounds else '')
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
ax1.set_title(titles[plot])
|
|
799
|
+
ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))
|
|
800
|
+
ax1.grid(True, alpha=0.3)
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
def _get_qgdf_second_derivative(self):
    """Calculate the second derivative of the QGDF analytically from weighted moments.

    Builds weighted moments of the fidelities (fj) and irrelevances (hj),
    differentiates h_zj = h1 / sqrt(f1^2 - h1^2) twice via the quotient
    rule, maps through h_GQ = h_zj / sqrt(1 + h_zj^2), and returns
    0.5 * d2(h_GQ)/dz2 flattened (QGDF = (1 + h_GQ)/2).

    Raises:
        ValueError: if fj/hj have not been computed yet.
    """
    self.logger.info("Calculating second derivative of QGDF.")
    if self.fj is None or self.hj is None:
        self.logger.error("Fidelities and irrelevances must be calculated before second derivative estimation.")
        raise ValueError("Fidelities and irrelevances must be calculated before second derivative estimation.")

    # Weights as a column vector so the moment sums broadcast over columns.
    weights = self.weights.reshape(-1, 1)

    # First- and second-order weighted moments.
    f1 = np.sum(weights * self.fj, axis=0) / np.sum(weights)  # mean fidelity (f̄Q)
    h1 = np.sum(weights * self.hj, axis=0) / np.sum(weights)  # mean irrelevance (h̄Q)
    f2 = np.sum(weights * self.fj**2, axis=0) / np.sum(weights)
    h2 = np.sum(weights * self.hj**2, axis=0) / np.sum(weights)
    fh = np.sum(weights * self.fj * self.hj, axis=0) / np.sum(weights)  # NOTE(review): unused below

    # Third-order moments used by the second derivatives of the means.
    f3 = np.sum(weights * self.fj**3, axis=0) / np.sum(weights)
    h3 = np.sum(weights * self.hj**3, axis=0) / np.sum(weights)
    f2h = np.sum(weights * self.fj**2 * self.hj, axis=0) / np.sum(weights)  # NOTE(review): unused below
    fh2 = np.sum(weights * self.fj * self.hj**2, axis=0) / np.sum(weights)  # NOTE(review): unused below

    eps = np.finfo(float).eps
    # Keep f1 away from zero (sign-preserving) before it enters denominators.
    f1_safe = np.where(np.abs(f1) < eps, np.sign(f1) * eps, f1)

    # Based on QGDF = (1 + h_GQ)/2
    # where h_GQ = h_zj / √(1 + h_zj²) and h_zj = h̄Q / √(f̄Q² - h̄Q²)

    # First derivatives of the weighted means (variance-style formulas,
    # scaled by the optimized S).
    df1_dz = (f2 - f1**2) / self.S_opt  # variance formula
    dh1_dz = (h2 - h1**2) / self.S_opt  # variance formula

    # Second derivatives (third-central-moment style formulas).
    d2f1_dz2 = (f3 - 3*f1*f2 + 2*f1**3) / (self.S_opt**2)  # third central moment
    d2h1_dz2 = (h3 - 3*h1*h2 + 2*h1**3) / (self.S_opt**2)  # third central moment

    # Derivatives of h_zj = h̄Q / √(f̄Q² - h̄Q²); clamp the radicand at eps
    # so the square root stays real and nonzero.
    denominator_squared = f1_safe**2 - h1**2
    denominator_squared = np.maximum(denominator_squared, eps)
    denominator = np.sqrt(denominator_squared)

    h_zj = h1 / denominator

    # First derivative of h_zj using quotient rule
    d_numerator = dh1_dz
    d_denominator = (f1_safe * df1_dz - h1 * dh1_dz) / denominator

    dh_zj_dz = (d_numerator * denominator - h_zj * d_denominator) / denominator

    # Second derivative of h_zj (more complex)
    d2_numerator = d2h1_dz2
    # For d²(denominator), we need more careful calculation
    temp_term = f1_safe * d2f1_dz2 - h1 * d2h1_dz2 - df1_dz**2 - dh1_dz**2
    d2_denominator = (temp_term * denominator - d_denominator**2) / denominator

    d2h_zj_dz2 = ((d2_numerator * denominator - d_numerator * d_denominator) * denominator -
                 (d_numerator * denominator - h_zj * d_denominator) * d_denominator) / (denominator**2)

    # Derivatives of h_GQ = h_zj / √(1 + h_zj²)
    h_zj_squared = np.minimum(h_zj**2, 1e10)  # Prevent overflow
    h_gq_denominator = np.sqrt(1 + h_zj_squared)

    # First derivative of h_GQ.  NOTE(review): not used in the result below.
    dh_gq_dz = dh_zj_dz / (h_gq_denominator**3)

    # Second derivative of h_GQ (chain rule terms).
    term1 = d2h_zj_dz2 / (h_gq_denominator**3)
    term2 = -3 * dh_zj_dz**2 * h_zj / (h_gq_denominator**5)

    d2h_gq_dz2 = term1 + term2

    # Finally, second derivative of QGDF = (1/2) * d²(h_GQ)/dz²
    second_derivative = 0.5 * d2h_gq_dz2

    return second_derivative.flatten()
|
|
879
|
+
|
|
880
|
+
def _get_qgdf_third_derivative(self):
    """
    Estimate the third derivative of the QGDF by central finite
    differences of the (analytical) second derivative.

    The previous implementation also computed a large set of weighted
    moments (f1..f4, h1..h4), analytic mean-derivatives and h_zj that
    were never used by the numerical path below; that dead code has been
    removed without changing the returned values. It also evaluated the
    second derivative at delta=0 and discarded the result; only the
    +/-h evaluations are performed now.

    Returns
    -------
    numpy.ndarray
        Flattened third-derivative estimates at the current ``self.zi``.

    Raises
    ------
    ValueError
        If fidelities/irrelevances have not been computed yet.
    """
    self.logger.info("Calculating third derivative of QGDF.")
    if self.fj is None or self.hj is None:
        self.logger.error("Fidelities and irrelevances must be calculated before third derivative estimation.")
        raise ValueError("Fidelities and irrelevances must be calculated before third derivative estimation.")

    # Step size scaled to the data spread; absolute fallback for
    # degenerate (zero-variance) data.
    h = 1e-6 * np.std(self.data) if np.std(self.data) > 0 else 1e-6

    # The evaluations below temporarily shift self.zi and overwrite
    # self.fj / self.hj; snapshot the state so it can always be restored.
    original_zi = self.zi.copy()
    original_fi = self.fj.copy()
    original_hi = self.hj.copy()

    try:
        # Central difference: f'''(x) ≈ [f''(x+h) - f''(x-h)] / (2h)
        second_derivs = []
        for delta in (-h, h):
            self.zi = original_zi + delta
            # NOTE(review): relies on a sibling helper (defined earlier in
            # this class) to refresh self.fj / self.hj for the shifted zi.
            self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
            second_derivs.append(self._get_qgdf_second_derivative())

        third_derivative = (second_derivs[1] - second_derivs[0]) / (2 * h)
        return third_derivative.flatten()

    finally:
        # Always restore original state
        self.zi = original_zi
        self.fj = original_fi
        self.hj = original_hi
|
|
949
|
+
|
|
950
|
+
def _get_qgdf_fourth_derivative(self):
    """
    Estimate the fourth derivative of the QGDF numerically.

    Applies a five-point central finite-difference stencil to the third
    derivative:
        f''''(x) ≈ [f'''(x-2h) - 8 f'''(x-h) + 8 f'''(x+h) - f'''(x+2h)] / (12h)

    The instance state (zi, fj, hj) is shifted temporarily for each
    stencil point and restored afterwards.

    Raises
    ------
    ValueError
        If fidelities/irrelevances have not been computed yet.
    """
    self.logger.info("Calculating fourth derivative of QGDF.")
    if self.fj is None or self.hj is None:
        self.logger.error("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
        raise ValueError("Fidelities and irrelevances must be calculated before fourth derivative estimation.")

    # Step size adapted to the data scale, with an absolute lower bound.
    spread = np.std(self.data) if np.std(self.data) > 0 else 1.0
    step = max(1e-6 * spread, 1e-10)

    # Snapshot the state mutated inside the stencil loop.
    saved_fj = self.fj.copy()
    saved_hj = self.hj.copy()
    saved_zi = self.zi.copy()

    try:
        stencil = []
        for offset in (-2 * step, -step, 0, step, 2 * step):
            self.zi = saved_zi + offset
            self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
            stencil.append(self._get_qgdf_third_derivative())

        # Five-point central-difference combination (the center sample has
        # coefficient zero in this formula).
        result = (stencil[0] - 8 * stencil[1] +
                  8 * stencil[3] - stencil[4]) / (12 * step)

        return result.flatten()

    finally:
        # Restore the pre-call state regardless of success.
        self.fj = saved_fj
        self.hj = saved_hj
        self.zi = saved_zi
|
|
992
|
+
|
|
993
|
+
def _calculate_fidelities_irrelevances_at_given_zi_corrected(self, zi):
    """
    Recompute ``self.fj`` / ``self.hj`` for the evaluation points *zi*.

    Both the stored data (``self.z``) and *zi* are mapped into the
    infinite domain; the ratio matrix R (evaluation points x data points)
    then drives the gnostic fidelity/irrelevance characteristics.
    """
    self.logger.info("Calculating fidelities and irrelevances at given zi.")
    # Data points and evaluation points, both in the infinite domain.
    data_inf = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)
    eval_inf = DataConversion._convert_fininf(zi, self.LB_opt, self.UB_opt)

    # Ratio matrix (rows: evaluation points, cols: data points); the eps
    # in the denominator guards against division by zero.
    tiny = np.finfo(float).eps
    ratio = eval_inf.reshape(-1, 1) / (data_inf.reshape(1, -1) + tiny)

    characteristics = GnosticsCharacteristics(R=ratio, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=self.S_opt)

    # Cache the refreshed moments on the instance.
    self.fj = characteristics._fj(q=q, q1=q1)
    self.hj = characteristics._hj(q=q, q1=q1)
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def _fit_qgdf(self, plot: bool = False):
    """Fit the QGDF to the data.

    Pipeline: preprocess -> bound estimation -> reference DF values ->
    parameter optimization -> final QGDF/PDF -> smooth curves -> bound
    back-transform -> Z0 estimation. Any failure is recorded in
    ``self.params['errors']`` and re-raised.

    Parameters:
        plot: if True, render the QGDF/PDF plot after fitting.
    """
    self.logger.info("Starting QGDF fitting process.")
    try:

        # Step 1: Data preprocessing (sorted data is assumed downstream)
        self.logger.info("Preprocessing data for QGDF fitting.")
        self.data = np.sort(self.data)
        self._estimate_data_bounds()
        self._transform_data_to_standard_domain()
        self._estimate_weights()

        # Step 2: Bounds estimation
        self.logger.info("Estimating initial probable bounds.")
        self._estimate_initial_probable_bounds()
        self._generate_evaluation_points()

        # Step 3: Get distribution function values for optimization
        self.logger.info("Getting distribution function values for optimization.")
        self.df_values = self._get_distribution_function_values(use_wedf=self.wedf)

        # Step 4: Parameter optimization
        self.logger.info("Optimizing QGDF parameters.")
        self._determine_optimization_strategy(egdf=False)  # NOTE for QGDF egdf is False

        # Step 5: Calculate final QGDF and PDF
        self.logger.info("Calculating final QGDF and PDF with optimized parameters.")
        self._calculate_final_results()

        # Step 6: Generate smooth curves for plotting and analysis
        self.logger.info("Generating smooth curves for QGDF and PDF.")
        self._generate_smooth_curves()

        # Step 7: Transform bounds back to original domain
        self.logger.info("Transforming bounds back to original domain.")
        self._transform_bounds_to_original_domain()
        # Mark as fitted (Step 8 is now optional via marginal_analysis())
        self._fitted = True

        # Step 8: Z0 estimate with Z0Estimator
        self.logger.info("Estimating Z0 point with Z0Estimator.")
        self._compute_z0(optimize=self.z0_optimize)
        # derivatives calculation (kept disabled by default)
        # self._calculate_all_derivatives()

        self.logger.info("QGDF fitting completed successfully.")

        if plot:
            self.logger.info("Plotting QGDF and PDF.")
            self._plot()

        # clean up computation cache when flushing is enabled
        if self.flush:
            self.logger.info("Cleaning up computation cache.")
            self._cleanup_computation_cache()

    except Exception as e:
        error_msg = f"QGDF fitting failed: {e}"
        self.logger.error(error_msg)
        self.params['errors'].append({
            'method': '_fit_QGDF',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        self.logger.error(f"Error during QGDF fitting: {e}")
        raise e
|
|
1080
|
+
|
|
1081
|
+
# z0 compute
|
|
1082
|
+
def _compute_z0(self, optimize: bool = None):
    """
    Compute the Z0 point where PDF is maximum using the Z0Estimator class.

    Falls back to a simple maximum search over the smooth PDF points when
    the estimator raises; the failure is recorded in ``params['errors']``
    rather than propagated.

    Parameters:
    -----------
    optimize : bool, optional
        If True, use interpolation-based methods for higher accuracy.
        If False, use simple linear search on existing points.
        If None, uses the instance's z0_optimize setting.
    """
    self.logger.info("Computing Z0 point using Z0Estimator.")

    if self.z is None:
        self.logger.error("Data must be transformed (self.z) before Z0 estimation.")
        raise ValueError("Data must be transformed (self.z) before Z0 estimation.")

    # Use provided optimize parameter or fall back to instance setting
    use_optimize = optimize if optimize is not None else self.z0_optimize

    self.logger.info('QGDF: Computing Z0 point using Z0Estimator...')

    try:
        # Create Z0Estimator instance with proper constructor signature
        z0_estimator = Z0Estimator(
            gdf_object=self,  # Pass the QGDF object itself
            optimize=use_optimize,
            verbose=self.verbose
        )

        # Call fit() method to estimate Z0
        self.z0 = z0_estimator.fit()

        # Get estimation info for debugging and storage
        if self.catch:
            estimation_info = z0_estimator.get_estimation_info()
            self.params.update({
                'z0': float(self.z0) if self.z0 is not None else None,
                'z0_method': estimation_info.get('z0_method', 'unknown'),
                'z0_estimation_info': estimation_info
            })

        # NOTE(review): get_estimation_info() is called a second time here
        # even when catch already fetched it above.
        method_used = z0_estimator.get_estimation_info().get('z0_method', 'unknown')
        self.logger.info(f'QGDF: Z0 point computed successfully, (method: {method_used})')

    except Exception as e:
        # Log the error and keep going with the fallback strategy.
        error_msg = f"Z0 estimation failed: {str(e)}"
        self.params['errors'].append({
            'method': '_compute_z0',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        self.logger.warning(f"Warning: Z0Estimator failed with error: {e}")
        self.logger.info("Falling back to simple maximum finding...")

        # Fallback to simple maximum finding
        self._compute_z0_fallback()

        if self.catch:
            self.params.update({
                'z0': float(self.z0),
                'z0_method': 'fallback_simple_maximum',
                'z0_estimation_info': {'error': str(e)}
            })
|
|
1148
|
+
|
|
1149
|
+
def _compute_z0_fallback(self):
|
|
1150
|
+
"""
|
|
1151
|
+
Fallback method for Z0 computation using simple maximum finding.
|
|
1152
|
+
"""
|
|
1153
|
+
if not hasattr(self, 'di_points_n') or not hasattr(self, 'pdf_points'):
|
|
1154
|
+
self.logger.error("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
|
|
1155
|
+
raise ValueError("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
|
|
1156
|
+
|
|
1157
|
+
self.logger.info('Using fallback method for Z0 point...')
|
|
1158
|
+
|
|
1159
|
+
# Find index with maximum PDF
|
|
1160
|
+
max_idx = np.argmax(self.pdf_points)
|
|
1161
|
+
self.z0 = self.di_points_n[max_idx]
|
|
1162
|
+
|
|
1163
|
+
self.logger.info(f"Z0 point (fallback method).")
|
|
1164
|
+
|
|
1165
|
+
def analyze_z0(self, figsize: tuple = (12, 6)) -> Dict[str, Any]:
    """
    Analyze and visualize the Z0 estimation.

    Parameters
    ----------
    figsize : tuple
        Size of the diagnostic figure.

    Returns
    -------
    Dict[str, Any]
        Detailed information about the Z0 estimation.

    Raises
    ------
    ValueError
        If Z0 has not been computed yet (call ``fit()`` first).
    """
    self.logger.info("Analyzing Z0 estimation results.")
    if getattr(self, 'z0', None) is None:
        self.logger.error("Z0 must be computed before analysis. Call fit() first.")
        raise ValueError("Z0 must be computed before analysis. Call fit() first.")

    # A fresh estimator re-runs the (already completed) estimation so the
    # diagnostics reflect the current state; this is safe post-fit.
    estimator = Z0Estimator(
        gdf_object=self,
        optimize=self.z0_optimize,
        verbose=self.verbose
    )
    estimator.fit()

    # Collect the detailed info, then render the diagnostic plot.
    info = estimator.get_estimation_info()
    estimator.plot_z0_analysis(figsize=figsize)

    return info
|
|
1201
|
+
|
|
1202
|
+
def _calculate_all_derivatives(self):
    """
    Compute the second, third and fourth QGDF derivatives and cache
    copies in ``self.params`` when ``catch`` is enabled.

    Computation failures are logged and recorded in
    ``self.params['errors']`` instead of being raised; calling before
    ``fit()`` raises RuntimeError.
    """
    self.logger.info("Calculating all QGDF derivatives.")
    if not self._fitted:
        self.logger.error("Must fit QGDF before calculating derivatives.")
        raise RuntimeError("Must fit QGDF before calculating derivatives.")

    try:
        # Analytical second derivative; third/fourth are finite-difference
        # estimates built on top of it.
        d2 = self._get_qgdf_second_derivative()
        d3 = self._get_qgdf_third_derivative()
        d4 = self._get_qgdf_fourth_derivative()

        if self.catch:
            self.params.update({
                'second_derivative': d2.copy(),
                'third_derivative': d3.copy(),
                'fourth_derivative': d4.copy()
            })

        self.logger.info("QGDF derivatives calculated and stored successfully.")

    except Exception as e:
        # Record and warn, but do not propagate — derivatives are optional.
        error_msg = f"Derivative calculation failed: {e}"
        self.logger.error(error_msg)
        self.params['errors'].append({
            'method': '_calculate_all_derivatives',
            'error': error_msg,
            'exception_type': type(e).__name__
        })
        self.logger.warning(f"Could not calculate derivatives: {e}")
|