machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,823 @@
|
|
|
1
|
+
"""
|
|
2
|
+
base class for EGDF
|
|
3
|
+
EGDF - Estimating Global Distribution Function.
|
|
4
|
+
|
|
5
|
+
Author: Nirmal Parmar
|
|
6
|
+
Machine Gnostics
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import warnings
|
|
11
|
+
import logging
|
|
12
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
13
|
+
from typing import Dict, Any
|
|
14
|
+
from scipy.optimize import minimize
|
|
15
|
+
from machinegnostics.magcal.characteristics import GnosticsCharacteristics
|
|
16
|
+
from machinegnostics.magcal.data_conversion import DataConversion
|
|
17
|
+
from machinegnostics.magcal.gdf.base_distfunc import BaseDistFuncCompute
|
|
18
|
+
from machinegnostics.magcal.gdf.z0_estimator import Z0Estimator
|
|
19
|
+
|
|
20
|
+
class BaseEGDF(BaseDistFuncCompute):
|
|
21
|
+
"""
|
|
22
|
+
Base class for EGDF (Estimating Global Distribution Function).
|
|
23
|
+
|
|
24
|
+
This class provides a comprehensive framework for estimating global distribution
|
|
25
|
+
functions with optimization capabilities and derivative analysis.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             z0_optimize: bool = True,
             tolerance: float = 1e-3,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Set up the EGDF base object.

    Every argument is forwarded unchanged to the parent constructor
    (together with ``varS=False``) and then stored on the instance under
    the same name. When ``weights`` is None, ``np.ones_like(data)`` is
    stored instead. After the state flags and the computation cache are
    reset, the initial parameters are stored (only when ``catch`` is
    truthy), the inputs are validated, and a logger is created whose
    level is DEBUG when ``verbose`` is truthy and WARNING otherwise.
    """
    forwarded = {
        'data': data,
        'DLB': DLB,
        'DUB': DUB,
        'LB': LB,
        'UB': UB,
        'S': S,
        'z0_optimize': z0_optimize,
        'tolerance': tolerance,
        'data_form': data_form,
        'n_points': n_points,
        'homogeneous': homogeneous,
        'catch': catch,
        'weights': weights,
        'wedf': wedf,
        'opt_method': opt_method,
        'verbose': verbose,
        'max_data_size': max_data_size,
        'flush': flush,
    }
    # NOTE: for EGDF varS is always False
    super().__init__(varS=False, **forwarded)

    # Keep the raw inputs on the instance; weights get a default below.
    for attr_name, attr_value in forwarded.items():
        if attr_name != 'weights':
            setattr(self, attr_name, attr_value)
    self.weights = np.ones_like(data) if weights is None else weights

    # Fresh fitting state
    self.params = {}
    self._fitted = False
    self._derivatives_calculated = False
    self._marginal_analysis_done = False

    # Slots filled in lazily by later computation steps
    self._computation_cache = {
        'data_converter': None,
        'characteristics_computer': None,
        'weights_normalized': None,
        'smooth_curves_generated': False
    }

    # Record the starting parameters only when results are being kept
    if self.catch:
        self._store_initial_params()

    self._validate_inputs()

    log_level = logging.DEBUG if verbose else logging.WARNING
    self.logger = get_logger(self.__class__.__name__, log_level)
    self.logger.debug(f"{self.__class__.__name__} initialized:")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _compute_egdf_core(self, S, LB, UB, zi_data=None, zi_eval=None):
    """
    Compute EGDF values together with the fidelity and irrelevance arrays.

    Args:
        S: scale value handed to ``GnosticsCharacteristics._get_q_q1``.
        LB, UB: bounds handed to ``DataConversion._convert_fininf``.
        zi_data: points forming the columns of the ratio matrix;
            defaults to ``self.z``.
        zi_eval: points forming the rows of the ratio matrix;
            defaults to ``zi_data``.

    Returns:
        Tuple ``(egdf_values, fi, hi)``.
    """
    column_points = self.z if zi_data is None else zi_data
    row_points = column_points if zi_eval is None else zi_eval

    # Convert to infinite domain
    rows_inf = DataConversion._convert_fininf(row_points, LB, UB)
    cols_inf = DataConversion._convert_fininf(column_points, LB, UB)

    # Ratio matrix; the small offset keeps the denominator away from zero
    ratio = rows_inf.reshape(-1, 1) / (cols_inf.reshape(1, -1) + self._NUMERICAL_EPS)

    characteristics = GnosticsCharacteristics(R=ratio, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=S)

    fidelity = characteristics._fi(q=q, q1=q1)
    irrelevance = characteristics._hi(q=q, q1=q1)

    egdf_values = self._estimate_egdf_from_moments(fidelity, irrelevance)
    return egdf_values, fidelity, irrelevance
|
|
140
|
+
|
|
141
|
+
def _estimate_egdf_from_moments(self, fidelities, irrelevances):
|
|
142
|
+
"""Estimate EGDF from fidelities and irrelevances."""
|
|
143
|
+
# self.logger.info("Estimating EGDF from moments.")
|
|
144
|
+
weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
|
|
145
|
+
|
|
146
|
+
mean_fidelity = np.sum(weights * fidelities, axis=0) / np.sum(weights)
|
|
147
|
+
mean_irrelevance = np.sum(weights * irrelevances, axis=0) / np.sum(weights)
|
|
148
|
+
|
|
149
|
+
M_zi = np.sqrt(mean_fidelity**2 + mean_irrelevance**2)
|
|
150
|
+
M_zi = np.where(M_zi == 0, self._NUMERICAL_EPS, M_zi)
|
|
151
|
+
|
|
152
|
+
egdf_values = (1 - mean_irrelevance / M_zi) / 2
|
|
153
|
+
egdf_values = np.maximum.accumulate(egdf_values)
|
|
154
|
+
egdf_values = np.clip(egdf_values, 0, 1)
|
|
155
|
+
|
|
156
|
+
return egdf_values.flatten()
|
|
157
|
+
|
|
158
|
+
# NOTE: PDF calculation as mentioned in a new book
|
|
159
|
+
# def _calculate_pdf_from_moments(self, fidelities, irrelevances):
|
|
160
|
+
# """Calculate PDF from fidelities and irrelevances."""
|
|
161
|
+
# weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
|
|
162
|
+
|
|
163
|
+
# mean_fidelity = np.sum(weights * fidelities, axis=0) / np.sum(weights)
|
|
164
|
+
# mean_irrelevance = np.sum(weights * irrelevances, axis=0) / np.sum(weights)
|
|
165
|
+
|
|
166
|
+
# F2 = np.sum(weights * fidelities**2, axis=0) / np.sum(weights)
|
|
167
|
+
# FH = np.sum(weights * fidelities * irrelevances, axis=0) / np.sum(weights)
|
|
168
|
+
|
|
169
|
+
# M_zi = np.sqrt(mean_fidelity**2 + mean_irrelevance**2)
|
|
170
|
+
# M_zi = np.where(M_zi == 0, self._NUMERICAL_EPS, M_zi)
|
|
171
|
+
# M_zi_cubed = M_zi**3
|
|
172
|
+
|
|
173
|
+
# numerator = (mean_fidelity**2) * F2 + mean_fidelity * mean_irrelevance * FH
|
|
174
|
+
# S_value = self.S_opt if hasattr(self, 'S_opt') else 1.0
|
|
175
|
+
# density = (1 / S_value) * (numerator / M_zi_cubed)
|
|
176
|
+
|
|
177
|
+
# if np.any(density < 0):
|
|
178
|
+
# warnings.warn("PDF contains negative values, indicating potential non-homogeneous data", RuntimeWarning)
|
|
179
|
+
# return density.flatten()
|
|
180
|
+
|
|
181
|
+
def _calculate_pdf_from_moments(self, fidelities, irrelevances): # PDF
|
|
182
|
+
"""Calculate first derivative of EGDF (which is the PDF) from stored fidelities and irrelevances."""
|
|
183
|
+
self.logger.info("Calculating PDF from moments.")
|
|
184
|
+
if fidelities is None or irrelevances is None:
|
|
185
|
+
raise ValueError("Fidelities and irrelevances must be calculated before first derivative estimation.")
|
|
186
|
+
|
|
187
|
+
weights = self.weights.reshape(-1, 1)
|
|
188
|
+
|
|
189
|
+
# First order moments
|
|
190
|
+
f1 = np.sum(weights * fidelities, axis=0) / np.sum(weights) # mean_fidelity
|
|
191
|
+
h1 = np.sum(weights * irrelevances, axis=0) / np.sum(weights) # mean_irrelevance
|
|
192
|
+
|
|
193
|
+
# Second order moments (scaled by S as in MATLAB)
|
|
194
|
+
f2s = np.sum(weights * (fidelities**2 / self.S_opt), axis=0) / np.sum(weights)
|
|
195
|
+
fhs = np.sum(weights * (fidelities * irrelevances / self.S_opt), axis=0) / np.sum(weights)
|
|
196
|
+
|
|
197
|
+
# Calculate denominator w = (f1^2 + h1^2)^(3/2)
|
|
198
|
+
w = (f1**2 + h1**2)**(3/2)
|
|
199
|
+
eps = np.finfo(float).eps
|
|
200
|
+
w = np.where(w == 0, eps, w)
|
|
201
|
+
|
|
202
|
+
# First derivative formula from MATLAB: y = (f1^2 * f2s + f1 * h1 * fhs) / w
|
|
203
|
+
numerator = f1**2 * f2s + f1 * h1 * fhs
|
|
204
|
+
first_derivative = numerator / w
|
|
205
|
+
# first_derivative = first_derivative / self.zi
|
|
206
|
+
|
|
207
|
+
# if np.any(first_derivative < 0):
|
|
208
|
+
# warnings.warn("EGDF first derivative (PDF) contains negative values, indicating potential non-homogeneous data", RuntimeWarning)
|
|
209
|
+
return first_derivative.flatten()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _calculate_final_results(self):
    """
    Evaluate the EGDF and PDF at the data points with the optimized values.

    Stores ``zi``, ``fi``, ``hi``, ``egdf`` and ``pdf`` on the instance;
    when ``self.catch`` is truthy, copies of ``egdf``, ``pdf`` and ``zi``
    are also written into ``self.params``.
    """
    self.logger.info("Calculating final EGDF and PDF with optimized parameters.")

    # Convert to infinite domain
    self.zi = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    egdf_values, fidelity, irrelevance = self._compute_egdf_core(
        self.S_opt, self.LB_opt, self.UB_opt
    )

    # Kept on the instance for the derivative calculations
    self.fi = fidelity
    self.hi = irrelevance
    self.egdf = egdf_values
    self.pdf = self._calculate_pdf_from_moments(fidelity, irrelevance)

    if not self.catch:
        return

    self.logger.info("Catching parameters for later use.")
    self.params.update({
        'egdf': self.egdf.copy(),
        'pdf': self.pdf.copy(),
        'zi': self.zi.copy()
    })
|
|
236
|
+
|
|
237
|
+
def _generate_smooth_curves(self):
|
|
238
|
+
"""Generate smooth curves for plotting and analysis."""
|
|
239
|
+
self.logger.info("Generating smooth curves for EGDF and PDF.")
|
|
240
|
+
try:
|
|
241
|
+
# Generate smooth EGDF and PDF
|
|
242
|
+
smooth_egdf, self.smooth_fi, self.smooth_hi = self._compute_egdf_core(
|
|
243
|
+
self.S_opt, self.LB_opt, self.UB_opt,
|
|
244
|
+
zi_data=self.z_points_n, zi_eval=self.z
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
smooth_pdf = self._calculate_pdf_from_moments(self.smooth_fi, self.smooth_hi)
|
|
248
|
+
|
|
249
|
+
self.egdf_points = smooth_egdf
|
|
250
|
+
self.pdf_points = smooth_pdf
|
|
251
|
+
|
|
252
|
+
# Store zi_n for derivative calculations
|
|
253
|
+
self.zi_n = DataConversion._convert_fininf(self.z_points_n, self.LB_opt, self.UB_opt)
|
|
254
|
+
|
|
255
|
+
# Mark as generated
|
|
256
|
+
self._computation_cache['smooth_curves_generated'] = True
|
|
257
|
+
|
|
258
|
+
if self.catch:
|
|
259
|
+
self.logger.info("Catching parameters for later use.")
|
|
260
|
+
self.params.update({
|
|
261
|
+
'egdf_points': self.egdf_points.copy(),
|
|
262
|
+
'pdf_points': self.pdf_points.copy(),
|
|
263
|
+
'zi_points': self.zi_n.copy()
|
|
264
|
+
})
|
|
265
|
+
|
|
266
|
+
self.logger.info(f"Generated smooth curves with {self.n_points} points.")
|
|
267
|
+
|
|
268
|
+
except Exception as e:
|
|
269
|
+
# Log the error
|
|
270
|
+
error_msg = f"Could not generate smooth curves: {e}"
|
|
271
|
+
self.logger.error(error_msg)
|
|
272
|
+
self.params['errors'].append({
|
|
273
|
+
'method': '_generate_smooth_curves',
|
|
274
|
+
'error': error_msg,
|
|
275
|
+
'exception_type': type(e).__name__
|
|
276
|
+
})
|
|
277
|
+
self.logger.warning(f"Could not generate smooth curves: {e}")
|
|
278
|
+
# Create fallback points using original data
|
|
279
|
+
self.egdf_points = self.egdf.copy() if hasattr(self, 'egdf') else None
|
|
280
|
+
self.pdf_points = self.pdf.copy() if hasattr(self, 'pdf') else None
|
|
281
|
+
self._computation_cache['smooth_curves_generated'] = False
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """
    Draw the fitted EGDF and/or PDF with matplotlib and show the figure.

    Args:
        plot_smooth: draw the smooth curves when they are available.
        plot: one of 'gdf', 'pdf' or 'both'.
        bounds: forwarded to ``_add_plot_formatting``.
        extra_df: forwarded to ``_plot_egdf``.
        figsize: figure size passed to ``plt.subplots``.

    Returns early (after a warning) when ``self.catch`` is falsy.

    Raises:
        RuntimeError: if the model has not been fitted.
        ValueError: for an unknown ``plot`` value, or when the requested
            series is missing from ``self.params``.
    """
    self.logger.info("Starting plot generation.")

    import matplotlib.pyplot as plt

    if plot_smooth and (len(self.data) > self.max_data_size) and self.verbose:
        self.logger.warning(f"Given data size ({len(self.data)}) exceeds max_data_size ({self.max_data_size}). For optimal compute performance, set 'plot_smooth=False', or 'max_data_size' to a larger value whichever is appropriate.")

    if not self.catch:
        self.logger.warning("Plot is not available with argument catch=False")
        return

    if not self._fitted:
        self.logger.error("Must fit EGDF before plotting.")
        raise RuntimeError("Must fit EGDF before plotting.")

    if plot not in ('gdf', 'pdf', 'both'):
        self.logger.error("Invalid plot parameter. Must be 'gdf', 'pdf', or 'both'.")
        raise ValueError("plot parameter must be 'gdf', 'pdf', or 'both'")

    wants_gdf = plot in ('gdf', 'both')
    wants_pdf = plot in ('pdf', 'both')

    # The requested series must already be stored in params
    if wants_gdf and self.params.get('egdf') is None:
        self.logger.error("EGDF must be calculated before plotting GDF")
        raise ValueError("EGDF must be calculated before plotting GDF")
    if wants_pdf and self.params.get('pdf') is None:
        self.logger.error("PDF must be calculated before plotting PDF")
        raise ValueError("PDF must be calculated before plotting PDF")

    self.logger.info("Preparing data for plotting.")
    x_points = self.data
    egdf_series = self.params.get('egdf')
    pdf_series = self.params.get('pdf')
    wedf_series = self.params.get('wedf')
    ksdf_series = self.params.get('ksdf')

    # Smooth curves are drawn only when all three arrays exist and are not None
    smooth_available = all(
        getattr(self, attr, None) is not None
        for attr in ('di_points_n', 'egdf_points', 'pdf_points')
    )
    plot_smooth = plot_smooth and smooth_available

    _, main_axis = plt.subplots(figsize=figsize)

    if wants_gdf:
        self._plot_egdf(main_axis, x_points, egdf_series, plot_smooth, extra_df, wedf_series, ksdf_series)

    if wants_pdf:
        if plot == 'pdf':
            self._plot_pdf(main_axis, x_points, pdf_series, plot_smooth, is_secondary=False)
        else:
            # With both series, the PDF gets its own y-axis on the right
            twin_axis = main_axis.twinx()
            self._plot_pdf(twin_axis, x_points, pdf_series, plot_smooth, is_secondary=True)

    self._add_plot_formatting(main_axis, plot, bounds)

    # Mark Z0 when it is available and refresh the legend to include it
    if getattr(self, 'z0', None) is not None:
        main_axis.axvline(x=self.z0, color='magenta', linestyle='-.', linewidth=1,
                          alpha=0.8, label=f'Z0={self.z0:.3f}')
        main_axis.legend(loc='upper left', bbox_to_anchor=(0, 1))

    plt.tight_layout()
    plt.show()
|
|
355
|
+
|
|
356
|
+
def _plot_egdf(self, ax, x_points, egdf_plot, plot_smooth, extra_df, wedf, ksdf):
|
|
357
|
+
"""Plot EGDF components."""
|
|
358
|
+
self.logger.info("Plotting EGDF.")
|
|
359
|
+
if plot_smooth and hasattr(self, 'egdf_points') and self.egdf_points is not None:
|
|
360
|
+
ax.plot(x_points, egdf_plot, 'o', color='blue', label='EGDF', markersize=4)
|
|
361
|
+
ax.plot(self.di_points_n, self.egdf_points, color='blue',
|
|
362
|
+
linestyle='-', linewidth=2, alpha=0.8)
|
|
363
|
+
else:
|
|
364
|
+
ax.plot(x_points, egdf_plot, 'o-', color='blue', label='EGDF',
|
|
365
|
+
markersize=4, linewidth=1, alpha=0.8)
|
|
366
|
+
|
|
367
|
+
if extra_df:
|
|
368
|
+
if wedf is not None:
|
|
369
|
+
ax.plot(x_points, wedf, 's', color='lightblue',
|
|
370
|
+
label='WEDF', markersize=3, alpha=0.8)
|
|
371
|
+
if ksdf is not None:
|
|
372
|
+
ax.plot(x_points, ksdf, 's', color='cyan',
|
|
373
|
+
label='KS Points', markersize=3, alpha=0.8)
|
|
374
|
+
|
|
375
|
+
ax.set_ylabel('EGDF', color='blue')
|
|
376
|
+
ax.tick_params(axis='y', labelcolor='blue')
|
|
377
|
+
ax.set_ylim(0, 1)
|
|
378
|
+
|
|
379
|
+
def _plot_pdf(self, ax, x_points, pdf_plot, plot_smooth, is_secondary=False):
|
|
380
|
+
"""Plot PDF components."""
|
|
381
|
+
self.logger.info("Plotting PDF.")
|
|
382
|
+
color = 'red'
|
|
383
|
+
if plot_smooth and hasattr(self, 'pdf_points') and self.pdf_points is not None:
|
|
384
|
+
ax.plot(x_points, pdf_plot, 'o', color=color, label='PDF', markersize=4)
|
|
385
|
+
ax.plot(self.di_points_n, self.pdf_points, color=color,
|
|
386
|
+
linestyle='-', linewidth=2, alpha=0.8)
|
|
387
|
+
max_pdf = np.max(self.pdf_points)
|
|
388
|
+
else:
|
|
389
|
+
ax.plot(x_points, pdf_plot, 'o-', color=color, label='PDF',
|
|
390
|
+
markersize=4, linewidth=1, alpha=0.8)
|
|
391
|
+
max_pdf = np.max(pdf_plot)
|
|
392
|
+
|
|
393
|
+
ax.set_ylabel('PDF', color=color)
|
|
394
|
+
ax.tick_params(axis='y', labelcolor=color)
|
|
395
|
+
ax.set_ylim(0, max_pdf * 1.1)
|
|
396
|
+
|
|
397
|
+
if is_secondary:
|
|
398
|
+
ax.legend(loc='upper right', bbox_to_anchor=(1, 1))
|
|
399
|
+
|
|
400
|
+
def _add_plot_formatting(self, ax1, plot, bounds):
|
|
401
|
+
"""Add formatting, bounds, and legends to plot."""
|
|
402
|
+
ax1.set_xlabel('Data Points')
|
|
403
|
+
|
|
404
|
+
# Add bounds if requested
|
|
405
|
+
if bounds:
|
|
406
|
+
bound_info = [
|
|
407
|
+
(self.params.get('DLB'), 'green', '-', 'DLB'),
|
|
408
|
+
(self.params.get('DUB'), 'orange', '-', 'DUB'),
|
|
409
|
+
(self.params.get('LB'), 'purple', '--', 'LB'),
|
|
410
|
+
(self.params.get('UB'), 'brown', '--', 'UB')
|
|
411
|
+
]
|
|
412
|
+
|
|
413
|
+
for bound, color, style, name in bound_info:
|
|
414
|
+
if bound is not None:
|
|
415
|
+
ax1.axvline(x=bound, color=color, linestyle=style, linewidth=2,
|
|
416
|
+
alpha=0.8, label=f"{name}={bound:.3f}")
|
|
417
|
+
|
|
418
|
+
# Add shaded regions
|
|
419
|
+
if self.params.get('LB') is not None:
|
|
420
|
+
ax1.axvspan(self.data.min(), self.params['LB'], alpha=0.15, color='purple')
|
|
421
|
+
if self.params.get('UB') is not None:
|
|
422
|
+
ax1.axvspan(self.params['UB'], self.data.max(), alpha=0.15, color='brown')
|
|
423
|
+
|
|
424
|
+
# Set limits and add grid
|
|
425
|
+
data_range = self.params['DUB'] - self.params['DLB']
|
|
426
|
+
padding = data_range * 0.1
|
|
427
|
+
ax1.set_xlim(self.params['DLB'] - padding, self.params['DUB'] + padding)
|
|
428
|
+
|
|
429
|
+
# Set title
|
|
430
|
+
titles = {
|
|
431
|
+
'gdf': 'EGDF' + (' with Bounds' if bounds else ''),
|
|
432
|
+
'pdf': 'PDF' + (' with Bounds' if bounds else ''),
|
|
433
|
+
'both': 'EGDF and PDF' + (' with Bounds' if bounds else '')
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
ax1.set_title(titles[plot])
|
|
437
|
+
ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))
|
|
438
|
+
ax1.grid(True, alpha=0.3)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
# =============================================================================
|
|
443
|
+
# Derivative
|
|
444
|
+
# =============================================================================
|
|
445
|
+
def _get_egdf_second_derivative(self):
|
|
446
|
+
"""Calculate second derivative of EGDF from stored fidelities and irrelevances."""
|
|
447
|
+
self.logger.info("Calculating second derivative of EGDF.")
|
|
448
|
+
if self.fi is None or self.hi is None:
|
|
449
|
+
self.logger.error("Fidelities and irrelevances must be calculated before second derivative estimation.")
|
|
450
|
+
raise ValueError("Fidelities and irrelevances must be calculated before second derivative estimation.")
|
|
451
|
+
|
|
452
|
+
weights = self.weights.reshape(-1, 1)
|
|
453
|
+
|
|
454
|
+
# Moment calculations
|
|
455
|
+
f1 = np.sum(weights * self.fi, axis=0) / np.sum(weights)
|
|
456
|
+
h1 = np.sum(weights * self.hi, axis=0) / np.sum(weights)
|
|
457
|
+
f2 = np.sum(weights * self.fi**2, axis=0) / np.sum(weights)
|
|
458
|
+
f3 = np.sum(weights * self.fi**3, axis=0) / np.sum(weights)
|
|
459
|
+
fh = np.sum(weights * self.fi * self.hi, axis=0) / np.sum(weights)
|
|
460
|
+
fh2 = np.sum(weights * self.fi * self.hi**2, axis=0) / np.sum(weights)
|
|
461
|
+
f2h = np.sum(weights * self.fi**2 * self.hi, axis=0) / np.sum(weights)
|
|
462
|
+
|
|
463
|
+
# Calculate components
|
|
464
|
+
b = f1**2 * f2 + f1 * h1 * fh
|
|
465
|
+
d = f1**2 + h1**2
|
|
466
|
+
eps = np.finfo(float).eps
|
|
467
|
+
d = np.where(d == 0, eps, d)
|
|
468
|
+
|
|
469
|
+
# Following
|
|
470
|
+
term1 = f1 * (h1 * (f3 - fh2) - f2 * fh)
|
|
471
|
+
term2 = 2 * f1**2 * f2h + h1 * fh**2
|
|
472
|
+
term3 = (6 * b * (f1 * fh - h1 * f2)) / d
|
|
473
|
+
|
|
474
|
+
d2 = -1 / (d**(1.5)) * (2 * (term1 - term2) + term3)
|
|
475
|
+
second_derivative = d2 / (self.S_opt**2)
|
|
476
|
+
# second_derivative = second_derivative / self.zi**2
|
|
477
|
+
self.logger.info("Second derivative calculation completed.")
|
|
478
|
+
return second_derivative.flatten()
|
|
479
|
+
|
|
480
|
+
def _get_egdf_third_derivative(self):
|
|
481
|
+
"""Calculate third derivative of EGDF from stored fidelities and irrelevances."""
|
|
482
|
+
self.logger.info("Calculating third derivative of EGDF.")
|
|
483
|
+
if self.fi is None or self.hi is None:
|
|
484
|
+
self.logger.error("Fidelities and irrelevances must be calculated before third derivative estimation.")
|
|
485
|
+
raise ValueError("Fidelities and irrelevances must be calculated before third derivative estimation.")
|
|
486
|
+
|
|
487
|
+
weights = self.weights.reshape(-1, 1)
|
|
488
|
+
|
|
489
|
+
# All required moments
|
|
490
|
+
f1 = np.sum(weights * self.fi, axis=0) / np.sum(weights)
|
|
491
|
+
h1 = np.sum(weights * self.hi, axis=0) / np.sum(weights)
|
|
492
|
+
f2 = np.sum(weights * self.fi**2, axis=0) / np.sum(weights)
|
|
493
|
+
f3 = np.sum(weights * self.fi**3, axis=0) / np.sum(weights)
|
|
494
|
+
f4 = np.sum(weights * self.fi**4, axis=0) / np.sum(weights)
|
|
495
|
+
fh = np.sum(weights * self.fi * self.hi, axis=0) / np.sum(weights)
|
|
496
|
+
h2 = np.sum(weights * self.hi**2, axis=0) / np.sum(weights)
|
|
497
|
+
fh2 = np.sum(weights * self.fi * self.hi**2, axis=0) / np.sum(weights)
|
|
498
|
+
f2h = np.sum(weights * self.fi**2 * self.hi, axis=0) / np.sum(weights)
|
|
499
|
+
f2h2 = np.sum(weights * self.fi**2 * self.hi**2, axis=0) / np.sum(weights)
|
|
500
|
+
f3h = np.sum(weights * self.fi**3 * self.hi, axis=0) / np.sum(weights)
|
|
501
|
+
fh3 = np.sum(weights * self.fi * self.hi**3, axis=0) / np.sum(weights)
|
|
502
|
+
|
|
503
|
+
# Following
|
|
504
|
+
# Derivative calculations
|
|
505
|
+
dh1 = -f2
|
|
506
|
+
df1 = fh
|
|
507
|
+
df2 = 2 * f2h
|
|
508
|
+
dfh = -f3 + fh2
|
|
509
|
+
dfh2 = -2 * f3h + fh3
|
|
510
|
+
df3 = 3 * f3h
|
|
511
|
+
df2h = -f4 + 2 * f2h2
|
|
512
|
+
|
|
513
|
+
# u4 and its derivative
|
|
514
|
+
u4 = h1 * f3 - h1 * fh2 - f2 * fh
|
|
515
|
+
du4 = dh1 * f3 + h1 * df3 - dh1 * fh2 - h1 * dfh2 - df2 * fh - f2 * dfh
|
|
516
|
+
|
|
517
|
+
# u and its derivative
|
|
518
|
+
u = f1 * u4
|
|
519
|
+
du = df1 * u4 + f1 * du4
|
|
520
|
+
|
|
521
|
+
# v components
|
|
522
|
+
v4a = (f1**2) * f2h
|
|
523
|
+
dv4a = 2 * f1 * df1 * f2h + (f1**2) * df2h
|
|
524
|
+
v4b = h1 * fh**2
|
|
525
|
+
dv4b = dh1 * (fh**2) + 2 * h1 * fh * dfh
|
|
526
|
+
|
|
527
|
+
v = 2 * v4a + v4b
|
|
528
|
+
dv = 2 * dv4a + dv4b
|
|
529
|
+
|
|
530
|
+
# x components
|
|
531
|
+
x4a = f1**2 * f2 + f1 * h1 * fh
|
|
532
|
+
dx4a = 2 * f1 * df1 * f2 + (f1**2) * df2 + df1 * h1 * fh + f1 * dh1 * fh + f1 * h1 * dfh
|
|
533
|
+
x4b = f1 * fh - h1 * f2
|
|
534
|
+
dx4b = df1 * fh + f1 * dfh - dh1 * f2 - h1 * df2
|
|
535
|
+
|
|
536
|
+
x = 6 * x4a * x4b
|
|
537
|
+
dx = 6 * (dx4a * x4b + x4a * dx4b)
|
|
538
|
+
|
|
539
|
+
# d components
|
|
540
|
+
d = f1**2 + h1**2
|
|
541
|
+
dd = 2 * (f1 * df1 + h1 * dh1)
|
|
542
|
+
eps = np.finfo(float).eps
|
|
543
|
+
d = np.where(d == 0, eps, d)
|
|
544
|
+
|
|
545
|
+
# Final calculation
|
|
546
|
+
term1 = (du - dv) / (d**1.5) - (1.5 * (u - v)) / (d**2.5) * dd
|
|
547
|
+
term2 = dx / (d**2.5) - (2.5 * x) / (d**3.5) * dd
|
|
548
|
+
|
|
549
|
+
d3p = -2 * term1 - term2
|
|
550
|
+
third_derivative = 2 * d3p / (self.S_opt**3)
|
|
551
|
+
# third_derivative = third_derivative / (self.zi**3)
|
|
552
|
+
self.logger.info("Third derivative calculation completed.")
|
|
553
|
+
return third_derivative.flatten()
|
|
554
|
+
|
|
555
|
+
def _get_egdf_fourth_derivative(self):
|
|
556
|
+
"""Calculate fourth derivative of EGDF using numerical differentiation."""
|
|
557
|
+
self.logger.info("Calculating fourth derivative of EGDF using numerical differentiation.")
|
|
558
|
+
if self.fi is None or self.hi is None:
|
|
559
|
+
self.logger.error("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
|
|
560
|
+
raise ValueError("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
|
|
561
|
+
|
|
562
|
+
# For fourth derivative, use numerical differentiation as it's complex
|
|
563
|
+
dz = 1e-7
|
|
564
|
+
|
|
565
|
+
# Get third derivatives at slightly shifted points
|
|
566
|
+
zi_plus = self.zi + dz
|
|
567
|
+
zi_minus = self.zi - dz
|
|
568
|
+
|
|
569
|
+
# Store original zi
|
|
570
|
+
original_zi = self.zi.copy()
|
|
571
|
+
|
|
572
|
+
# Calculate third derivative at zi + dz
|
|
573
|
+
self.zi = zi_plus
|
|
574
|
+
self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
|
|
575
|
+
third_plus = self._get_egdf_third_derivative()
|
|
576
|
+
|
|
577
|
+
# Calculate third derivative at zi - dz
|
|
578
|
+
self.zi = zi_minus
|
|
579
|
+
self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
|
|
580
|
+
third_minus = self._get_egdf_third_derivative()
|
|
581
|
+
|
|
582
|
+
# Restore original zi and recalculate fi, hi
|
|
583
|
+
self.zi = original_zi
|
|
584
|
+
self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
|
|
585
|
+
|
|
586
|
+
# Numerical derivative
|
|
587
|
+
fourth_derivative = (third_plus - third_minus) / (2 * dz) * self.zi
|
|
588
|
+
|
|
589
|
+
self.logger.info("Fourth derivative calculation completed.")
|
|
590
|
+
return fourth_derivative.flatten()
|
|
591
|
+
|
|
592
|
+
def _calculate_fidelities_irrelevances_at_given_zi(self, zi):
    """Recompute ``self.fi`` and ``self.hi`` for the given evaluation points.

    Args:
        zi: Points used as the denominator of the ratio matrix. When
            ``None``, ``self.zi`` is used instead.

    Raises:
        ValueError: If ``self.LB_opt``, ``self.UB_opt`` or ``self.S_opt``
            is ``None``.
    """
    self.logger.info("Recalculating fidelities and irrelevances for given zi.")
    if self.LB_opt is None or self.UB_opt is None or self.S_opt is None:
        message = (
            "Optimized parameters LB_opt, UB_opt, and S_opt must be set "
            "before recalculating fidelities and irrelevances."
        )
        self.logger.error(message)
        # Stop here instead of failing later with an unrelated error when the
        # missing parameters reach the conversion / characteristics code.
        raise ValueError(message)

    # Convert to infinite domain
    zi_n = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    # Use the explicitly supplied points when given, else the stored ones.
    zi_d = self.zi if zi is None else zi

    # R[i, j] = zi_n[i] / (zi_d[j] + eps); eps keeps the denominator non-zero.
    eps = np.finfo(float).eps
    R = zi_n.reshape(-1, 1) / (zi_d + eps).reshape(1, -1)

    # Get characteristics
    gc = GnosticsCharacteristics(R=R, verbose=self.verbose)
    q, q1 = gc._get_q_q1(S=self.S_opt)

    # Store fidelities and irrelevances
    self.fi = gc._fi(q=q, q1=q1)
    self.hi = gc._hi(q=q, q1=q1)
|
|
616
|
+
|
|
617
|
+
def _get_results(self) -> dict:
    """Collect the headline fit outputs that are present in ``self.params``.

    Returns:
        A dict holding the subset of the known result keys that exist in
        ``self.params``, in the order they are listed below.

    Raises:
        RuntimeError: If the instance has not been fitted yet.
    """
    self.logger.info("Retrieving fitting results.")
    if not self._fitted:
        raise RuntimeError("Must fit EGDF before getting results.")

    # Only these entries are exposed; anything else in params is left out.
    wanted = (
        'DLB', 'DUB', 'LB', 'UB', 'S_opt', 'z0', 'egdf', 'pdf',
        'egdf_points', 'pdf_points', 'zi', 'zi_points', 'weights',
    )
    selected = {}
    for name in wanted:
        if name in self.params:
            selected[name] = self.params.get(name)
    return selected
|
|
628
|
+
|
|
629
|
+
# =============================================================================
|
|
630
|
+
# MAIN FITTING PROCESS
|
|
631
|
+
# =============================================================================
|
|
632
|
+
|
|
633
|
+
def _fit_egdf(self, plot: bool = True):
    """Run the complete EGDF fitting pipeline on ``self.data``.

    The steps are executed in this order: sort the data, estimate data
    bounds, transform to the standard domain, estimate weights, estimate
    initial probable bounds, generate evaluation points, obtain the
    distribution-function values, optimize parameters, compute the final
    results, generate smooth curves, transform the bounds back, mark the
    instance as fitted and compute Z0. Optionally the result is plotted and,
    when ``self.flush`` is truthy, the computation cache is cleaned up.

    Args:
        plot: When true, ``self._plot()`` is called after a successful fit.

    Raises:
        Exception: Any exception raised by a step is logged, recorded in
            ``self.params['errors']`` and re-raised unchanged.
    """
    self.logger.info("Starting EGDF fitting process.")
    try:
        # Step 1: Data preprocessing
        self.logger.info("Starting data preprocessing.")
        self.data = np.sort(self.data)
        self._estimate_data_bounds()
        self._transform_data_to_standard_domain()
        self._estimate_weights()

        # Step 2: Bounds estimation
        self.logger.info("Starting bounds estimation.")
        self._estimate_initial_probable_bounds()
        self._generate_evaluation_points()

        # Step 3: Get distribution function values for optimization
        self.logger.info("Getting distribution function values for optimization.")
        self.df_values = self._get_distribution_function_values(use_wedf=self.wedf)

        # Step 4: Parameter optimization
        self.logger.info("Starting parameter optimization.")
        self._determine_optimization_strategy()

        # Step 5: Calculate final EGDF and PDF
        self.logger.info("Calculating final EGDF and PDF.")
        self._calculate_final_results()

        # Step 6: Generate smooth curves for plotting and analysis
        self.logger.info("Generating smooth curves for plotting and analysis.")
        self._generate_smooth_curves()

        # Step 7: Transform bounds back to original domain
        self.logger.info("Transforming bounds back to original domain.")
        self._transform_bounds_to_original_domain()

        # Mark as fitted (Step 8 is now optional via marginal_analysis())
        self._fitted = True

        # Compute Z0 point
        self.logger.info("Computing Z0 point.")
        self._compute_z0()

        self.logger.info("EGDF fitting completed successfully.")

        if plot:
            self.logger.info("Plotting results.")
            self._plot()

        # clean up computation cache
        if self.flush:
            self.logger.info("Cleaning up computation cache.")
            self._cleanup_computation_cache()

    except Exception as e:
        error_msg = f"EGDF fitting failed: {e}"
        self.logger.error(error_msg)
        # setdefault: a missing 'errors' list must not raise KeyError here and
        # hide the exception that actually made the fit fail.
        self.params.setdefault('errors', []).append({
            'method': '_fit_egdf',
            'error': error_msg,
            'exception_type': type(e).__name__
        })
        self.logger.info(f"Error during EGDF fitting: {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
|
|
697
|
+
|
|
698
|
+
# z0 compute
|
|
699
|
+
def _compute_z0(self, optimize: bool = None):
    """
    Compute the Z0 point where PDF is maximum using the Z0Estimator class.

    If the estimator raises, the failure is logged and recorded in
    ``self.params['errors']`` and ``self._compute_z0_fallback()`` is used
    instead. When ``self.catch`` is truthy, the resulting ``z0``, the method
    name and the estimation info are stored in ``self.params``.

    Parameters:
    -----------
    optimize : bool, optional
        If True, use interpolation-based methods for higher accuracy.
        If False, use simple linear search on existing points.
        If None, uses the instance's z0_optimize setting.

    Raises:
    -------
    ValueError
        If ``self.z`` is None.
    """
    self.logger.info("Starting Z0 computation.")
    if self.z is None:
        message = "Data must be transformed (self.z) before Z0 estimation."
        self.logger.error(message)
        raise ValueError(message)

    # Use provided optimize parameter or fall back to instance setting
    use_optimize = optimize if optimize is not None else self.z0_optimize

    self.logger.info("EGDF: Computing Z0 point using Z0Estimator...")

    try:
        # Create Z0Estimator instance with proper constructor signature
        z0_estimator = Z0Estimator(
            gdf_object=self,  # Pass the EGDF object itself
            optimize=use_optimize,
            verbose=self.verbose
        )

        # Call fit() method to estimate Z0
        self.z0 = z0_estimator.fit()

        # Fetch the estimation info once; it feeds both storage and logging.
        estimation_info = z0_estimator.get_estimation_info()
        if self.catch:
            self.params.update({
                'z0': float(self.z0) if self.z0 is not None else None,
                'z0_method': estimation_info.get('z0_method', 'unknown'),
                'z0_estimation_info': estimation_info
            })

        method_used = estimation_info.get('z0_method', 'unknown')
        self.logger.info(f'EGDF: Z0 point computed successfully, (method: {method_used})')

    except Exception as e:
        # Deliberate best-effort: any estimator failure triggers the fallback.
        error_msg = f"Z0 estimation failed: {str(e)}"
        self.logger.error(error_msg)
        # setdefault: a missing 'errors' list must not raise KeyError here,
        # which would escape the handler and skip the fallback below.
        self.params.setdefault('errors', []).append({
            'method': '_compute_z0',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        self.logger.warning(f"Warning: Z0Estimator failed with error: {e}")
        self.logger.info("Falling back to simple maximum finding...")

        # Fallback to simple maximum finding
        self.logger.info("Using fallback method for Z0 computation.")
        self._compute_z0_fallback()

        if self.catch:
            self.logger.info("Catching fallback Z0 parameters for later use.")
            self.params.update({
                'z0': float(self.z0),
                'z0_method': 'fallback_simple_maximum',
                'z0_estimation_info': {'error': str(e)}
            })
|
|
767
|
+
|
|
768
|
+
def _compute_z0_fallback(self):
    """
    Fallback method for Z0 computation using simple maximum finding.

    Sets ``self.z0`` to the entry of ``self.di_points_n`` at the index where
    ``self.pdf_points`` is largest.

    Raises:
    -------
    ValueError
        If ``di_points_n`` or ``pdf_points`` is missing or None.
    """
    self.logger.info("Starting fallback Z0 computation.")

    points = getattr(self, 'di_points_n', None)
    pdf_values = getattr(self, 'pdf_points', None)
    # A None value must be rejected as well: np.argmax(None) returns 0, which
    # would silently select the first grid point as Z0.
    if points is None or pdf_values is None:
        message = "Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation."
        self.logger.error(message)
        raise ValueError(message)

    self.logger.info('Using fallback method for Z0 point...')

    # Find index with maximum PDF
    max_idx = np.argmax(pdf_values)
    self.z0 = points[max_idx]

    self.logger.info("Z0 point (fallback method).")
|
|
785
|
+
|
|
786
|
+
def analyze_z0(self, figsize: tuple = (12, 6)) -> Dict[str, Any]:
    """
    Re-run the Z0 estimator on this object, plot its analysis and return
    the estimator's information dictionary.

    Parameters:
    -----------
    figsize : tuple
        Figure size passed on to the estimator's plot.

    Returns:
    --------
    Dict[str, Any]
        The value returned by the estimator's ``get_estimation_info()``.

    Raises:
    -------
    ValueError
        If ``z0`` is missing or None.
    """
    self.logger.info("Starting Z0 analysis.")

    # Guard: there is nothing to analyze until a Z0 value exists.
    if getattr(self, 'z0', None) is None:
        message = "Z0 must be computed before analysis. Call fit() first."
        self.logger.error(message)
        raise ValueError(message)

    # A fresh estimator is built and fitted purely for inspection.
    estimator = Z0Estimator(
        gdf_object=self,
        optimize=self.z0_optimize,
        verbose=self.verbose,
    )
    estimator.fit()

    # Collect the details first, then draw the figure.
    analysis_info = estimator.get_estimation_info()
    estimator.plot_z0_analysis(figsize=figsize)

    self.logger.info("Z0 analysis completed.")
    return analysis_info
|
|
823
|
+
|