machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1234 @@
1
+ '''
2
+ QGDF: Quantifying Global Distribution Functions
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ '''
7
+ import numpy as np
8
+ import warnings
9
+ import logging
10
+ from machinegnostics.magcal.util.logging import get_logger
11
+ from typing import Dict, Any
12
+ from machinegnostics.magcal.gdf.base_distfunc import BaseDistFuncCompute
13
+ from machinegnostics.magcal.data_conversion import DataConversion
14
+ from machinegnostics.magcal.characteristics import GnosticsCharacteristics
15
+ from machinegnostics.magcal.gdf.z0_estimator import Z0Estimator
16
+
17
+ class BaseQGDF(BaseDistFuncCompute):
18
+ """
19
+ Base class for Quantifying Global Distribution Functions (QGDF).
20
+
21
+ This class provides foundational methods and attributes for computing
22
+ and analyzing global distribution functions using various techniques.
23
+
24
+ Attributes:
25
+ data (np.ndarray): Input data for distribution function computation.
26
+ n_points (int): Number of points for evaluation.
27
+ S (float): Smoothing parameter.
28
+ catch (bool): Flag to enable error catching.
29
+ verbose (bool): Flag to enable verbose output.
30
+ params (dict): Dictionary to store parameters and results.
31
+ """
32
+
33
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             z0_optimize: bool = True,
             tolerance: float = 1e-3,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Configure a QGDF estimator.

    Every argument is forwarded to the base-class constructor (with
    ``varS`` pinned to ``False``) and then mirrored onto the instance.
    Afterwards the fit-state flags and the computation cache are reset,
    the initial parameters are stored when ``catch`` is true, the inputs
    are validated and the logger is created.

    Args:
        data: Input sample.
        DLB, DUB: Data lower / upper bound (``None`` when not supplied).
        LB, UB: Lower / upper bound (``None`` when not supplied).
        S: Scale parameter, or ``'auto'``.
        z0_optimize: Stored as given; consumed outside this method.
        tolerance: Stored as given; consumed outside this method.
        data_form: Stored as given (default ``'a'``).
        n_points: Number of points used for the smooth curves.
        homogeneous: Stored as given; consumed outside this method.
        catch: When true, initial parameters are stored via
            ``_store_initial_params`` during construction.
        weights: Per-sample weights; defaults to ``np.ones_like(data)``.
        wedf: Stored as given; consumed outside this method.
        opt_method: Optimizer name (default ``'L-BFGS-B'``).
        verbose: Selects DEBUG (true) or WARNING (false) log level.
        max_data_size: Stored as given; consumed outside this method.
        flush: Stored as given; consumed outside this method.
    """
    # NOTE: for QGDF varS is always False.
    base_config = dict(
        data=data,
        DLB=DLB,
        DUB=DUB,
        LB=LB,
        UB=UB,
        S=S,
        z0_optimize=z0_optimize,
        varS=False,
        tolerance=tolerance,
        data_form=data_form,
        n_points=n_points,
        homogeneous=homogeneous,
        catch=catch,
        weights=weights,
        wedf=wedf,
        opt_method=opt_method,
        verbose=verbose,
        max_data_size=max_data_size,
        flush=flush,
    )
    super().__init__(**base_config)

    # Mirror the raw inputs on the instance (same order as the signature).
    self.data, self.DLB, self.DUB = data, DLB, DUB
    self.LB, self.UB, self.S = LB, UB, S
    self.z0_optimize = z0_optimize

    self.tolerance, self.data_form = tolerance, data_form
    self.n_points, self.homogeneous = n_points, homogeneous
    self.catch = catch
    # Missing weights mean "every sample counts equally".
    self.weights = np.ones_like(data) if weights is None else weights
    self.wedf, self.opt_method = wedf, opt_method
    self.verbose = verbose
    self.max_data_size, self.flush = max_data_size, flush

    # Fresh fit state; params is reset to an empty dict at this point.
    self.params = {}
    self._fitted = False
    self._derivatives_calculated = False
    self._marginal_analysis_done = False

    # Lazily filled helpers shared between computation steps.
    self._computation_cache = dict(
        data_converter=None,
        characteristics_computer=None,
        weights_normalized=None,
        smooth_curves_generated=False,
    )

    # Initial parameters are only recorded when catching is enabled.
    if self.catch:
        self._store_initial_params()

    # Validate all inputs.
    self._validate_inputs()

    # Logger named after the concrete class; verbosity drives the level.
    log_level = logging.DEBUG if verbose else logging.WARNING
    self.logger = get_logger(self.__class__.__name__, log_level)
    self.logger.debug(f"{self.__class__.__name__} initialized:")
117
+
118
def _compute_qgdf_core(self, S, LB, UB, zi_data=None, zi_eval=None):
    """
    Compute QGDF values together with the fidelity and irrelevance matrices.

    Args:
        S: Scale parameter handed to ``GnosticsCharacteristics._get_q_q1``.
        LB, UB: Bounds handed to ``DataConversion._convert_fininf``.
        zi_data: Values forming the columns of the ratio matrix;
            defaults to ``self.z``.
        zi_eval: Values forming the rows of the ratio matrix;
            defaults to ``zi_data``.

    Returns:
        Tuple ``(qgdf, fj, hj)`` where ``qgdf`` comes from
        ``_estimate_qgdf_from_moments(fj, hj)``.
    """
    self.logger.info("Computing QGDF core.")

    # Fall back to the instance data, then to "evaluate at the data".
    column_values = self.z if zi_data is None else zi_data
    row_values = column_values if zi_eval is None else zi_eval

    # Map both sets through the finite -> infinite domain conversion.
    rows_inf = DataConversion._convert_fininf(row_values, LB, UB)
    cols_inf = DataConversion._convert_fininf(column_values, LB, UB)

    # Pairwise ratio matrix; the epsilon keeps the denominator away from 0.
    ratio_matrix = rows_inf.reshape(-1, 1) / (cols_inf.reshape(1, -1) + self._NUMERICAL_EPS)

    characteristics = GnosticsCharacteristics(R=ratio_matrix, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=S)

    # Fidelities and irrelevances derived from q / q1.
    fj = characteristics._fj(q=q, q1=q1)
    hj = characteristics._hj(q=q, q1=q1)

    qgdf_values = self._estimate_qgdf_from_moments(fj, hj)
    return qgdf_values, fj, hj
144
+
145
+ def _estimate_qgdf_from_moments_complex(self, fidelities, irrelevances):
146
+ """Estimate QGDF using complex number approach to handle all cases."""
147
+ self.logger.info("Estimating QGDF using complex number approach.")
148
+
149
+ weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
150
+
151
+ # Add numerical stability for both large and small values
152
+ max_safe_value = np.sqrt(np.finfo(float).max) / 100 # More conservative
153
+ min_safe_value = np.sqrt(np.finfo(float).eps) * 100 # Avoid very small numbers
154
+
155
+ # Comprehensive clipping for extreme values (both large and small)
156
+ def safe_clip_values(values, name="values"):
157
+ """Safely clip values to prevent both overflow and underflow issues."""
158
+ # Handle very small values (close to zero)
159
+ values_magnitude = np.abs(values)
160
+ too_small_mask = values_magnitude < min_safe_value
161
+
162
+ # Handle very large values
163
+ too_large_mask = values_magnitude > max_safe_value
164
+
165
+ if np.any(too_small_mask) and self.verbose:
166
+ small_count = np.sum(too_small_mask)
167
+ self.logger.info(f"Warning: {small_count} very small {name} values detected (< {min_safe_value:.2e})")
168
+
169
+ if np.any(too_large_mask) and self.verbose:
170
+ large_count = np.sum(too_large_mask)
171
+ self.logger.info(f"Warning: {large_count} very large {name} values detected (> {max_safe_value:.2e})")
172
+
173
+ # Clip small values to minimum safe value (preserving sign)
174
+ values_safe = np.where(too_small_mask,
175
+ np.sign(values) * min_safe_value,
176
+ values)
177
+
178
+ # Clip large values to maximum safe value (preserving sign)
179
+ values_safe = np.where(too_large_mask,
180
+ np.sign(values_safe) * max_safe_value,
181
+ values_safe)
182
+
183
+ return values_safe
184
+
185
+ # Apply safe clipping to both fidelities and irrelevances
186
+ fidelities_safe = safe_clip_values(fidelities, "fidelity")
187
+ irrelevances_safe = safe_clip_values(irrelevances, "irrelevance")
188
+
189
+ # Calculate weighted means (f̄Q and h̄Q from equation 15.35)
190
+ mean_fidelity = np.sum(weights * fidelities_safe, axis=0) / np.sum(weights) # f̄Q
191
+ mean_irrelevance = np.sum(weights * irrelevances_safe, axis=0) / np.sum(weights) # h̄Q
192
+
193
+ # Apply safe clipping to means as well
194
+ mean_fidelity = safe_clip_values(mean_fidelity, "mean_fidelity")
195
+ mean_irrelevance = safe_clip_values(mean_irrelevance, "mean_irrelevance")
196
+
197
+ # Convert to complex for robust calculation with overflow protection
198
+ f_complex = mean_fidelity.astype(complex)
199
+ h_complex = mean_irrelevance.astype(complex)
200
+
201
+ # Calculate the complex square root with comprehensive protection
202
+ # Check magnitudes before squaring
203
+ f_magnitude = np.abs(f_complex)
204
+ h_magnitude = np.abs(h_complex)
205
+ sqrt_max = np.sqrt(max_safe_value)
206
+ sqrt_min = np.sqrt(min_safe_value)
207
+
208
+ # Check for both very large and very small values before squaring
209
+ f_too_large = f_magnitude > sqrt_max
210
+ h_too_large = h_magnitude > sqrt_max
211
+ f_too_small = f_magnitude < sqrt_min
212
+ h_too_small = h_magnitude < sqrt_min
213
+
214
+ if np.any(f_too_large) or np.any(h_too_large) or np.any(f_too_small) or np.any(h_too_small):
215
+ if self.verbose:
216
+ self.logger.info("Warning: Extreme values detected in complex calculation. Using scaled approach.")
217
+
218
+ # Scale problematic values to safe range
219
+ f_scaled = np.where(f_too_large, sqrt_max * (f_complex / f_magnitude), f_complex)
220
+ f_scaled = np.where(f_too_small, sqrt_min * (f_complex / f_magnitude), f_scaled)
221
+
222
+ h_scaled = np.where(h_too_large, sqrt_max * (h_complex / h_magnitude), h_complex)
223
+ h_scaled = np.where(h_too_small, sqrt_min * (h_complex / h_magnitude), h_scaled)
224
+
225
+ diff_squared_complex = f_scaled**2 - h_scaled**2
226
+ scale_factor = 1.0
227
+ else:
228
+ diff_squared_complex = f_complex**2 - h_complex**2
229
+ scale_factor = 1.0
230
+
231
+ # Calculate denominator with protection against both zero and very small values
232
+ denominator_magnitude = np.abs(diff_squared_complex)
233
+ denominator_too_small = denominator_magnitude < min_safe_value
234
+
235
+ if np.any(denominator_too_small):
236
+ if self.verbose:
237
+ small_denom_count = np.sum(denominator_too_small)
238
+ self.logger.info(f"Warning: {small_denom_count} very small denominators in complex calculation.")
239
+
240
+ # Use sqrt with protection
241
+ denominator_complex = np.sqrt(diff_squared_complex)
242
+ denominator_complex = np.where(denominator_magnitude < min_safe_value,
243
+ min_safe_value + 0j, denominator_complex)
244
+
245
+ # Calculate hZ,j using complex arithmetic with comprehensive protection
246
+ h_zj_complex = h_complex / denominator_complex
247
+
248
+ # **FIX THE OVERFLOW ISSUE HERE**
249
+ # Check magnitude of h_zj_complex BEFORE any squaring operation
250
+ h_zj_magnitude = np.abs(h_zj_complex)
251
+ sqrt_max_for_square = np.sqrt(sqrt_max) # Even more conservative for squaring
252
+
253
+ h_zj_too_large_for_square = h_zj_magnitude > sqrt_max_for_square
254
+ h_zj_too_small = h_zj_magnitude < sqrt_min
255
+
256
+ if np.any(h_zj_too_large_for_square):
257
+ if self.verbose:
258
+ large_count = np.sum(h_zj_too_large_for_square)
259
+ self.logger.info(f"Warning: {large_count} h_zj values too large for safe squaring. Using approximation.")
260
+
261
+ # For very large |h_zj|, use the mathematical limit without squaring
262
+ # When |h_zj| >> 1: h_zj / sqrt(1 + h_zj²) ≈ h_zj / |h_zj| = sign(h_zj)
263
+
264
+ # Safe calculation for non-large values only
265
+ h_zj_safe = np.where(h_zj_too_large_for_square, 0, h_zj_complex) # Zero out large values
266
+ h_zj_squared_safe = h_zj_safe**2 # Only square the safe values
267
+
268
+ # Calculate result for safe values
269
+ safe_result = h_zj_safe / np.sqrt(1 + h_zj_squared_safe)
270
+
271
+ # Use approximation for large values
272
+ large_result = h_zj_complex / h_zj_magnitude
273
+
274
+ # Combine results
275
+ h_gq_complex = np.where(h_zj_too_large_for_square, large_result, safe_result)
276
+
277
+ elif np.any(h_zj_too_small):
278
+ self.logger.info("Warning: Very small h_zj values in complex calculation.")
279
+
280
+ # For very small |h_zj|: h_zj / sqrt(1 + h_zj²) ≈ h_zj (linear approximation)
281
+ h_gq_complex = np.where(h_zj_too_small,
282
+ h_zj_complex, # linear approximation - no squaring!
283
+ h_zj_complex / np.sqrt(1 + h_zj_complex**2)) # safe squaring only
284
+ else:
285
+ # All values are safe for squaring - proceed normally
286
+ try:
287
+ # Only square when we know it's safe
288
+ h_zj_squared = h_zj_complex**2
289
+ h_gq_complex = h_zj_complex / np.sqrt(1 + h_zj_squared)
290
+ except (OverflowError, FloatingPointError, ZeroDivisionError) as e:
291
+ # log error
292
+ error_msg = f"Exception in h_gq calculation: {e}"
293
+ self.params['errors'].append({
294
+ 'method': '_calculate_pdf_from_moments',
295
+ 'error': error_msg,
296
+ 'exception_type': type(e).__name__
297
+ })
298
+ if self.verbose:
299
+ self.logger.info(f"Warning: Unexpected exception in h_gq calculation ({e}). Using approximation.")
300
+ # Fallback to magnitude-based approach
301
+ h_gq_complex = h_zj_complex / (h_zj_magnitude + min_safe_value)
302
+
303
+ # Extract meaningful results from complex calculation
304
+ h_gq_real = np.real(h_gq_complex)
305
+ h_gq_imag = np.imag(h_gq_complex)
306
+ h_gq_magnitude = np.abs(h_gq_complex)
307
+
308
+ # Determine how to handle complex results with small value protection
309
+ is_purely_real = np.abs(h_gq_imag) < min_safe_value
310
+ is_real_dominant = np.abs(h_gq_real) >= np.abs(h_gq_imag)
311
+
312
+ if self.verbose and not np.all(is_purely_real):
313
+ complex_count = np.sum(~is_purely_real)
314
+ self.logger.info(f"Info: {complex_count} points have complex intermediate results.")
315
+
316
+ # Strategy for handling complex results with numerical stability
317
+ h_gq_final = np.where(is_purely_real,
318
+ h_gq_real, # Use real part for essentially real results
319
+ np.where(is_real_dominant,
320
+ h_gq_real, # Use real part when real component dominates
321
+ h_gq_magnitude * np.sign(h_gq_real))) # Use magnitude with sign
322
+
323
+ # Clip to reasonable range to prevent further overflow/underflow
324
+ h_gq_final = np.clip(h_gq_final, -10, 10)
325
+
326
+ # Calculate QGDF using the processed hGQ values
327
+ qgdf_from_hgq = (1 + h_gq_final) / 2
328
+
329
+ # Also calculate using direct ratio as backup with small value protection
330
+ mean_fidelity_safe = np.where(np.abs(mean_fidelity) < min_safe_value,
331
+ np.sign(mean_fidelity) * min_safe_value, mean_fidelity)
332
+
333
+ ratio = mean_irrelevance / mean_fidelity_safe
334
+
335
+ # Handle extreme ratios (both large and small)
336
+ ratio_magnitude = np.abs(ratio)
337
+ ratio_too_large = ratio_magnitude > 10
338
+ ratio_too_small = ratio_magnitude < min_safe_value
339
+
340
+ ratio_safe = np.where(ratio_too_large, 10 * np.tanh(ratio / 10), ratio)
341
+ ratio_safe = np.where(ratio_too_small, np.sign(ratio) * min_safe_value, ratio_safe)
342
+
343
+ qgdf_from_ratio = (1 - ratio_safe) / 2
344
+
345
+ # Use complex method for difficult cases, ratio method for simple cases
346
+ use_complex_method = ~is_purely_real | ratio_too_large | ratio_too_small
347
+
348
+ qgdf_values = np.where(use_complex_method,
349
+ qgdf_from_hgq,
350
+ qgdf_from_ratio)
351
+
352
+ # Apply final constraints
353
+ qgdf_values = np.clip(qgdf_values, 0, 1)
354
+ qgdf_values = np.maximum.accumulate(qgdf_values)
355
+
356
+ return qgdf_values.flatten()
357
+
358
+ def _estimate_qgdf_from_moments(self, fidelities, irrelevances):
359
+ """Main QGDF estimation method with complex number fallback."""
360
+ self.logger.info("Estimating QGDF from moments with fallback.")
361
+ try:
362
+ # First try the complex number approach
363
+ return self._estimate_qgdf_from_moments_complex(fidelities, irrelevances)
364
+ except Exception as e:
365
+ # log error
366
+ error_msg = f"Exception in complex QGDF estimation: {e}"
367
+ self.logger.error(error_msg)
368
+ if self.verbose:
369
+ self.logger.info(f"Complex method failed: {e}. Using fallback approach.")
370
+ self.params['errors'].append({
371
+ 'method': '_estimate_qgdf_from_moments',
372
+ 'error': error_msg,
373
+ 'exception_type': type(e).__name__
374
+ })
375
+
376
+ # Fallback to the robust real-number approach
377
+ return self._estimate_qgdf_from_moments_fallback(fidelities, irrelevances)
378
+
379
+ def _estimate_qgdf_from_moments_fallback(self, fidelities, irrelevances):
380
+ """Fallback method using real numbers only."""
381
+ self.logger.info("Estimating QGDF using fallback real-number approach.")
382
+ weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
383
+
384
+ # Calculate weighted means
385
+ mean_fidelity = np.sum(weights * fidelities, axis=0) / np.sum(weights)
386
+ mean_irrelevance = np.sum(weights * irrelevances, axis=0) / np.sum(weights)
387
+
388
+ # Direct ratio approach (always mathematically valid)
389
+ mean_fidelity_safe = np.where(np.abs(mean_fidelity) < self._NUMERICAL_EPS,
390
+ np.sign(mean_fidelity) * self._NUMERICAL_EPS, mean_fidelity)
391
+
392
+ ratio = mean_irrelevance / mean_fidelity_safe
393
+ ratio_limited = np.where(np.abs(ratio) > 5, 5 * np.tanh(ratio / 5), ratio)
394
+
395
+ # hzj NOTE for QGDF book eq not working properly
396
+ # hzj = mean_irrelevance / (np.sqrt(mean_fidelity_safe**2 + mean_irrelevance**2))
397
+
398
+ # # hgq
399
+ # h_gq = hzj / (np.sqrt(1 + hzj**2))
400
+
401
+ # qgdf_values = (1 + h_gq/mean_fidelity_safe) / 2
402
+
403
+ qgdf_values = (1 - ratio_limited) / 2
404
+ qgdf_values = np.clip(qgdf_values, 0, 1)
405
+ qgdf_values = np.maximum.accumulate(qgdf_values)
406
+
407
+ return qgdf_values.flatten()
408
+
409
+ # NOTE fi and hi derivative base logic
410
+ # this give little of PDF
411
+ # can be improved
412
+ # def _calculate_pdf_from_moments(self, fidelities, irrelevances):
413
+ # """Calculate first derivative of QGDF (which is the PDF) from stored fidelities and irrelevances."""
414
+ # if fidelities is None or irrelevances is None:
415
+ # # log error
416
+ # error_msg = "Fidelities and irrelevances must be calculated first"
417
+ # self.params['errors'].append({
418
+ # 'method': '_calculate_pdf_from_moments',
419
+ # 'error': error_msg,
420
+ # 'exception_type': 'ValueError'
421
+ # })
422
+ # raise ValueError("Fidelities and irrelevances must be calculated first")
423
+
424
+ # weights = self.weights.reshape(-1, 1)
425
+
426
+ # # First order moments using QGDF's fj and hj
427
+ # f1 = np.sum(weights * fidelities, axis=0) / np.sum(weights) # f̄Q
428
+ # h1 = np.sum(weights * irrelevances, axis=0) / np.sum(weights) # h̄Q
429
+
430
+ # # Second order moments (scaled by S as in EGDF pattern)
431
+ # f2s = np.sum(weights * (fidelities**2 / self.S_opt), axis=0) / np.sum(weights) # F2
432
+ # h2s = np.sum(weights * (irrelevances**2 / self.S_opt), axis=0) / np.sum(weights) # H2
433
+ # fhs = np.sum(weights * (fidelities * irrelevances / self.S_opt), axis=0) / np.sum(weights) # FH
434
+
435
+ # # Calculate Nj = Σ(1/f²ᵢ,ⱼ) + Σ H²ᵢ,ⱼ (from equation 10.8)
436
+ # eps = np.finfo(float).eps
437
+ # f_inv_squared = np.sum(weights * (1 / (fidelities**2 + eps)), axis=0) / np.sum(weights)
438
+ # h_squared = np.sum(weights * irrelevances**2, axis=0) / np.sum(weights)
439
+ # Nj = f_inv_squared + h_squared
440
+ # Nj = np.where(Nj == 0, eps, Nj)
441
+
442
+ # # Calculate denominator w = (2 * Nj)^2 for QGDF derivative
443
+ # w = (2 * Nj)**2
444
+ # w = np.where(w == 0, eps, w)
445
+
446
+ # # QGDF PDF formula: dQGDF/dZ₀ = (1/SZ₀) * (1/(2 * Nⱼ²)) * [F2 - H2 + f̄_E * h̄_E * FH]
447
+ # numerator = f2s - h2s + f1 * h1 * fhs
448
+ # first_derivative = (1 / self.S_opt) * numerator / ( Nj**2)
449
+
450
+ # return first_derivative.flatten()
451
+
452
+ def _calculate_pdf_from_moments(self, fidelities, irrelevances):
453
+ self.logger.info("Calculating PDF from moments.")
454
+ """Calculate PDF from fidelities and irrelevances with corrected mathematical formulation."""
455
+ self.logger.info("Calculating PDF from moments")
456
+ if fidelities is None or irrelevances is None:
457
+ # log error
458
+ self.logger.error("Fidelities and irrelevances must be calculated first.")
459
+ raise ValueError("Fidelities and irrelevances must be calculated first")
460
+
461
+ weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
462
+
463
+ # Numerical stability constants
464
+ max_safe_value = np.sqrt(np.finfo(float).max) / 10
465
+ min_safe_value = np.sqrt(np.finfo(float).eps) * 100
466
+
467
+ def safe_clip_for_pdf(values, name="values"):
468
+ """Safely clip values for PDF calculations."""
469
+ values_magnitude = np.abs(values)
470
+ too_small_mask = values_magnitude < min_safe_value
471
+ too_large_mask = values_magnitude > max_safe_value
472
+
473
+ values_safe = np.where(too_small_mask,
474
+ np.sign(values) * min_safe_value, values)
475
+ values_safe = np.where(too_large_mask,
476
+ np.sign(values_safe) * max_safe_value, values_safe)
477
+ return values_safe
478
+
479
+ # Apply clipping
480
+ fidelities_safe = safe_clip_for_pdf(fidelities, "fidelity")
481
+ irrelevances_safe = safe_clip_for_pdf(irrelevances, "irrelevance")
482
+
483
+ # Calculate weighted means
484
+ mean_fidelity = np.sum(weights * fidelities_safe, axis=0) / np.sum(weights) # f̄Q
485
+ mean_irrelevance = np.sum(weights * irrelevances_safe, axis=0) / np.sum(weights) # h̄Q
486
+
487
+ # Apply safety to means
488
+ mean_fidelity = safe_clip_for_pdf(mean_fidelity, "mean_fidelity")
489
+ mean_irrelevance = safe_clip_for_pdf(mean_irrelevance, "mean_irrelevance")
490
+
491
+ # CORRECTED PDF CALCULATION FOR QGDF
492
+ # The PDF should be the derivative of QGDF with respect to the data points
493
+ # Based on QGDF = (1 + h_GQ)/2, where h_GQ = h̄Q/√(f̄Q² - h̄Q²)/√(1 + (h̄Q/√(f̄Q² - h̄Q²))²)
494
+
495
+ S_value = self.S_opt if hasattr(self, 'S_opt') else 1.0
496
+
497
+ # Calculate the denominator √(f̄Q² - h̄Q²) with protection
498
+ mean_fidelity_safe = np.where(np.abs(mean_fidelity) < min_safe_value,
499
+ np.sign(mean_fidelity) * min_safe_value, mean_fidelity)
500
+
501
+ # For QGDF, the correct mathematical relationship is different from what's implemented
502
+ # The PDF should be derived from d(QGDF)/dz, not from an empirical ratio formula
503
+
504
+ # Corrected approach: Use the mathematical derivative of the QGDF equation
505
+ # d(QGDF)/dz = (1/2) * d(h_GQ)/dz
506
+
507
+ # Calculate h_Z,j = h̄Q / √(f̄Q² - h̄Q²)
508
+ denominator_squared = mean_fidelity_safe**2 - mean_irrelevance**2
509
+
510
+ # Ensure denominator is positive and safe
511
+ denominator_squared = np.maximum(denominator_squared, min_safe_value)
512
+ denominator = np.sqrt(denominator_squared)
513
+
514
+ h_zj = mean_irrelevance / denominator
515
+
516
+ # Clip h_zj to avoid overflow
517
+ h_zj = np.clip(h_zj, 1, 1e12)
518
+
519
+ # Calculate h_GQ = h_Z,j / √(1 + h_Z,j²)
520
+ h_zj_squared = np.minimum(h_zj**2, max_safe_value) # Prevent overflow
521
+ h_gq_denominator = np.sqrt(1 + h_zj_squared)
522
+ h_gq = h_zj / h_gq_denominator
523
+
524
+ # For PDF calculation, we need the derivative of h_GQ with respect to z
525
+ # This involves second-order moments which should be calculated properly
526
+
527
+ # Second order moments (this is where the original method had issues)
528
+ f2 = np.sum(weights * fidelities_safe**2, axis=0) / np.sum(weights)
529
+ h2 = np.sum(weights * irrelevances_safe**2, axis=0) / np.sum(weights)
530
+ fh = np.sum(weights * fidelities_safe * irrelevances_safe, axis=0) / np.sum(weights)
531
+
532
+ # Apply safety to second moments
533
+ f2 = safe_clip_for_pdf(f2, "f2")
534
+ h2 = safe_clip_for_pdf(h2, "h2")
535
+ fh = safe_clip_for_pdf(fh, "fh")
536
+
537
+ # Corrected PDF formula for QGDF:
538
+ # PDF = (1/S) * derivative_term where derivative_term comes from differentiating h_GQ
539
+
540
+ # This is a simplified but more mathematically sound approach
541
+ # clip values to avoid overflow in multiplications [0, 1e12]
542
+ mean_irrelevance = np.clip(mean_irrelevance, 1, 1e12)
543
+ mean_fidelity = np.clip(mean_fidelity, 0, 1e12)
544
+ fh = np.clip(fh, -1e12, 1e12)
545
+ derivative_factor = f2 - h2 + mean_fidelity * mean_irrelevance * fh
546
+
547
+ # Apply scaling and ensure positive values
548
+ pdf_values = (1 / S_value) * np.maximum(derivative_factor, min_safe_value)
549
+
550
+ # Final clipping
551
+ pdf_values = np.clip(pdf_values, min_safe_value, max_safe_value)
552
+
553
+ return pdf_values.flatten()
554
+
555
def _calculate_final_results(self):
    """
    Compute and store the final QGDF and PDF using the optimized parameters.

    Sets ``self.zi``, ``self.fj``, ``self.hj``, ``self.qgdf`` and
    ``self.pdf``; when ``self.catch`` is true, copies of ``qgdf``, ``pdf``
    and ``zi`` are also written into ``self.params``.
    """
    self.logger.info("Calculating final QGDF and PDF results.")

    # Data mapped through the finite -> infinite conversion with the optimized bounds.
    self.zi = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    # QGDF values plus the moment matrices they were derived from.
    qgdf_values, fj, hj = self._compute_qgdf_core(self.S_opt, self.LB_opt, self.UB_opt)

    # Keep the moments for later derivative calculations.
    self.fj, self.hj = fj, hj
    self.qgdf = qgdf_values
    self.pdf = self._calculate_pdf_from_moments(fj, hj)

    if not self.catch:
        return

    self.params.update(
        qgdf=self.qgdf.copy(),
        pdf=self.pdf.copy(),
        zi=self.zi.copy(),
    )
578
+
579
+ def _generate_smooth_curves(self):
580
+ """Generate smooth curves for plotting and analysis."""
581
+ self.logger.info("Generating smooth curves for QGDF and PDF.")
582
+ try:
583
+ # Generate smooth QGDF and PDF
584
+ smooth_qgdf, self.smooth_fj, self.smooth_hj = self._compute_qgdf_core(
585
+ self.S_opt, self.LB_opt, self.UB_opt,
586
+ zi_data=self.z_points_n, zi_eval=self.z
587
+ )
588
+
589
+ smooth_pdf = self._calculate_pdf_from_moments(self.smooth_fj, self.smooth_hj)
590
+
591
+ self.qgdf_points = smooth_qgdf
592
+ self.pdf_points = smooth_pdf
593
+
594
+ # Store zi_n for derivative calculations
595
+ self.zi_n = DataConversion._convert_fininf(self.z_points_n, self.LB_opt, self.UB_opt)
596
+
597
+ # Mark as generated
598
+ self._computation_cache['smooth_curves_generated'] = True
599
+
600
+ if self.catch:
601
+ self.params.update({
602
+ 'qgdf_points': self.qgdf_points.copy(),
603
+ 'pdf_points': self.pdf_points.copy(),
604
+ 'zi_points': self.zi_n.copy()
605
+ })
606
+
607
+ self.logger.info(f"Generated smooth curves with {self.n_points} points.")
608
+
609
+ except Exception as e:
610
+ # Log the error
611
+ error_msg = f"Could not generate smooth curves: {e}"
612
+ self.logger.error(error_msg)
613
+ self.params['errors'].append({
614
+ 'method': '_generate_smooth_curves',
615
+ 'error': error_msg,
616
+ 'exception_type': type(e).__name__
617
+ })
618
+
619
+ self.logger.warning(f"Could not generate smooth curves: {e}")
620
+ # Create fallback points using original data
621
+ self.qgdf_points = self.qgdf.copy() if hasattr(self, 'qgdf') else None
622
+ self.pdf_points = self.pdf.copy() if hasattr(self, 'pdf') else None
623
+ self._computation_cache['smooth_curves_generated'] = False
624
+
625
+ def _get_results(self)-> dict:
626
+ """Return fitting results."""
627
+ self.logger.info("Getting results from QGDF fitting.")
628
+
629
+ if not self._fitted:
630
+ error_msg = "Must fit QGDF before getting results."
631
+ self.logger.error(error_msg)
632
+ self.params['errors'].append({
633
+ 'method': '_get_results',
634
+ 'error': error_msg,
635
+ 'exception_type': 'RuntimeError'
636
+ })
637
+ raise RuntimeError("Must fit QGDF before getting results.")
638
+
639
+ # selected key from params if exists
640
+ keys = ['DLB', 'DUB', 'LB', 'UB', 'S_opt', 'z0', 'qgdf', 'pdf',
641
+ 'qgdf_points', 'pdf_points', 'zi', 'zi_points', 'weights']
642
+ results = {key: self.params.get(key) for key in keys if key in self.params}
643
+ return results
644
+
645
+
646
+ def _plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
647
+ """Enhanced plotting with better organization."""
648
+ self.logger.info("Plotting QGDF and PDF results.")
649
+ import matplotlib.pyplot as plt
650
+
651
+ if plot_smooth and (len(self.data) > self.max_data_size) and self.verbose:
652
+ self.logger.warning(f"Given data size ({len(self.data)}) exceeds max_data_size ({self.max_data_size}). For optimal compute performance, set 'plot_smooth=False', or 'max_data_size' to a larger value whichever is appropriate.")
653
+
654
+ if not self.catch:
655
+ self.logger.warning("Plot is not available with argument catch=False")
656
+ return
657
+
658
+ if not self._fitted:
659
+ self.logger.error("Must fit QGDF before plotting.")
660
+ raise RuntimeError("Must fit QGDF before plotting.")
661
+
662
+ # Validate plot parameter
663
+ if plot not in ['gdf', 'pdf', 'both']:
664
+ self.logger.error("Invalid plot parameter.")
665
+ raise ValueError("plot parameter must be 'gdf', 'pdf', or 'both'")
666
+
667
+ # Check data availability
668
+ if plot in ['gdf', 'both'] and self.params.get('qgdf') is None:
669
+ self.logger.error("QGDF must be calculated before plotting GDF.")
670
+ raise ValueError("QGDF must be calculated before plotting GDF")
671
+ if plot in ['pdf', 'both'] and self.params.get('pdf') is None:
672
+ self.logger.error("PDF must be calculated before plotting PDF.")
673
+ raise ValueError("PDF must be calculated before plotting PDF.")
674
+
675
+ # Prepare data
676
+ x_points = self.data
677
+ qgdf_plot = self.params.get('qgdf')
678
+ pdf_plot = self.params.get('pdf')
679
+ wedf = self.params.get('wedf')
680
+ ksdf = self.params.get('ksdf')
681
+
682
+ # Check smooth plotting availability
683
+ has_smooth = (hasattr(self, 'di_points_n') and hasattr(self, 'qgdf_points')
684
+ and hasattr(self, 'pdf_points') and self.di_points_n is not None
685
+ and self.qgdf_points is not None and self.pdf_points is not None)
686
+ plot_smooth = plot_smooth and has_smooth
687
+
688
+ # Create figure
689
+ fig, ax1 = plt.subplots(figsize=figsize)
690
+
691
+ # Plot QGDF if requested
692
+ if plot in ['gdf', 'both']:
693
+ self._plot_qgdf(ax1, x_points, qgdf_plot, plot_smooth, extra_df, wedf, ksdf)
694
+
695
+ # Plot PDF if requested
696
+ if plot in ['pdf', 'both']:
697
+ if plot == 'pdf':
698
+ self._plot_pdf(ax1, x_points, pdf_plot, plot_smooth, is_secondary=False)
699
+ else:
700
+ ax2 = ax1.twinx()
701
+ self._plot_pdf(ax2, x_points, pdf_plot, plot_smooth, is_secondary=True)
702
+
703
+ # Add bounds and formatting
704
+ self._add_plot_formatting(ax1, plot, bounds)
705
+
706
+ # Add Z0 vertical line if available
707
+ if hasattr(self, 'z0') and self.z0 is not None:
708
+ ax1.axvline(x=self.z0, color='magenta', linestyle='-.', linewidth=1,
709
+ alpha=0.8, label=f'Z0={self.z0:.3f}')
710
+ # Update legend to include Z0
711
+ ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))
712
+
713
+ plt.tight_layout()
714
+ plt.show()
715
+
716
+ def _plot_qgdf(self, ax, x_points, qgdf_plot, plot_smooth, extra_df, wedf, ksdf):
717
+ """Plot QGDF components."""
718
+ self.logger.info("Plotting QGDF components.")
719
+ if plot_smooth and hasattr(self, 'qgdf_points') and self.qgdf_points is not None:
720
+ ax.plot(x_points, qgdf_plot, 'o', color='blue', label='QGDF', markersize=4)
721
+ ax.plot(self.di_points_n, self.qgdf_points, color='blue',
722
+ linestyle='-', linewidth=2, alpha=0.8)
723
+ else:
724
+ ax.plot(x_points, qgdf_plot, 'o-', color='blue', label='QGDF',
725
+ markersize=4, linewidth=1, alpha=0.8)
726
+
727
+ if extra_df:
728
+ if wedf is not None:
729
+ ax.plot(x_points, wedf, 's', color='lightblue',
730
+ label='WEDF', markersize=3, alpha=0.8)
731
+ if ksdf is not None:
732
+ ax.plot(x_points, ksdf, 's', color='cyan',
733
+ label='KS Points', markersize=3, alpha=0.8)
734
+
735
+ ax.set_ylabel('QGDF', color='blue')
736
+ ax.tick_params(axis='y', labelcolor='blue')
737
+ ax.set_ylim(0, 1)
738
+
739
+ def _plot_pdf(self, ax, x_points, pdf_plot, plot_smooth, is_secondary=False):
740
+ """Plot PDF components."""
741
+ self.logger.info("Plotting PDF components.")
742
+ color = 'red'
743
+
744
+ if plot_smooth and hasattr(self, 'pdf_points') and self.pdf_points is not None:
745
+ ax.plot(x_points, pdf_plot, 'o', color=color, label='PDF', markersize=4)
746
+ ax.plot(self.di_points_n, self.pdf_points, color=color,
747
+ linestyle='-', linewidth=2, alpha=0.8)
748
+ max_pdf = np.max(self.pdf_points)
749
+ else:
750
+ ax.plot(x_points, pdf_plot, 'o-', color=color, label='PDF',
751
+ markersize=4, linewidth=1, alpha=0.8)
752
+ max_pdf = np.max(pdf_plot)
753
+
754
+ ax.set_ylabel('PDF', color=color)
755
+ ax.tick_params(axis='y', labelcolor=color)
756
+ ax.set_ylim(0, max_pdf * 1.1)
757
+
758
+ if is_secondary:
759
+ ax.legend(loc='upper right', bbox_to_anchor=(1, 1))
760
+
761
+ def _add_plot_formatting(self, ax1, plot, bounds):
762
+ """Add formatting, bounds, and legends to plot."""
763
+ self.logger.info("Adding plot formatting and bounds.")
764
+ ax1.set_xlabel('Data Points')
765
+
766
+ # Add bounds if requested
767
+ if bounds:
768
+ bound_info = [
769
+ (self.params.get('DLB'), 'green', '-', 'DLB'),
770
+ (self.params.get('DUB'), 'orange', '-', 'DUB'),
771
+ (self.params.get('LB'), 'purple', '--', 'LB'),
772
+ (self.params.get('UB'), 'brown', '--', 'UB')
773
+ ]
774
+
775
+ for bound, color, style, name in bound_info:
776
+ if bound is not None:
777
+ ax1.axvline(x=bound, color=color, linestyle=style, linewidth=2,
778
+ alpha=0.8, label=f"{name}={bound:.3f}")
779
+
780
+ # Add shaded regions
781
+ if self.params.get('LB') is not None:
782
+ ax1.axvspan(self.data.min(), self.params['LB'], alpha=0.15, color='purple')
783
+ if self.params.get('UB') is not None:
784
+ ax1.axvspan(self.params['UB'], self.data.max(), alpha=0.15, color='brown')
785
+
786
+ # Set limits and add grid
787
+ data_range = self.params['DUB'] - self.params['DLB']
788
+ padding = data_range * 0.1
789
+ ax1.set_xlim(self.params['DLB'] - padding, self.params['DUB'] + padding)
790
+
791
+ # Set title
792
+ titles = {
793
+ 'gdf': 'QGDF' + (' with Bounds' if bounds else ''),
794
+ 'pdf': 'PDF' + (' with Bounds' if bounds else ''),
795
+ 'both': 'QGDF and PDF' + (' with Bounds' if bounds else '')
796
+ }
797
+
798
+ ax1.set_title(titles[plot])
799
+ ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))
800
+ ax1.grid(True, alpha=0.3)
801
+
802
+
803
+ def _get_qgdf_second_derivative(self):
804
+ """Calculate second derivative of QGDF with corrected mathematical formulation."""
805
+ self.logger.info("Calculating second derivative of QGDF.")
806
+ if self.fj is None or self.hj is None:
807
+ self.logger.error("Fidelities and irrelevances must be calculated before second derivative estimation.")
808
+ raise ValueError("Fidelities and irrelevances must be calculated before second derivative estimation.")
809
+
810
+ weights = self.weights.reshape(-1, 1)
811
+
812
+ # Calculate all required moments
813
+ f1 = np.sum(weights * self.fj, axis=0) / np.sum(weights) # f̄Q
814
+ h1 = np.sum(weights * self.hj, axis=0) / np.sum(weights) # h̄Q
815
+ f2 = np.sum(weights * self.fj**2, axis=0) / np.sum(weights)
816
+ h2 = np.sum(weights * self.hj**2, axis=0) / np.sum(weights)
817
+ fh = np.sum(weights * self.fj * self.hj, axis=0) / np.sum(weights)
818
+
819
+ # Additional moments for second derivative
820
+ f3 = np.sum(weights * self.fj**3, axis=0) / np.sum(weights)
821
+ h3 = np.sum(weights * self.hj**3, axis=0) / np.sum(weights)
822
+ f2h = np.sum(weights * self.fj**2 * self.hj, axis=0) / np.sum(weights)
823
+ fh2 = np.sum(weights * self.fj * self.hj**2, axis=0) / np.sum(weights)
824
+
825
+ eps = np.finfo(float).eps
826
+ f1_safe = np.where(np.abs(f1) < eps, np.sign(f1) * eps, f1)
827
+
828
+ # CORRECTED: Based on the actual QGDF equation QGDF = (1 + h_GQ)/2
829
+ # where h_GQ = h_zj / √(1 + h_zj²) and h_zj = h̄Q / √(f̄Q² - h̄Q²)
830
+
831
+ # Calculate first derivatives of weighted means
832
+ # These are derived from the variance-covariance relationships
833
+ df1_dz = (f2 - f1**2) / self.S_opt # Corrected: variance formula
834
+ dh1_dz = (h2 - h1**2) / self.S_opt # Corrected: variance formula
835
+
836
+ # Calculate second derivatives
837
+ d2f1_dz2 = (f3 - 3*f1*f2 + 2*f1**3) / (self.S_opt**2) # Third central moment
838
+ d2h1_dz2 = (h3 - 3*h1*h2 + 2*h1**3) / (self.S_opt**2) # Third central moment
839
+
840
+ # Calculate derivatives of h_zj = h̄Q / √(f̄Q² - h̄Q²)
841
+ denominator_squared = f1_safe**2 - h1**2
842
+ denominator_squared = np.maximum(denominator_squared, eps)
843
+ denominator = np.sqrt(denominator_squared)
844
+
845
+ h_zj = h1 / denominator
846
+
847
+ # First derivative of h_zj using quotient rule
848
+ d_numerator = dh1_dz
849
+ d_denominator = (f1_safe * df1_dz - h1 * dh1_dz) / denominator
850
+
851
+ dh_zj_dz = (d_numerator * denominator - h_zj * d_denominator) / denominator
852
+
853
+ # Second derivative of h_zj (more complex)
854
+ d2_numerator = d2h1_dz2
855
+ # For d²(denominator), we need more careful calculation
856
+ temp_term = f1_safe * d2f1_dz2 - h1 * d2h1_dz2 - df1_dz**2 - dh1_dz**2
857
+ d2_denominator = (temp_term * denominator - d_denominator**2) / denominator
858
+
859
+ d2h_zj_dz2 = ((d2_numerator * denominator - d_numerator * d_denominator) * denominator -
860
+ (d_numerator * denominator - h_zj * d_denominator) * d_denominator) / (denominator**2)
861
+
862
+ # Calculate derivatives of h_GQ = h_zj / √(1 + h_zj²)
863
+ h_zj_squared = np.minimum(h_zj**2, 1e10) # Prevent overflow
864
+ h_gq_denominator = np.sqrt(1 + h_zj_squared)
865
+
866
+ # First derivative of h_GQ
867
+ dh_gq_dz = dh_zj_dz / (h_gq_denominator**3)
868
+
869
+ # Second derivative of h_GQ
870
+ term1 = d2h_zj_dz2 / (h_gq_denominator**3)
871
+ term2 = -3 * dh_zj_dz**2 * h_zj / (h_gq_denominator**5)
872
+
873
+ d2h_gq_dz2 = term1 + term2
874
+
875
+ # Finally, second derivative of QGDF = (1/2) * d²(h_GQ)/dz²
876
+ second_derivative = 0.5 * d2h_gq_dz2
877
+
878
+ return second_derivative.flatten()
879
+
880
+ def _get_qgdf_third_derivative(self):
881
+ """Calculate third derivative of QGDF with corrected mathematical formulation."""
882
+ self.logger.info("Calculating third derivative of QGDF.")
883
+ if self.fj is None or self.hj is None:
884
+ self.logger.error("Fidelities and irrelevances must be calculated before third derivative estimation.")
885
+ raise ValueError("Fidelities and irrelevances must be calculated before third derivative estimation.")
886
+
887
+ weights = self.weights.reshape(-1, 1)
888
+
889
+ # Calculate all required moments up to 4th order
890
+ f1 = np.sum(weights * self.fj, axis=0) / np.sum(weights)
891
+ h1 = np.sum(weights * self.hj, axis=0) / np.sum(weights)
892
+ f2 = np.sum(weights * self.fj**2, axis=0) / np.sum(weights)
893
+ h2 = np.sum(weights * self.hj**2, axis=0) / np.sum(weights)
894
+ f3 = np.sum(weights * self.fj**3, axis=0) / np.sum(weights)
895
+ h3 = np.sum(weights * self.hj**3, axis=0) / np.sum(weights)
896
+ f4 = np.sum(weights * self.fj**4, axis=0) / np.sum(weights)
897
+ h4 = np.sum(weights * self.hj**4, axis=0) / np.sum(weights)
898
+
899
+ eps = np.finfo(float).eps
900
+ f1_safe = np.where(np.abs(f1) < eps, np.sign(f1) * eps, f1)
901
+
902
+ # Calculate derivatives up to third order
903
+ df1_dz = (f2 - f1**2) / self.S_opt
904
+ dh1_dz = (h2 - h1**2) / self.S_opt
905
+
906
+ d2f1_dz2 = (f3 - 3*f1*f2 + 2*f1**3) / (self.S_opt**2)
907
+ d2h1_dz2 = (h3 - 3*h1*h2 + 2*h1**3) / (self.S_opt**2)
908
+
909
+ d3f1_dz3 = (f4 - 4*f1*f3 + 6*f1**2*f2 - 3*f1**4) / (self.S_opt**3)
910
+ d3h1_dz3 = (h4 - 4*h1*h3 + 6*h1**2*h2 - 3*h1**4) / (self.S_opt**3)
911
+
912
+ # Calculate h_zj and its derivatives (simplified approach)
913
+ denominator_squared = f1_safe**2 - h1**2
914
+ denominator_squared = np.maximum(denominator_squared, eps)
915
+ denominator = np.sqrt(denominator_squared)
916
+
917
+ h_zj = h1 / denominator
918
+
919
+ # For third derivative, use numerical differentiation as analytical form is extremely complex
920
+ h = 1e-6 * np.std(self.data) if np.std(self.data) > 0 else 1e-6
921
+
922
+ # Store original values
923
+ original_zi = self.zi.copy()
924
+ original_fi = self.fj.copy()
925
+ original_hi = self.hj.copy()
926
+
927
+ try:
928
+ # Calculate second derivative at nearby points
929
+ second_derivs = []
930
+ points = [-h, 0, h]
931
+
932
+ for delta in points:
933
+ self.zi = original_zi + delta
934
+ self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
935
+ second_deriv = self._get_qgdf_second_derivative()
936
+ second_derivs.append(second_deriv)
937
+
938
+ # Use finite difference formula for third derivative
939
+ # f'''(x) ≈ [f''(x+h) - f''(x-h)] / (2h)
940
+ third_derivative = (second_derivs[2] - second_derivs[0]) / (2 * h)
941
+
942
+ return third_derivative.flatten()
943
+
944
+ finally:
945
+ # Always restore original state
946
+ self.zi = original_zi
947
+ self.fj = original_fi
948
+ self.hj = original_hi
949
+
950
+ def _get_qgdf_fourth_derivative(self):
951
+ """Calculate fourth derivative of QGDF using corrected numerical differentiation."""
952
+ self.logger.info("Calculating fourth derivative of QGDF.")
953
+ if self.fj is None or self.hj is None:
954
+ self.logger.error("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
955
+ raise ValueError("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
956
+
957
+ # Use adaptive step size based on data scale
958
+ data_scale = np.std(self.data) if np.std(self.data) > 0 else 1.0
959
+ h = max(1e-6 * data_scale, 1e-10)
960
+
961
+ # Store original state
962
+ original_fi = self.fj.copy()
963
+ original_hi = self.hj.copy()
964
+ original_zi = self.zi.copy()
965
+
966
+ try:
967
+ # Use 5-point stencil for better accuracy
968
+ # f''''(x) ≈ [f'''(x-2h) - 8f'''(x-h) + 8f'''(x+h) - f'''(x+2h)] / (12h)
969
+ points = [-2*h, -h, 0, h, 2*h]
970
+ third_derivatives = []
971
+
972
+ for delta in points:
973
+ self.zi = original_zi + delta
974
+ self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
975
+ third_deriv = self._get_qgdf_third_derivative()
976
+ third_derivatives.append(third_deriv)
977
+
978
+ # Apply 5-point finite difference formula
979
+ fourth_derivative = (third_derivatives[0] - 8*third_derivatives[1] +
980
+ 8*third_derivatives[3] - third_derivatives[4]) / (12*h)
981
+
982
+ # REMOVED THE INCORRECT MULTIPLICATION BY self.zi
983
+ # The original code incorrectly multiplied by self.zi
984
+
985
+ return fourth_derivative.flatten()
986
+
987
+ finally:
988
+ # Always restore original state
989
+ self.fj = original_fi
990
+ self.hj = original_hi
991
+ self.zi = original_zi
992
+
993
def _calculate_fidelities_irrelevances_at_given_zi_corrected(self, zi):
    """Recompute ``self.fj`` and ``self.hj`` for the evaluation points ``zi``.

    Both the stored data ``self.z`` and ``zi`` are passed through
    ``DataConversion._convert_fininf`` with the optimised bounds, a ratio
    matrix (evaluation points along rows, data points along columns) is built
    from them, and the resulting characteristics are stored on the instance.
    """
    self.logger.info("Calculating fidelities and irrelevances at given zi.")

    # Convert data points and evaluation points with the same bounds.
    data_converted = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)
    eval_converted = DataConversion._convert_fininf(zi, self.LB_opt, self.UB_opt)

    # Ratio matrix; machine epsilon is added to the divisor to avoid an
    # exact division by zero.
    tiny = np.finfo(float).eps
    eval_column = eval_converted.reshape(-1, 1)
    data_row = data_converted.reshape(1, -1)
    ratio_matrix = eval_column / (data_row + tiny)

    characteristics = GnosticsCharacteristics(R=ratio_matrix, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=self.S_opt)

    # Store fidelities and irrelevances
    self.fj = characteristics._fj(q=q, q1=q1)
    self.hj = characteristics._hj(q=q, q1=q1)
1011
+
1012
+
1013
+ def _fit_qgdf(self, plot: bool = False):
1014
+ """Fit the QGDF to the data."""
1015
+ self.logger.info("Starting QGDF fitting process.")
1016
+ try:
1017
+
1018
+ # Step 1: Data preprocessing
1019
+ self.logger.info("Preprocessing data for QGDF fitting.")
1020
+ self.data = np.sort(self.data)
1021
+ self._estimate_data_bounds()
1022
+ self._transform_data_to_standard_domain()
1023
+ self._estimate_weights()
1024
+
1025
+ # Step 2: Bounds estimation
1026
+ self.logger.info("Estimating initial probable bounds.")
1027
+ self._estimate_initial_probable_bounds()
1028
+ self._generate_evaluation_points()
1029
+
1030
+ # Step 3: Get distribution function values for optimization
1031
+ self.logger.info("Getting distribution function values for optimization.")
1032
+ self.df_values = self._get_distribution_function_values(use_wedf=self.wedf)
1033
+
1034
+ # Step 4: Parameter optimization
1035
+ self.logger.info("Optimizing QGDF parameters.")
1036
+ self._determine_optimization_strategy(egdf=False) # NOTE for QGDF egdf is False
1037
+
1038
+ # Step 5: Calculate final QGDF and PDF
1039
+ self.logger.info("Calculating final QGDF and PDF with optimized parameters.")
1040
+ self._calculate_final_results()
1041
+
1042
+ # Step 6: Generate smooth curves for plotting and analysis
1043
+ self.logger.info("Generating smooth curves for QGDF and PDF.")
1044
+ self._generate_smooth_curves()
1045
+
1046
+ # Step 7: Transform bounds back to original domain
1047
+ self.logger.info("Transforming bounds back to original domain.")
1048
+ self._transform_bounds_to_original_domain()
1049
+ # Mark as fitted (Step 8 is now optional via marginal_analysis())
1050
+ self._fitted = True
1051
+
1052
+ # Step 8: Z0 estimate with Z0Estimator
1053
+ self.logger.info("Estimating Z0 point with Z0Estimator.")
1054
+ self._compute_z0(optimize=self.z0_optimize)
1055
+ # derivatives calculation
1056
+ # self._calculate_all_derivatives()
1057
+
1058
+ self.logger.info("QGDF fitting completed successfully.")
1059
+
1060
+ if plot:
1061
+ self.logger.info("Plotting QGDF and PDF.")
1062
+ self._plot()
1063
+
1064
+ # clean up computation cache
1065
+ if self.flush:
1066
+ self.logger.info("Cleaning up computation cache.")
1067
+ self._cleanup_computation_cache()
1068
+
1069
+ except Exception as e:
1070
+ error_msg = f"QGDF fitting failed: {e}"
1071
+ self.logger.error(error_msg)
1072
+ self.params['errors'].append({
1073
+ 'method': '_fit_QGDF',
1074
+ 'error': error_msg,
1075
+ 'exception_type': type(e).__name__
1076
+ })
1077
+
1078
+ self.logger.error(f"Error during QGDF fitting: {e}")
1079
+ raise e
1080
+
1081
+ # z0 compute
1082
def _compute_z0(self, optimize: bool = None):
    """
    Estimate the Z0 point with ``Z0Estimator`` and store it on ``self.z0``.

    If the estimator raises, the error is recorded in ``self.params['errors']``
    and ``_compute_z0_fallback`` is used instead. When ``self.catch`` is truthy
    the value, the method name and the estimation info are also written to
    ``self.params``.

    Parameters:
    -----------
    optimize : bool, optional
        Passed to ``Z0Estimator`` as its ``optimize`` argument.
        If None, the instance's ``z0_optimize`` setting is used.

    Raises:
    -------
    ValueError
        If ``self.z`` is None.
    """
    self.logger.info("Computing Z0 point using Z0Estimator.")

    if self.z is None:
        self.logger.error("Data must be transformed (self.z) before Z0 estimation.")
        raise ValueError("Data must be transformed (self.z) before Z0 estimation.")

    # Explicit argument wins; otherwise use the instance-level setting.
    use_optimize = self.z0_optimize if optimize is None else optimize

    self.logger.info('QGDF: Computing Z0 point using Z0Estimator...')

    try:
        estimator = Z0Estimator(
            gdf_object=self,
            optimize=use_optimize,
            verbose=self.verbose
        )
        self.z0 = estimator.fit()

        if self.catch:
            info = estimator.get_estimation_info()
            stored_z0 = None if self.z0 is None else float(self.z0)
            self.params.update({
                'z0': stored_z0,
                'z0_method': info.get('z0_method', 'unknown'),
                'z0_estimation_info': info
            })

        method_used = estimator.get_estimation_info().get('z0_method', 'unknown')
        self.logger.info(f'QGDF: Z0 point computed successfully, (method: {method_used})')

    except Exception as e:
        # Record the estimator failure, then fall back to the simple maximum.
        error_msg = f"Z0 estimation failed: {str(e)}"
        self.params['errors'].append({
            'method': '_compute_z0',
            'error': error_msg,
            'exception_type': type(e).__name__
        })

        self.logger.warning(f"Warning: Z0Estimator failed with error: {e}")
        self.logger.info("Falling back to simple maximum finding...")

        self._compute_z0_fallback()

        if self.catch:
            self.params.update({
                'z0': float(self.z0),
                'z0_method': 'fallback_simple_maximum',
                'z0_estimation_info': {'error': str(e)}
            })
1148
+
1149
+ def _compute_z0_fallback(self):
1150
+ """
1151
+ Fallback method for Z0 computation using simple maximum finding.
1152
+ """
1153
+ if not hasattr(self, 'di_points_n') or not hasattr(self, 'pdf_points'):
1154
+ self.logger.error("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
1155
+ raise ValueError("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
1156
+
1157
+ self.logger.info('Using fallback method for Z0 point...')
1158
+
1159
+ # Find index with maximum PDF
1160
+ max_idx = np.argmax(self.pdf_points)
1161
+ self.z0 = self.di_points_n[max_idx]
1162
+
1163
+ self.logger.info(f"Z0 point (fallback method).")
1164
+
1165
def analyze_z0(self, figsize: tuple = (12, 6)) -> Dict[str, Any]:
    """
    Re-run the Z0 estimator, plot its analysis and return its estimation info.

    Parameters:
    -----------
    figsize : tuple
        Figure size forwarded to ``Z0Estimator.plot_z0_analysis``.

    Returns:
    --------
    Dict[str, Any]
        The dictionary returned by ``Z0Estimator.get_estimation_info()``.

    Raises:
    -------
    ValueError
        If ``self.z0`` is missing or None (i.e. Z0 has not been computed).
    """
    self.logger.info("Analyzing Z0 estimation results.")

    if getattr(self, 'z0', None) is None:
        self.logger.error("Z0 must be computed before analysis. Call fit() first.")
        raise ValueError("Z0 must be computed before analysis. Call fit() first.")

    # A fresh estimator is built and fitted for the analysis; the stored
    # self.z0 is not reassigned here.
    estimator = Z0Estimator(gdf_object=self, optimize=self.z0_optimize, verbose=self.verbose)
    estimator.fit()

    # Collect the details first, then draw the diagnostic figure.
    analysis_info = estimator.get_estimation_info()
    estimator.plot_z0_analysis(figsize=figsize)

    return analysis_info
1201
+
1202
+ def _calculate_all_derivatives(self):
1203
+ """Calculate all derivatives and store in params."""
1204
+ self.logger.info("Calculating all QGDF derivatives.")
1205
+ if not self._fitted:
1206
+ self.logger.error("Must fit QGDF before calculating derivatives.")
1207
+ raise RuntimeError("Must fit QGDF before calculating derivatives.")
1208
+
1209
+ try:
1210
+ # Calculate derivatives using analytical methods
1211
+ second_deriv = self._get_qgdf_second_derivative()
1212
+ third_deriv = self._get_qgdf_third_derivative()
1213
+ fourth_deriv = self._get_qgdf_fourth_derivative()
1214
+
1215
+ # Store in params
1216
+ if self.catch:
1217
+ self.params.update({
1218
+ 'second_derivative': second_deriv.copy(),
1219
+ 'third_derivative': third_deriv.copy(),
1220
+ 'fourth_derivative': fourth_deriv.copy()
1221
+ })
1222
+
1223
+ self.logger.info("QGDF derivatives calculated and stored successfully.")
1224
+
1225
+ except Exception as e:
1226
+ # Log error
1227
+ error_msg = f"Derivative calculation failed: {e}"
1228
+ self.logger.error(error_msg)
1229
+ self.params['errors'].append({
1230
+ 'method': '_calculate_all_derivatives',
1231
+ 'error': error_msg,
1232
+ 'exception_type': type(e).__name__
1233
+ })
1234
+ self.logger.warning(f"Could not calculate derivatives: {e}")