machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,823 @@
1
+ """
2
+ base class for EGDF
3
+ EGDF - Estimating Global Distribution Function.
4
+
5
+ Author: Nirmal Parmar
6
+ Machine Gnostics
7
+ """
8
+
9
+ import numpy as np
10
+ import warnings
11
+ import logging
12
+ from machinegnostics.magcal.util.logging import get_logger
13
+ from typing import Dict, Any
14
+ from scipy.optimize import minimize
15
+ from machinegnostics.magcal.characteristics import GnosticsCharacteristics
16
+ from machinegnostics.magcal.data_conversion import DataConversion
17
+ from machinegnostics.magcal.gdf.base_distfunc import BaseDistFuncCompute
18
+ from machinegnostics.magcal.gdf.z0_estimator import Z0Estimator
19
+
20
+ class BaseEGDF(BaseDistFuncCompute):
21
+ """
22
+ Base class for EGDF (Estimating Global Distribution Function).
23
+
24
+ This class provides a comprehensive framework for estimating global distribution
25
+ functions with optimization capabilities and derivative analysis.
26
+ """
27
+
28
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             z0_optimize: bool = True,
             tolerance: float = 1e-3,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Configure an EGDF estimator.

    Every argument is forwarded unchanged to the base-class constructor
    (together with ``varS=False``) and is also stored on the instance under
    the same name. When ``weights`` is None, ``np.ones_like(data)`` is
    stored instead. After that the fit-state flags and the computation
    cache are initialised, the initial parameters are stored when ``catch``
    is true, the inputs are validated, and the logger is created (DEBUG
    level when ``verbose`` is true, WARNING otherwise).
    """
    settings = {
        'data': data,
        'DLB': DLB,
        'DUB': DUB,
        'LB': LB,
        'UB': UB,
        'S': S,
        'z0_optimize': z0_optimize,
        'tolerance': tolerance,
        'data_form': data_form,
        'n_points': n_points,
        'homogeneous': homogeneous,
        'catch': catch,
        'weights': weights,
        'wedf': wedf,
        'opt_method': opt_method,
        'verbose': verbose,
        'max_data_size': max_data_size,
        'flush': flush,
    }

    # NOTE: for EGDF varS is always False
    super().__init__(varS=False, **settings)

    # Keep the raw inputs on the instance under their argument names
    for attr_name, attr_value in settings.items():
        setattr(self, attr_name, attr_value)
    if weights is None:
        # No weights supplied: fall back to an array of ones shaped like the data
        self.weights = np.ones_like(data)

    # Fit-state bookkeeping
    self.params = {}
    self._fitted = False
    self._derivatives_calculated = False
    self._marginal_analysis_done = False

    # Slots filled in later by the fitting pipeline
    self._computation_cache = {
        'data_converter': None,
        'characteristics_computer': None,
        'weights_normalized': None,
        'smooth_curves_generated': False
    }

    # Initial parameters are only recorded when results are being kept
    if self.catch:
        self._store_initial_params()

    self._validate_inputs()

    # Logger named after the concrete class
    cls_name = self.__class__.__name__
    log_level = logging.DEBUG if verbose else logging.WARNING
    self.logger = get_logger(cls_name, log_level)
    self.logger.debug(f"{cls_name} initialized:")
112
+
113
+
114
def _compute_egdf_core(self, S, LB, UB, zi_data=None, zi_eval=None):
    """
    Compute EGDF values plus the fidelity and irrelevance matrices.

    ``zi_data`` defaults to ``self.z`` and ``zi_eval`` defaults to
    ``zi_data``. Both are mapped through ``DataConversion._convert_fininf``
    with the given bounds. The ratio matrix has one row per ``zi_eval``
    entry and one column per ``zi_data`` entry; the columns are what the
    returned EGDF is indexed by.

    Returns:
    --------
    tuple
        ``(egdf_values, fidelities, irrelevances)``.
    """
    # self.logger.info("Starting core EGDF computation.")
    column_source = self.z if zi_data is None else zi_data
    row_source = column_source if zi_eval is None else zi_eval

    # Convert both point sets to the infinite domain
    row_values = DataConversion._convert_fininf(row_source, LB, UB)
    column_values = DataConversion._convert_fininf(column_source, LB, UB)

    # Ratio matrix; the epsilon keeps the denominator away from zero
    ratio = row_values.reshape(-1, 1) / (column_values.reshape(1, -1) + self._NUMERICAL_EPS)

    characteristics = GnosticsCharacteristics(R=ratio, verbose=self.verbose)
    q, q1 = characteristics._get_q_q1(S=S)

    fidelities = characteristics._fi(q=q, q1=q1)
    irrelevances = characteristics._hi(q=q, q1=q1)

    egdf_values = self._estimate_egdf_from_moments(fidelities, irrelevances)
    return egdf_values, fidelities, irrelevances
140
+
141
+ def _estimate_egdf_from_moments(self, fidelities, irrelevances):
142
+ """Estimate EGDF from fidelities and irrelevances."""
143
+ # self.logger.info("Estimating EGDF from moments.")
144
+ weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
145
+
146
+ mean_fidelity = np.sum(weights * fidelities, axis=0) / np.sum(weights)
147
+ mean_irrelevance = np.sum(weights * irrelevances, axis=0) / np.sum(weights)
148
+
149
+ M_zi = np.sqrt(mean_fidelity**2 + mean_irrelevance**2)
150
+ M_zi = np.where(M_zi == 0, self._NUMERICAL_EPS, M_zi)
151
+
152
+ egdf_values = (1 - mean_irrelevance / M_zi) / 2
153
+ egdf_values = np.maximum.accumulate(egdf_values)
154
+ egdf_values = np.clip(egdf_values, 0, 1)
155
+
156
+ return egdf_values.flatten()
157
+
158
+ # NOTE: PDF calculation as mentioned in a new book
159
+ # def _calculate_pdf_from_moments(self, fidelities, irrelevances):
160
+ # """Calculate PDF from fidelities and irrelevances."""
161
+ # weights = self._computation_cache['weights_normalized'].reshape(-1, 1)
162
+
163
+ # mean_fidelity = np.sum(weights * fidelities, axis=0) / np.sum(weights)
164
+ # mean_irrelevance = np.sum(weights * irrelevances, axis=0) / np.sum(weights)
165
+
166
+ # F2 = np.sum(weights * fidelities**2, axis=0) / np.sum(weights)
167
+ # FH = np.sum(weights * fidelities * irrelevances, axis=0) / np.sum(weights)
168
+
169
+ # M_zi = np.sqrt(mean_fidelity**2 + mean_irrelevance**2)
170
+ # M_zi = np.where(M_zi == 0, self._NUMERICAL_EPS, M_zi)
171
+ # M_zi_cubed = M_zi**3
172
+
173
+ # numerator = (mean_fidelity**2) * F2 + mean_fidelity * mean_irrelevance * FH
174
+ # S_value = self.S_opt if hasattr(self, 'S_opt') else 1.0
175
+ # density = (1 / S_value) * (numerator / M_zi_cubed)
176
+
177
+ # if np.any(density < 0):
178
+ # warnings.warn("PDF contains negative values, indicating potential non-homogeneous data", RuntimeWarning)
179
+ # return density.flatten()
180
+
181
+ def _calculate_pdf_from_moments(self, fidelities, irrelevances): # PDF
182
+ """Calculate first derivative of EGDF (which is the PDF) from stored fidelities and irrelevances."""
183
+ self.logger.info("Calculating PDF from moments.")
184
+ if fidelities is None or irrelevances is None:
185
+ raise ValueError("Fidelities and irrelevances must be calculated before first derivative estimation.")
186
+
187
+ weights = self.weights.reshape(-1, 1)
188
+
189
+ # First order moments
190
+ f1 = np.sum(weights * fidelities, axis=0) / np.sum(weights) # mean_fidelity
191
+ h1 = np.sum(weights * irrelevances, axis=0) / np.sum(weights) # mean_irrelevance
192
+
193
+ # Second order moments (scaled by S as in MATLAB)
194
+ f2s = np.sum(weights * (fidelities**2 / self.S_opt), axis=0) / np.sum(weights)
195
+ fhs = np.sum(weights * (fidelities * irrelevances / self.S_opt), axis=0) / np.sum(weights)
196
+
197
+ # Calculate denominator w = (f1^2 + h1^2)^(3/2)
198
+ w = (f1**2 + h1**2)**(3/2)
199
+ eps = np.finfo(float).eps
200
+ w = np.where(w == 0, eps, w)
201
+
202
+ # First derivative formula from MATLAB: y = (f1^2 * f2s + f1 * h1 * fhs) / w
203
+ numerator = f1**2 * f2s + f1 * h1 * fhs
204
+ first_derivative = numerator / w
205
+ # first_derivative = first_derivative / self.zi
206
+
207
+ # if np.any(first_derivative < 0):
208
+ # warnings.warn("EGDF first derivative (PDF) contains negative values, indicating potential non-homogeneous data", RuntimeWarning)
209
+ return first_derivative.flatten()
210
+
211
+
212
def _calculate_final_results(self):
    """
    Evaluate the EGDF and PDF at the data points using the optimized parameters.

    Stores ``self.zi`` (data converted with ``LB_opt``/``UB_opt``),
    ``self.fi``, ``self.hi``, ``self.egdf`` and ``self.pdf``. When
    ``self.catch`` is true, copies of ``egdf``, ``pdf`` and ``zi`` are also
    written into ``self.params``.
    """
    self.logger.info("Calculating final EGDF and PDF with optimized parameters.")

    # Data mapped to the infinite domain with the optimized bounds
    self.zi = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    # EGDF together with the moments needed for later derivative work
    core_output = self._compute_egdf_core(self.S_opt, self.LB_opt, self.UB_opt)
    self.egdf, self.fi, self.hi = core_output
    self.pdf = self._calculate_pdf_from_moments(self.fi, self.hi)

    if not self.catch:
        return

    self.logger.info("Catching parameters for later use.")
    snapshot = {
        'egdf': self.egdf.copy(),
        'pdf': self.pdf.copy(),
        'zi': self.zi.copy()
    }
    self.params.update(snapshot)
236
+
237
+ def _generate_smooth_curves(self):
238
+ """Generate smooth curves for plotting and analysis."""
239
+ self.logger.info("Generating smooth curves for EGDF and PDF.")
240
+ try:
241
+ # Generate smooth EGDF and PDF
242
+ smooth_egdf, self.smooth_fi, self.smooth_hi = self._compute_egdf_core(
243
+ self.S_opt, self.LB_opt, self.UB_opt,
244
+ zi_data=self.z_points_n, zi_eval=self.z
245
+ )
246
+
247
+ smooth_pdf = self._calculate_pdf_from_moments(self.smooth_fi, self.smooth_hi)
248
+
249
+ self.egdf_points = smooth_egdf
250
+ self.pdf_points = smooth_pdf
251
+
252
+ # Store zi_n for derivative calculations
253
+ self.zi_n = DataConversion._convert_fininf(self.z_points_n, self.LB_opt, self.UB_opt)
254
+
255
+ # Mark as generated
256
+ self._computation_cache['smooth_curves_generated'] = True
257
+
258
+ if self.catch:
259
+ self.logger.info("Catching parameters for later use.")
260
+ self.params.update({
261
+ 'egdf_points': self.egdf_points.copy(),
262
+ 'pdf_points': self.pdf_points.copy(),
263
+ 'zi_points': self.zi_n.copy()
264
+ })
265
+
266
+ self.logger.info(f"Generated smooth curves with {self.n_points} points.")
267
+
268
+ except Exception as e:
269
+ # Log the error
270
+ error_msg = f"Could not generate smooth curves: {e}"
271
+ self.logger.error(error_msg)
272
+ self.params['errors'].append({
273
+ 'method': '_generate_smooth_curves',
274
+ 'error': error_msg,
275
+ 'exception_type': type(e).__name__
276
+ })
277
+ self.logger.warning(f"Could not generate smooth curves: {e}")
278
+ # Create fallback points using original data
279
+ self.egdf_points = self.egdf.copy() if hasattr(self, 'egdf') else None
280
+ self.pdf_points = self.pdf.copy() if hasattr(self, 'pdf') else None
281
+ self._computation_cache['smooth_curves_generated'] = False
282
+
283
+
284
def _plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """
    Draw the fitted EGDF and/or PDF with matplotlib and show the figure.

    Parameters:
    -----------
    plot_smooth : bool
        Request the smooth curves; honoured only when ``di_points_n``,
        ``egdf_points`` and ``pdf_points`` exist and are not None.
    plot : str
        One of 'gdf', 'pdf' or 'both'. With 'both' the PDF is drawn on a
        twin y-axis.
    bounds : bool
        Passed to ``_add_plot_formatting`` to draw the bound lines.
    extra_df : bool
        Passed to ``_plot_egdf`` to overlay the stored 'wedf'/'ksdf' series.
    figsize : tuple
        Figure size handed to ``plt.subplots``.

    Raises:
    -------
    RuntimeError
        If the model has not been fitted.
    ValueError
        If ``plot`` is invalid or the requested curve is missing from
        ``self.params``.

    Returns None without plotting (after a warning) when ``self.catch`` is false.
    """
    self.logger.info("Starting plot generation.")

    import matplotlib.pyplot as plt

    oversized = plot_smooth and (len(self.data) > self.max_data_size)
    if oversized and self.verbose:
        self.logger.warning(f"Given data size ({len(self.data)}) exceeds max_data_size ({self.max_data_size}). For optimal compute performance, set 'plot_smooth=False', or 'max_data_size' to a larger value whichever is appropriate.")

    # Plot data lives in self.params, which is only filled when catch=True
    if not self.catch:
        self.logger.warning("Plot is not available with argument catch=False")
        return

    if not self._fitted:
        self.logger.error("Must fit EGDF before plotting.")
        raise RuntimeError("Must fit EGDF before plotting.")

    if plot not in ('gdf', 'pdf', 'both'):
        self.logger.error("Invalid plot parameter. Must be 'gdf', 'pdf', or 'both'.")
        raise ValueError("plot parameter must be 'gdf', 'pdf', or 'both'")

    wants_gdf = plot in ('gdf', 'both')
    wants_pdf = plot in ('pdf', 'both')

    # The requested curves must already be stored
    if wants_gdf and self.params.get('egdf') is None:
        self.logger.error("EGDF must be calculated before plotting GDF")
        raise ValueError("EGDF must be calculated before plotting GDF")
    if wants_pdf and self.params.get('pdf') is None:
        self.logger.error("PDF must be calculated before plotting PDF")
        raise ValueError("PDF must be calculated before plotting PDF")

    self.logger.info("Preparing data for plotting.")
    x_points = self.data
    egdf_plot = self.params.get('egdf')
    pdf_plot = self.params.get('pdf')
    wedf = self.params.get('wedf')
    ksdf = self.params.get('ksdf')

    # Smooth curves are drawn only if every ingredient is present
    smooth_attrs = ('di_points_n', 'egdf_points', 'pdf_points')
    has_smooth = all(getattr(self, attr, None) is not None for attr in smooth_attrs)
    plot_smooth = plot_smooth and has_smooth

    fig, ax1 = plt.subplots(figsize=figsize)

    if wants_gdf:
        self._plot_egdf(ax1, x_points, egdf_plot, plot_smooth, extra_df, wedf, ksdf)

    if wants_pdf:
        pdf_only = plot == 'pdf'
        # PDF shares the main axis when alone, otherwise gets a twin axis
        pdf_axis = ax1 if pdf_only else ax1.twinx()
        self._plot_pdf(pdf_axis, x_points, pdf_plot, plot_smooth, is_secondary=not pdf_only)

    self._add_plot_formatting(ax1, plot, bounds)

    # Mark Z0 when it has been computed
    if getattr(self, 'z0', None) is not None:
        ax1.axvline(x=self.z0, color='magenta', linestyle='-.', linewidth=1,
                    alpha=0.8, label=f'Z0={self.z0:.3f}')
        # Refresh the legend so the Z0 entry shows up
        ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))

    plt.tight_layout()
    plt.show()
355
+
356
+ def _plot_egdf(self, ax, x_points, egdf_plot, plot_smooth, extra_df, wedf, ksdf):
357
+ """Plot EGDF components."""
358
+ self.logger.info("Plotting EGDF.")
359
+ if plot_smooth and hasattr(self, 'egdf_points') and self.egdf_points is not None:
360
+ ax.plot(x_points, egdf_plot, 'o', color='blue', label='EGDF', markersize=4)
361
+ ax.plot(self.di_points_n, self.egdf_points, color='blue',
362
+ linestyle='-', linewidth=2, alpha=0.8)
363
+ else:
364
+ ax.plot(x_points, egdf_plot, 'o-', color='blue', label='EGDF',
365
+ markersize=4, linewidth=1, alpha=0.8)
366
+
367
+ if extra_df:
368
+ if wedf is not None:
369
+ ax.plot(x_points, wedf, 's', color='lightblue',
370
+ label='WEDF', markersize=3, alpha=0.8)
371
+ if ksdf is not None:
372
+ ax.plot(x_points, ksdf, 's', color='cyan',
373
+ label='KS Points', markersize=3, alpha=0.8)
374
+
375
+ ax.set_ylabel('EGDF', color='blue')
376
+ ax.tick_params(axis='y', labelcolor='blue')
377
+ ax.set_ylim(0, 1)
378
+
379
+ def _plot_pdf(self, ax, x_points, pdf_plot, plot_smooth, is_secondary=False):
380
+ """Plot PDF components."""
381
+ self.logger.info("Plotting PDF.")
382
+ color = 'red'
383
+ if plot_smooth and hasattr(self, 'pdf_points') and self.pdf_points is not None:
384
+ ax.plot(x_points, pdf_plot, 'o', color=color, label='PDF', markersize=4)
385
+ ax.plot(self.di_points_n, self.pdf_points, color=color,
386
+ linestyle='-', linewidth=2, alpha=0.8)
387
+ max_pdf = np.max(self.pdf_points)
388
+ else:
389
+ ax.plot(x_points, pdf_plot, 'o-', color=color, label='PDF',
390
+ markersize=4, linewidth=1, alpha=0.8)
391
+ max_pdf = np.max(pdf_plot)
392
+
393
+ ax.set_ylabel('PDF', color=color)
394
+ ax.tick_params(axis='y', labelcolor=color)
395
+ ax.set_ylim(0, max_pdf * 1.1)
396
+
397
+ if is_secondary:
398
+ ax.legend(loc='upper right', bbox_to_anchor=(1, 1))
399
+
400
+ def _add_plot_formatting(self, ax1, plot, bounds):
401
+ """Add formatting, bounds, and legends to plot."""
402
+ ax1.set_xlabel('Data Points')
403
+
404
+ # Add bounds if requested
405
+ if bounds:
406
+ bound_info = [
407
+ (self.params.get('DLB'), 'green', '-', 'DLB'),
408
+ (self.params.get('DUB'), 'orange', '-', 'DUB'),
409
+ (self.params.get('LB'), 'purple', '--', 'LB'),
410
+ (self.params.get('UB'), 'brown', '--', 'UB')
411
+ ]
412
+
413
+ for bound, color, style, name in bound_info:
414
+ if bound is not None:
415
+ ax1.axvline(x=bound, color=color, linestyle=style, linewidth=2,
416
+ alpha=0.8, label=f"{name}={bound:.3f}")
417
+
418
+ # Add shaded regions
419
+ if self.params.get('LB') is not None:
420
+ ax1.axvspan(self.data.min(), self.params['LB'], alpha=0.15, color='purple')
421
+ if self.params.get('UB') is not None:
422
+ ax1.axvspan(self.params['UB'], self.data.max(), alpha=0.15, color='brown')
423
+
424
+ # Set limits and add grid
425
+ data_range = self.params['DUB'] - self.params['DLB']
426
+ padding = data_range * 0.1
427
+ ax1.set_xlim(self.params['DLB'] - padding, self.params['DUB'] + padding)
428
+
429
+ # Set title
430
+ titles = {
431
+ 'gdf': 'EGDF' + (' with Bounds' if bounds else ''),
432
+ 'pdf': 'PDF' + (' with Bounds' if bounds else ''),
433
+ 'both': 'EGDF and PDF' + (' with Bounds' if bounds else '')
434
+ }
435
+
436
+ ax1.set_title(titles[plot])
437
+ ax1.legend(loc='upper left', bbox_to_anchor=(0, 1))
438
+ ax1.grid(True, alpha=0.3)
439
+
440
+
441
+
442
+ # =============================================================================
443
+ # Derivative
444
+ # =============================================================================
445
+ def _get_egdf_second_derivative(self):
446
+ """Calculate second derivative of EGDF from stored fidelities and irrelevances."""
447
+ self.logger.info("Calculating second derivative of EGDF.")
448
+ if self.fi is None or self.hi is None:
449
+ self.logger.error("Fidelities and irrelevances must be calculated before second derivative estimation.")
450
+ raise ValueError("Fidelities and irrelevances must be calculated before second derivative estimation.")
451
+
452
+ weights = self.weights.reshape(-1, 1)
453
+
454
+ # Moment calculations
455
+ f1 = np.sum(weights * self.fi, axis=0) / np.sum(weights)
456
+ h1 = np.sum(weights * self.hi, axis=0) / np.sum(weights)
457
+ f2 = np.sum(weights * self.fi**2, axis=0) / np.sum(weights)
458
+ f3 = np.sum(weights * self.fi**3, axis=0) / np.sum(weights)
459
+ fh = np.sum(weights * self.fi * self.hi, axis=0) / np.sum(weights)
460
+ fh2 = np.sum(weights * self.fi * self.hi**2, axis=0) / np.sum(weights)
461
+ f2h = np.sum(weights * self.fi**2 * self.hi, axis=0) / np.sum(weights)
462
+
463
+ # Calculate components
464
+ b = f1**2 * f2 + f1 * h1 * fh
465
+ d = f1**2 + h1**2
466
+ eps = np.finfo(float).eps
467
+ d = np.where(d == 0, eps, d)
468
+
469
+ # Following
470
+ term1 = f1 * (h1 * (f3 - fh2) - f2 * fh)
471
+ term2 = 2 * f1**2 * f2h + h1 * fh**2
472
+ term3 = (6 * b * (f1 * fh - h1 * f2)) / d
473
+
474
+ d2 = -1 / (d**(1.5)) * (2 * (term1 - term2) + term3)
475
+ second_derivative = d2 / (self.S_opt**2)
476
+ # second_derivative = second_derivative / self.zi**2
477
+ self.logger.info("Second derivative calculation completed.")
478
+ return second_derivative.flatten()
479
+
480
+ def _get_egdf_third_derivative(self):
481
+ """Calculate third derivative of EGDF from stored fidelities and irrelevances."""
482
+ self.logger.info("Calculating third derivative of EGDF.")
483
+ if self.fi is None or self.hi is None:
484
+ self.logger.error("Fidelities and irrelevances must be calculated before third derivative estimation.")
485
+ raise ValueError("Fidelities and irrelevances must be calculated before third derivative estimation.")
486
+
487
+ weights = self.weights.reshape(-1, 1)
488
+
489
+ # All required moments
490
+ f1 = np.sum(weights * self.fi, axis=0) / np.sum(weights)
491
+ h1 = np.sum(weights * self.hi, axis=0) / np.sum(weights)
492
+ f2 = np.sum(weights * self.fi**2, axis=0) / np.sum(weights)
493
+ f3 = np.sum(weights * self.fi**3, axis=0) / np.sum(weights)
494
+ f4 = np.sum(weights * self.fi**4, axis=0) / np.sum(weights)
495
+ fh = np.sum(weights * self.fi * self.hi, axis=0) / np.sum(weights)
496
+ h2 = np.sum(weights * self.hi**2, axis=0) / np.sum(weights)
497
+ fh2 = np.sum(weights * self.fi * self.hi**2, axis=0) / np.sum(weights)
498
+ f2h = np.sum(weights * self.fi**2 * self.hi, axis=0) / np.sum(weights)
499
+ f2h2 = np.sum(weights * self.fi**2 * self.hi**2, axis=0) / np.sum(weights)
500
+ f3h = np.sum(weights * self.fi**3 * self.hi, axis=0) / np.sum(weights)
501
+ fh3 = np.sum(weights * self.fi * self.hi**3, axis=0) / np.sum(weights)
502
+
503
+ # Following
504
+ # Derivative calculations
505
+ dh1 = -f2
506
+ df1 = fh
507
+ df2 = 2 * f2h
508
+ dfh = -f3 + fh2
509
+ dfh2 = -2 * f3h + fh3
510
+ df3 = 3 * f3h
511
+ df2h = -f4 + 2 * f2h2
512
+
513
+ # u4 and its derivative
514
+ u4 = h1 * f3 - h1 * fh2 - f2 * fh
515
+ du4 = dh1 * f3 + h1 * df3 - dh1 * fh2 - h1 * dfh2 - df2 * fh - f2 * dfh
516
+
517
+ # u and its derivative
518
+ u = f1 * u4
519
+ du = df1 * u4 + f1 * du4
520
+
521
+ # v components
522
+ v4a = (f1**2) * f2h
523
+ dv4a = 2 * f1 * df1 * f2h + (f1**2) * df2h
524
+ v4b = h1 * fh**2
525
+ dv4b = dh1 * (fh**2) + 2 * h1 * fh * dfh
526
+
527
+ v = 2 * v4a + v4b
528
+ dv = 2 * dv4a + dv4b
529
+
530
+ # x components
531
+ x4a = f1**2 * f2 + f1 * h1 * fh
532
+ dx4a = 2 * f1 * df1 * f2 + (f1**2) * df2 + df1 * h1 * fh + f1 * dh1 * fh + f1 * h1 * dfh
533
+ x4b = f1 * fh - h1 * f2
534
+ dx4b = df1 * fh + f1 * dfh - dh1 * f2 - h1 * df2
535
+
536
+ x = 6 * x4a * x4b
537
+ dx = 6 * (dx4a * x4b + x4a * dx4b)
538
+
539
+ # d components
540
+ d = f1**2 + h1**2
541
+ dd = 2 * (f1 * df1 + h1 * dh1)
542
+ eps = np.finfo(float).eps
543
+ d = np.where(d == 0, eps, d)
544
+
545
+ # Final calculation
546
+ term1 = (du - dv) / (d**1.5) - (1.5 * (u - v)) / (d**2.5) * dd
547
+ term2 = dx / (d**2.5) - (2.5 * x) / (d**3.5) * dd
548
+
549
+ d3p = -2 * term1 - term2
550
+ third_derivative = 2 * d3p / (self.S_opt**3)
551
+ # third_derivative = third_derivative / (self.zi**3)
552
+ self.logger.info("Third derivative calculation completed.")
553
+ return third_derivative.flatten()
554
+
555
+ def _get_egdf_fourth_derivative(self):
556
+ """Calculate fourth derivative of EGDF using numerical differentiation."""
557
+ self.logger.info("Calculating fourth derivative of EGDF using numerical differentiation.")
558
+ if self.fi is None or self.hi is None:
559
+ self.logger.error("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
560
+ raise ValueError("Fidelities and irrelevances must be calculated before fourth derivative estimation.")
561
+
562
+ # For fourth derivative, use numerical differentiation as it's complex
563
+ dz = 1e-7
564
+
565
+ # Get third derivatives at slightly shifted points
566
+ zi_plus = self.zi + dz
567
+ zi_minus = self.zi - dz
568
+
569
+ # Store original zi
570
+ original_zi = self.zi.copy()
571
+
572
+ # Calculate third derivative at zi + dz
573
+ self.zi = zi_plus
574
+ self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
575
+ third_plus = self._get_egdf_third_derivative()
576
+
577
+ # Calculate third derivative at zi - dz
578
+ self.zi = zi_minus
579
+ self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
580
+ third_minus = self._get_egdf_third_derivative()
581
+
582
+ # Restore original zi and recalculate fi, hi
583
+ self.zi = original_zi
584
+ self._calculate_fidelities_irrelevances_at_given_zi(self.zi)
585
+
586
+ # Numerical derivative
587
+ fourth_derivative = (third_plus - third_minus) / (2 * dz) * self.zi
588
+
589
+ self.logger.info("Fourth derivative calculation completed.")
590
+ return fourth_derivative.flatten()
591
+
592
+ def _calculate_fidelities_irrelevances_at_given_zi(self, zi):
593
+ """Helper method to recalculate fidelities and irrelevances for current zi."""
594
+ self.logger.info("Recalculating fidelities and irrelevances for given zi.")
595
+ if self.LB_opt is None or self.UB_opt is None or self.S_opt is None:
596
+ self.logger.error("Optimized parameters LB_opt, UB_opt, and S_opt must be set before recalculating fidelities and irrelevances.")
597
+ # Convert to infinite domain
598
+ zi_n = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)
599
+ # is zi given then use it, else use self.zi
600
+ if zi is None:
601
+ zi_d = self.zi
602
+ else:
603
+ zi_d = zi
604
+
605
+ # Calculate R matrix
606
+ eps = np.finfo(float).eps
607
+ R = zi_n.reshape(-1, 1) / (zi_d + eps).reshape(1, -1)
608
+
609
+ # Get characteristics
610
+ gc = GnosticsCharacteristics(R=R, verbose=self.verbose)
611
+ q, q1 = gc._get_q_q1(S=self.S_opt)
612
+
613
+ # Store fidelities and irrelevances
614
+ self.fi = gc._fi(q=q, q1=q1)
615
+ self.hi = gc._hi(q=q, q1=q1)
616
+
617
+ def _get_results(self)-> dict:
618
+ """Return fitting results."""
619
+ self.logger.info("Retrieving fitting results.")
620
+ if not self._fitted:
621
+ raise RuntimeError("Must fit EGDF before getting results.")
622
+
623
+ # selected key from params if exists
624
+ keys = ['DLB', 'DUB', 'LB', 'UB', 'S_opt', 'z0', 'egdf', 'pdf',
625
+ 'egdf_points', 'pdf_points', 'zi', 'zi_points', 'weights']
626
+ results = {key: self.params.get(key) for key in keys if key in self.params}
627
+ return results
628
+
629
+ # =============================================================================
630
+ # MAIN FITTING PROCESS
631
+ # =============================================================================
632
+
633
+ def _fit_egdf(self, plot:bool = True):
634
+ """Main fitting process with improved organization."""
635
+ self.logger.info("Starting EGDF fitting process.")
636
+ try:
637
+ # Step 1: Data preprocessing
638
+ self.logger.info("Starting data preprocessing.")
639
+ self.data = np.sort(self.data)
640
+ self._estimate_data_bounds()
641
+ self._transform_data_to_standard_domain()
642
+ self._estimate_weights()
643
+
644
+ # Step 2: Bounds estimation
645
+ self.logger.info("Starting bounds estimation.")
646
+ self._estimate_initial_probable_bounds()
647
+ self._generate_evaluation_points()
648
+
649
+ # Step 3: Get distribution function values for optimization
650
+ self.logger.info("Getting distribution function values for optimization.")
651
+ self.df_values = self._get_distribution_function_values(use_wedf=self.wedf)
652
+
653
+ # Step 4: Parameter optimization
654
+ self.logger.info("Starting parameter optimization.")
655
+ self._determine_optimization_strategy()
656
+
657
+ # Step 5: Calculate final EGDF and PDF
658
+ self.logger.info("Calculating final EGDF and PDF.")
659
+ self._calculate_final_results()
660
+
661
+ # Step 6: Generate smooth curves for plotting and analysis
662
+ self.logger.info("Generating smooth curves for plotting and analysis.")
663
+ self._generate_smooth_curves()
664
+
665
+ # Step 7: Transform bounds back to original domain
666
+ self.logger.info("Transforming bounds back to original domain.")
667
+ self._transform_bounds_to_original_domain()
668
+
669
+ # Mark as fitted (Step 8 is now optional via marginal_analysis())
670
+ self._fitted = True
671
+
672
+ # Compute Z0 point
673
+ self.logger.info("Computing Z0 point.")
674
+ self._compute_z0()
675
+
676
+ self.logger.info("EGDF fitting completed successfully.")
677
+
678
+ if plot:
679
+ self.logger.info("Plotting results.")
680
+ self._plot()
681
+
682
+ # clean up computation cache
683
+ if self.flush:
684
+ self.logger.info("Cleaning up computation cache.")
685
+ self._cleanup_computation_cache()
686
+
687
+ except Exception as e:
688
+ error_msg = f"EGDF fitting failed: {e}"
689
+ self.logger.error(error_msg)
690
+ self.params['errors'].append({
691
+ 'method': '_fit_egdf',
692
+ 'error': error_msg,
693
+ 'exception_type': type(e).__name__
694
+ })
695
+ self.logger.info(f"Error during EGDF fitting: {e}")
696
+ raise e
697
+
698
+ # z0 compute
699
+ def _compute_z0(self, optimize: bool = None):
700
+ """
701
+ Compute the Z0 point where PDF is maximum using the Z0Estimator class.
702
+
703
+ Parameters:
704
+ -----------
705
+ optimize : bool, optional
706
+ If True, use interpolation-based methods for higher accuracy.
707
+ If False, use simple linear search on existing points.
708
+ If None, uses the instance's z0_optimize setting.
709
+ """
710
+ self.logger.info("Starting Z0 computation.")
711
+ if self.z is None:
712
+ self.logger.error("Data must be transformed (self.z) before Z0 estimation.")
713
+ raise ValueError("Data must be transformed (self.z) before Z0 estimation.")
714
+
715
+ # Use provided optimize parameter or fall back to instance setting
716
+ use_optimize = optimize if optimize is not None else self.z0_optimize
717
+
718
+ self.logger.info("EGDF: Computing Z0 point using Z0Estimator...")
719
+
720
+ try:
721
+ # Create Z0Estimator instance with proper constructor signature
722
+ z0_estimator = Z0Estimator(
723
+ gdf_object=self, # Pass the EGDF object itself
724
+ optimize=use_optimize,
725
+ verbose=self.verbose
726
+ )
727
+
728
+ # Call fit() method to estimate Z0
729
+ self.z0 = z0_estimator.fit()
730
+
731
+ # Get estimation info for debugging and storage
732
+ if self.catch:
733
+ estimation_info = z0_estimator.get_estimation_info()
734
+ self.params.update({
735
+ 'z0': float(self.z0) if self.z0 is not None else None,
736
+ 'z0_method': estimation_info.get('z0_method', 'unknown'),
737
+ 'z0_estimation_info': estimation_info
738
+ })
739
+
740
+ method_used = z0_estimator.get_estimation_info().get('z0_method', 'unknown')
741
+ self.logger.info(f'EGDF: Z0 point computed successfully, (method: {method_used})')
742
+
743
+ except Exception as e:
744
+ # Log the error
745
+ error_msg = f"Z0 estimation failed: {str(e)}"
746
+ self.logger.error(error_msg)
747
+ self.params['errors'].append({
748
+ 'method': '_compute_z0',
749
+ 'error': error_msg,
750
+ 'exception_type': type(e).__name__
751
+ })
752
+
753
+ self.logger.warning(f"Warning: Z0Estimator failed with error: {e}")
754
+ self.logger.info("Falling back to simple maximum finding...")
755
+
756
+ # Fallback to simple maximum finding
757
+ self.logger.info("Using fallback method for Z0 computation.")
758
+ self._compute_z0_fallback()
759
+
760
+ if self.catch:
761
+ self.logger.info("Catching fallback Z0 parameters for later use.")
762
+ self.params.update({
763
+ 'z0': float(self.z0),
764
+ 'z0_method': 'fallback_simple_maximum',
765
+ 'z0_estimation_info': {'error': str(e)}
766
+ })
767
+
768
+ def _compute_z0_fallback(self):
769
+ """
770
+ Fallback method for Z0 computation using simple maximum finding.
771
+ """
772
+ self.logger.info("Starting fallback Z0 computation.")
773
+
774
+ if not hasattr(self, 'di_points_n') or not hasattr(self, 'pdf_points'):
775
+ self.logger.error("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
776
+ raise ValueError("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
777
+
778
+ self.logger.info('Using fallback method for Z0 point...')
779
+
780
+ # Find index with maximum PDF
781
+ max_idx = np.argmax(self.pdf_points)
782
+ self.z0 = self.di_points_n[max_idx]
783
+
784
+ self.logger.info(f"Z0 point (fallback method).")
785
+
786
def analyze_z0(self, figsize: tuple = (12, 6)) -> Dict[str, Any]:
    """
    Re-run the Z0 estimation on this object, plot it, and return the details.

    A fresh ``Z0Estimator`` is built from this object using the instance's
    ``z0_optimize`` and ``verbose`` settings, fitted, and asked to draw its
    analysis plot.

    Parameters:
    -----------
    figsize : tuple
        Figure size forwarded to ``plot_z0_analysis``.

    Returns:
    --------
    Dict[str, Any]
        The value returned by the estimator's ``get_estimation_info()``.

    Raises:
    -------
    ValueError
        If ``z0`` is absent or None on this object.
    """
    self.logger.info("Starting Z0 analysis.")

    # Guard: nothing to analyze until a Z0 value exists.
    if getattr(self, 'z0', None) is None:
        not_ready = "Z0 must be computed before analysis. Call fit() first."
        self.logger.error(not_ready)
        raise ValueError(not_ready)

    estimator = Z0Estimator(
        gdf_object=self,
        optimize=self.z0_optimize,
        verbose=self.verbose
    )

    # Fit first so the info and the plot reflect a completed estimation.
    estimator.fit()
    report = estimator.get_estimation_info()
    estimator.plot_z0_analysis(figsize=figsize)

    self.logger.info("Z0 analysis completed.")
    return report
823
+