machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1202 @@
1
+ '''
2
+ Base Compute class for GDF
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ '''
7
+
8
+ import numpy as np
9
+ import warnings
10
+ from typing import Dict, Any, Tuple
11
+ from scipy.optimize import minimize
12
+ import logging
13
+ from machinegnostics.magcal.util.logging import get_logger
14
+ from machinegnostics.magcal.characteristics import GnosticsCharacteristics
15
+ from machinegnostics.magcal.gdf.base_df import BaseDistFunc
16
+ from machinegnostics.magcal.data_conversion import DataConversion
17
+ from machinegnostics.magcal.gdf.wedf import WEDF
18
+ from machinegnostics.magcal.mg_weights import GnosticsWeights
19
+ from machinegnostics.magcal.gdf.z0_estimator import Z0Estimator
20
+ from machinegnostics.magcal.gdf.distfunc_engine import DistFuncEngine
21
+
22
+ class BaseDistFuncCompute(BaseDistFunc):
23
+ '''Base Distribution Function class
24
+ Base class for EGDF (Estimating Global Distribution Function).
25
+
26
+ This class provides a comprehensive framework for estimating global distribution
27
+ functions with optimization capabilities and derivative analysis.
28
+ '''
29
+
30
+ # Class constants for optimization bounds
31
+ _OPTIMIZATION_BOUNDS = {
32
+ 'S_MIN': 0.05, 'S_MAX': 100.0,
33
+ 'LB_MIN': 1e-6, 'LB_MAX': np.exp(-1.000001),
34
+ 'UB_MIN': np.exp(1.000001), 'UB_MAX': 1e6,
35
+ 'Z0_SEARCH_FACTOR': 0.1 # For Z0 search range
36
+ }
37
+
38
+ # Numerical constants
39
+ _NUMERICAL_EPS = np.finfo(float).eps
40
+ _NUMERICAL_MAX = 1e6
41
+ _DERIVATIVE_TOLERANCE = 1e-6
42
+
43
def __init__(self,
             data: np.ndarray,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 'auto',
             z0_optimize: bool = True,
             varS: bool = False,
             tolerance: float = 1e-3,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = True,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """Initialize the EGDF class with comprehensive validation."""

    # Containers where later processing steps record soft and hard issues.
    self.params = {}
    self.params['warnings'] = []
    self.params['errors'] = []

    # Raw configuration, stored exactly as supplied by the caller.
    self.data = data
    self.DLB = DLB
    self.DUB = DUB
    self.LB = LB
    self.UB = UB
    self.S = S
    self.z0_optimize = z0_optimize
    self.varS = varS
    self.tolerance = tolerance
    self.data_form = data_form
    self.n_points = n_points
    self.homogeneous = homogeneous
    self.catch = catch
    # Default to unit weights (one per observation) when none are given.
    if weights is None:
        self.weights = np.ones_like(data)
    else:
        self.weights = weights
    self.wedf = wedf
    self.opt_method = opt_method
    self.verbose = verbose
    self.max_data_size = max_data_size
    self.flush = flush

    # Lifecycle flags tracking which pipeline stages have completed.
    self._fitted = False
    self._derivatives_calculated = False
    self._marginal_analysis_done = False

    # safe for z0 compute
    self.pdf_points = None

    # Scratch cache reused across the fitting pipeline.
    self._computation_cache = {
        'data_converter': None,
        'characteristics_computer': None,
        'weights_normalized': None,
        'smooth_curves_generated': False
    }

    # Verbose mode lowers the logging threshold to DEBUG.
    log_level = logging.DEBUG if verbose else logging.WARNING
    self.logger = get_logger(self.__class__.__name__, log_level)
    self.logger.debug(f"{self.__class__.__name__} initialized:")
108
+ # =============================================================================
109
+ # VALIDATION AND INITIALIZATION
110
+ # =============================================================================
111
+
112
def _validate_inputs(self):
    """Comprehensive input validation with error and warning logging.

    Validates the data array, bounds (DLB/DUB/LB/UB), scale parameter S,
    varS flag, tolerance, data_form, n_points, weights, and the remaining
    configuration flags. Hard failures are appended to
    ``self.params['errors']`` before the exception is raised; soft issues
    are appended to ``self.params['warnings']``.

    Raises:
        TypeError: when an argument has the wrong type.
        ValueError: when an argument has an invalid value.
    """
    try:
        self.logger.info("Validating inputs.")
        # Data validation
        if not isinstance(self.data, np.ndarray):
            error_msg = "Data must be a numpy array."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'data_type_received': type(self.data).__name__
            })
            raise TypeError(error_msg)

        if self.data.size == 0:
            error_msg = "Data array cannot be empty."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'data_size': self.data.size
            })
            raise ValueError(error_msg)

        if not np.isfinite(self.data).all():
            non_finite_count = np.sum(~np.isfinite(self.data))
            error_msg = f"Data must contain only finite values. Found {non_finite_count} non-finite values."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'non_finite_count': int(non_finite_count),
                'total_data_points': len(self.data)
            })
            raise ValueError(error_msg)

        # Data dimensional validation
        if self.data.ndim != 1:
            error_msg = f"Data must be a 1-dimensional array. Got {self.data.ndim}-dimensional array."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'data_shape': self.data.shape
            })
            raise ValueError(error_msg)

        # Bounds validation: each bound must be finite numeric or None
        for bound, name in [(self.DLB, 'DLB'), (self.DUB, 'DUB'), (self.LB, 'LB'), (self.UB, 'UB')]:
            if bound is not None and (not isinstance(bound, (int, float)) or not np.isfinite(bound)):
                error_msg = f"{name} must be a finite numeric value or None."
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'ValueError',
                    'parameter': name,
                    'value': bound,
                    'value_type': type(bound).__name__
                })
                raise ValueError(error_msg)

        # Bounds logical validation
        if self.DLB is not None and self.DUB is not None and self.DLB >= self.DUB:
            error_msg = f"DLB ({self.DLB}) must be less than DUB ({self.DUB}) when both are provided."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'DLB': float(self.DLB),
                'DUB': float(self.DUB)
            })
            raise ValueError(error_msg)

        if self.LB is not None and self.UB is not None and self.LB >= self.UB:
            error_msg = f"LB ({self.LB}) must be less than UB ({self.UB}) when both are provided."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'LB': float(self.LB),
                'UB': float(self.UB)
            })
            raise ValueError(error_msg)

        # S parameter validation: numeric positive or the string 'auto'
        if not isinstance(self.S, (int, float, str)):
            error_msg = "S must be a numeric positive value or 'auto'."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'S_type': type(self.S).__name__,
                'S_value': self.S
            })
            raise TypeError(error_msg)

        if isinstance(self.S, (int, float)) and self.S <= 0:
            error_msg = f"S must be positive when specified as a number. Got {self.S}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'S_value': float(self.S)
            })
            raise ValueError(error_msg)

        # S string validation when it's a string
        if isinstance(self.S, str) and self.S.lower() != 'auto':
            error_msg = f"When S is a string, it must be 'auto'. Got '{self.S}'."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'S_value': self.S
            })
            raise ValueError(error_msg)

        # varS parameter validation
        if not isinstance(self.varS, bool):
            error_msg = "varS must be a boolean value. VarS can be only true for 'ELDF' and 'QLDF'."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'varS_type': type(self.varS).__name__,
                'varS_value': self.varS
            })
            raise TypeError(error_msg)

        # varS can be only true with S = 'auto'
        if self.varS and self.S != 'auto':
            error_msg = f"varS can only be true when S is set to 'auto'. Got S='{self.S}', varS={self.varS}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'S_value': self.S,
                'varS_value': self.varS
            })
            raise ValueError(error_msg)

        # Tolerance validation
        if not isinstance(self.tolerance, (int, float)) or self.tolerance <= 0:
            error_msg = f"Tolerance must be a positive numeric value. Got {self.tolerance}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'tolerance_value': self.tolerance,
                'tolerance_type': type(self.tolerance).__name__
            })
            raise ValueError(error_msg)

        # Tolerance range validation with warnings (soft issues only)
        if self.tolerance > 1.0:
            warning_msg = f"tolerance ({self.tolerance}) is unusually large."
            self.logger.warning(warning_msg)
            self.params['warnings'].append({
                'method': '_validate_inputs',
                'message': warning_msg,
                'severity': 'medium',
                'tolerance_value': float(self.tolerance)
            })
            self.logger.info(f"Warning: {warning_msg}")

        # NOTE: this check was previously duplicated verbatim, causing the
        # same warning to be logged and recorded twice; it now runs once.
        if self.tolerance < 1e-10:
            warning_msg = f"tolerance ({self.tolerance}) is very small and may cause numerical issues."
            self.logger.warning(warning_msg)
            self.params['warnings'].append({
                'method': '_validate_inputs',
                'message': warning_msg,
                'severity': 'high',
                'tolerance_value': float(self.tolerance)
            })
            self.logger.info(f"Warning: {warning_msg}")

        # data_form validation
        if self.data_form not in ['a', 'm']:
            error_msg = f"data_form must be 'a' for additive or 'm' for multiplicative. Got '{self.data_form}'."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'data_form_value': self.data_form
            })
            raise ValueError(error_msg)

        # n_points validation
        if not isinstance(self.n_points, int) or self.n_points <= 0:
            error_msg = f"n_points must be a positive integer. Got {self.n_points}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'n_points_value': self.n_points,
                'n_points_type': type(self.n_points).__name__
            })
            raise ValueError(error_msg)

        # n_points reasonable range validation with warning
        if self.n_points > 10000:
            warning_msg = f"n_points ({self.n_points}) is very large and may impact performance."
            self.logger.warning(warning_msg)
            self.params['warnings'].append({
                'method': '_validate_inputs',
                'message': warning_msg,
                'severity': 'medium',
                'n_points_value': self.n_points
            })
            self.logger.warning(f"Warning: {warning_msg}")

        # Weights validation (type, length, sign, finiteness)
        if self.weights is not None:
            if not isinstance(self.weights, np.ndarray):
                error_msg = "weights must be a numpy array."
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'TypeError',
                    'weights_type': type(self.weights).__name__
                })
                raise TypeError(error_msg)

            if len(self.weights) != len(self.data):
                error_msg = f"Weights must have the same length as data. Got weights length {len(self.weights)}, data length {len(self.data)}."
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'ValueError',
                    'weights_length': len(self.weights),
                    'data_length': len(self.data)
                })
                raise ValueError(error_msg)

            if not np.all(self.weights >= 0):
                negative_count = np.sum(self.weights < 0)
                error_msg = f"All weights must be non-negative. Found {negative_count} negative weights."
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'ValueError',
                    'negative_weights_count': int(negative_count)
                })
                raise ValueError(error_msg)

            # Weights finite values validation
            if not np.isfinite(self.weights).all():
                non_finite_weights = np.sum(~np.isfinite(self.weights))
                error_msg = f"All weights must be finite values. Found {non_finite_weights} non-finite weights."
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'ValueError',
                    'non_finite_weights_count': int(non_finite_weights)
                })
                raise ValueError(error_msg)

        # z0_optimize validation
        if not isinstance(self.z0_optimize, bool):
            error_msg = f"z0_optimize must be a boolean value. Got {type(self.z0_optimize).__name__}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'z0_optimize_type': type(self.z0_optimize).__name__,
                'z0_optimize_value': self.z0_optimize
            })
            raise TypeError(error_msg)

        # opt_method validation
        valid_methods = ['L-BFGS-B', 'SLSQP', 'TNC', 'trust-constr', 'Powell', 'COBYLA']
        if not isinstance(self.opt_method, str):
            error_msg = f"opt_method must be a string. Got {type(self.opt_method).__name__}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'opt_method_type': type(self.opt_method).__name__,
                'opt_method_value': self.opt_method
            })
            raise TypeError(error_msg)

        if self.opt_method not in valid_methods:
            error_msg = f"opt_method must be one of {valid_methods}. Got '{self.opt_method}'."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'opt_method_value': self.opt_method,
                'valid_methods': valid_methods
            })
            raise ValueError(error_msg)

        # max_data_size validation
        if not isinstance(self.max_data_size, int) or self.max_data_size <= 0:
            error_msg = f"max_data_size must be a positive integer. Got {self.max_data_size}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'ValueError',
                'max_data_size_value': self.max_data_size,
                'max_data_size_type': type(self.max_data_size).__name__
            })
            raise ValueError(error_msg)

        # flush parameter validation
        if not isinstance(self.flush, bool):
            error_msg = f"flush must be a boolean value. Got {type(self.flush).__name__}."
            self.logger.error(error_msg)
            self.params['errors'].append({
                'method': '_validate_inputs',
                'error': error_msg,
                'exception_type': 'TypeError',
                'flush_type': type(self.flush).__name__,
                'flush_value': self.flush
            })
            raise TypeError(error_msg)

        # if length of data exceeds max_data_size, set flush to True with warning
        if len(self.data) > self.max_data_size and not self.flush:
            warning_msg = f"Data size ({len(self.data)}) exceeds max_data_size ({self.max_data_size}). For optimal compute performance, setting 'flush=True'."
            self.logger.warning(warning_msg)
            self.params['warnings'].append({
                'method': '_validate_inputs',
                'message': warning_msg,
                'severity': 'medium',
                'data_size': len(self.data),
                'max_data_size': self.max_data_size,
                'action_taken': 'flush_set_to_true'
            })
            self.flush = True
            self.logger.info(warning_msg)

        # Boolean parameters validation
        boolean_params = [
            (self.homogeneous, 'homogeneous'),
            (self.catch, 'catch'),
            (self.wedf, 'wedf'),
            (self.verbose, 'verbose')
        ]

        for param, name in boolean_params:
            if not isinstance(param, bool):
                error_msg = f"{name} must be a boolean value. Got {type(param).__name__}."
                # Log before raising, consistent with every other check above.
                self.logger.error(error_msg)
                self.params['errors'].append({
                    'method': '_validate_inputs',
                    'error': error_msg,
                    'exception_type': 'TypeError',
                    'parameter': name,
                    'parameter_type': type(param).__name__,
                    'parameter_value': param
                })
                raise TypeError(error_msg)

    except Exception as e:
        if self.verbose:
            self.logger.error(f"Input validation failed: {str(e)}")
        raise
505
+
506
def _store_initial_params(self):
    """Snapshot the constructor arguments into ``self.params``.

    Stores a sorted copy of the data plus every configuration value so the
    fitted object is self-describing. BUG FIX: the original implementation
    also wrote ``'warnings': []`` and ``'errors': []`` here, silently
    discarding everything recorded during input validation; those keys are
    now left untouched.
    """
    self.logger.info("Storing initial parameters.")

    self.params.update({
        'data': np.sort(self.data).copy(),
        'DLB': self.DLB,
        'DUB': self.DUB,
        'LB': self.LB,
        'UB': self.UB,
        'S': self.S,
        'z0_optimize': self.z0_optimize,
        'varS': self.varS,
        'tolerance': self.tolerance,
        'data_form': self.data_form,
        'n_points': self.n_points,
        'homogeneous': self.homogeneous,
        'catch': self.catch,
        'weights': self.weights.copy() if self.weights is not None else None,
        'compute_wedf': self.wedf,
        'opt_method': self.opt_method,
        'verbose': self.verbose,
        'max_data_size': self.max_data_size,
        'flush': self.flush,
        # 'warnings' / 'errors' intentionally not reset here — keep the
        # entries accumulated by _validate_inputs and other steps.
    })
533
+
534
+ # =============================================================================
535
+ # DATA PREPROCESSING AND TRANSFORMATION
536
+ # =============================================================================
537
+
538
def _get_data_converter(self):
    """Return the cached DataConversion instance, creating it on first use."""
    self.logger.info("Retrieving data converter.")
    cache = self._computation_cache
    if cache['data_converter'] is None:
        cache['data_converter'] = DataConversion()
    return cache['data_converter']
544
+
545
def _estimate_data_bounds(self):
    """Fill in the data bounds DLB/DUB from the sample extremes when absent."""
    self.logger.info("Estimating data bounds.")
    self.DLB = np.min(self.data) if self.DLB is None else self.DLB
    self.DUB = np.max(self.data) if self.DUB is None else self.DUB

    # Degenerate sample: every observation identical (bounds collapse).
    if self.DLB >= self.DUB:
        self.logger.info("DLB >= DUB, All values are same case.")

    if self.catch:
        self.params['DLB'] = float(self.DLB)
        self.params['DUB'] = float(self.DUB)
559
+
560
def _estimate_weights(self):
    """Normalize sample weights to sum to n; apply gnostic weights when the
    sample is flagged non-homogeneous."""
    self.logger.info("Estimating and normalizing weights.")
    if self.weights is None:
        self.weights = np.ones_like(self.data, dtype=float)
    else:
        self.weights = np.asarray(self.weights, dtype=float)

    # Rescale so the weights sum to the number of observations.
    total = np.sum(self.weights)
    if total <= 0:
        raise ValueError("Sum of weights must be positive.")
    self.weights = self.weights / total * len(self.weights)

    # Non-homogeneous samples get additional gnostic down-weighting.
    if not self.homogeneous:
        self.logger.info("Applying gnostic weights for non-homogeneous data.")
        gw = GnosticsWeights(verbose=self.verbose)
        self.gweights = gw._get_gnostic_weights(self.z)
        self.weights = self.gweights * self.weights

    # Cache normalized weights for reuse by later pipeline stages.
    self._computation_cache['weights_normalized'] = self.weights.copy()

    if self.catch:
        self.params['weights'] = self.weights.copy()
587
+
588
def _transform_data_to_standard_domain(self):
    """Map the raw data sample into the standard z-domain as ``self.z``."""
    self.logger.info("Transforming data to standard z-domain.")
    converter = self._get_data_converter()

    # Additive ('a') and multiplicative ('m') forms use different mappings.
    if self.data_form == 'a':
        self.z = converter._convert_az(self.data, self.DLB, self.DUB)
    elif self.data_form == 'm':
        self.z = converter._convert_mz(self.data, self.DLB, self.DUB)

    if self.catch:
        self.params['z'] = self.z.copy()
600
+
601
def _generate_evaluation_points(self):
    """Build an evenly spaced evaluation grid over [DLB, DUB] and its
    z-domain image for smooth curve rendering."""
    self.logger.info("Generating evaluation points.")
    self.di_points_n = np.linspace(self.DLB, self.DUB, self.n_points)

    converter = self._get_data_converter()
    if self.data_form == 'a':
        self.z_points_n = converter._convert_az(self.di_points_n, self.DLB, self.DUB)
    else:
        self.z_points_n = converter._convert_mz(self.di_points_n, self.DLB, self.DUB)

    if self.catch:
        self.params['z_points'] = self.z_points_n.copy()
        self.params['di_points'] = self.di_points_n.copy()
617
+
618
+ # =============================================================================
619
+ # BOUNDS ESTIMATION
620
+ # =============================================================================
621
+
622
def _estimate_initial_probable_bounds(self):
    """Seed the z-domain probable bounds LB_init/UB_init, deriving them from
    the data bounds when the user did not supply LB/UB."""
    converter = self._get_data_converter()
    self.logger.info("Estimating initial probable bounds (LB and UB).")
    additive = self.data_form == 'a'

    # --- Lower probable bound ---
    if self.LB is None:
        if additive:
            # Pad by half the data span below DLB.
            half_span = (self.DUB - self.DLB) / 2
            self.LB_init = converter._convert_az(self.DLB - half_span, self.DLB, self.DUB)
        elif self.data_form == 'm':
            # Geometric padding for multiplicative data.
            raw_lb = self.DLB / np.sqrt(self.DUB / self.DLB)
            self.LB_init = converter._convert_mz(raw_lb, self.DLB, self.DUB)
    elif additive:
        self.LB_init = converter._convert_az(self.LB, self.DLB, self.DUB)
    else:
        self.LB_init = converter._convert_mz(self.LB, self.DLB, self.DUB)

    # --- Upper probable bound ---
    if self.UB is None:
        if additive:
            half_span = (self.DUB - self.DLB) / 2
            self.UB_init = converter._convert_az(self.DUB + half_span, self.DLB, self.DUB)
        elif self.data_form == 'm':
            raw_ub = self.DUB * np.sqrt(self.DUB / self.DLB)
            self.UB_init = converter._convert_mz(raw_ub, self.DLB, self.DUB)
    elif additive:
        self.UB_init = converter._convert_az(self.UB, self.DLB, self.DUB)
    else:
        self.UB_init = converter._convert_mz(self.UB, self.DLB, self.DUB)

    if self.catch:
        self.params['LB_init'] = self.LB_init
        self.params['UB_init'] = self.UB_init
659
+
660
+ # =============================================================================
661
+ # DISTRIBUTION FUNCTION COMPUTATION
662
+ # =============================================================================
663
+
664
def _get_distribution_function_values(self, use_wedf=True):
    """Return the empirical distribution values used as optimization targets.

    Parameters
    ----------
    use_wedf : bool, optional
        When True, fit a weighted empirical distribution function (WEDF) on
        the data; otherwise use Kolmogorov-Smirnov plotting positions.
    """
    self.logger.info("Computing distribution function values.")

    if not use_wedf:
        # KS plotting positions, one per observation.
        self.logger.info("Using KS points for distribution function computation.")
        df_values = self._generate_ks_points(len(self.data))
        if self.catch:
            self.params['ksdf'] = df_values.copy()
        self.logger.info("KS points computed.")
        return df_values

    self.logger.info("Using WEDF for distribution function computation.")
    wedf_estimator = WEDF(self.data, weights=self.weights, data_lb=self.DLB, data_ub=self.DUB, verbose=self.verbose)
    df_values = wedf_estimator.fit(self.data)
    if self.catch:
        self.params['wedf'] = df_values.copy()
    self.logger.info("WEDF values computed.")
    return df_values
690
+
691
def _generate_ks_points(self, N):
    """Return the N Kolmogorov-Smirnov plotting positions (2i-1)/(2N).

    Raises
    ------
    ValueError
        If N is not positive.
    """
    self.logger.info("Generating Kolmogorov-Smirnov points.")
    if N <= 0:
        raise ValueError("N must be a positive integer.")

    indices = np.arange(1, N + 1)
    ks_points = (2 * indices - 1) / (2 * N)

    if self.catch:
        self.params['ks_points'] = ks_points.copy()

    return ks_points
704
+
705
def _determine_optimization_strategy(self, egdf: bool = True):
    """Optimize S, LB and UB via DistFuncEngine, with a safe fallback.

    Parameters
    ----------
    egdf : bool, optional
        When True optimize against the EGDF core; otherwise the QGDF core.

    On any failure the error is recorded in ``self.params['errors']`` and
    S_opt/LB_opt/UB_opt fall back to the initial values (S_opt = 1.0 when
    S is not numeric).
    """
    self.logger.info("Determining optimization strategy for S, LB, and UB.")
    try:
        self.logger.info("Initializing optimization Engine...")

        # For EGDF and QGDF optimization
        engine = DistFuncEngine(
            compute_func=self._compute_egdf_core if egdf else self._compute_qgdf_core,  # NOTE switch between egdf and qgdf
            target_values=self.df_values,
            weights=self.weights,
            S=self.S,
            LB=self.LB,
            UB=self.UB,
            LB_init=self.LB_init,
            UB_init=self.UB_init,
            tolerance=self.tolerance,
            opt_method=self.opt_method,
            max_iterations=10000,
            regularization_weight=None,  # Engine will set default
            verbose=self.verbose,
            catch_errors=self.catch
        )

        results = engine.optimize()
        self.S_opt = results['S']
        self.LB_opt = results['LB']
        self.UB_opt = results['UB']

    except Exception as e:
        error_msg = f"Optimization strategy determination failed: {str(e)}"
        # FIX: previously this message was logged twice (before and after the
        # params append); log it once.
        self.logger.error(error_msg)
        self.params['errors'].append({
            'method': '_determine_optimization_strategy',
            'error': error_msg,
            'exception_type': type(e).__name__
        })
        # Fallback to initial values
        self.logger.info("Falling back to initial values for S, LB, and UB.")
        self.S_opt = self.S if isinstance(self.S, (int, float)) else 1.0
        self.LB_opt = self.LB_init
        self.UB_opt = self.UB_init
748
+
749
+
750
def _transform_bounds_to_original_domain(self):
    """Map the optimized bounds (LB_opt/UB_opt) back into the original data domain."""
    converter = self._get_data_converter()

    self.logger.info("Transforming optimized bounds back to original domain.")

    # 'a' selects the additive-form inverse transform; any other form
    # uses the multiplicative-form inverse transform.
    if self.data_form == 'a':
        inverse = converter._convert_za
    else:
        inverse = converter._convert_zm
    self.LB = inverse(self.LB_opt, self.DLB, self.DUB)
    self.UB = inverse(self.UB_opt, self.DLB, self.DUB)

    if self.catch:
        self.params.update({'LB': float(self.LB), 'UB': float(self.UB), 'S_opt': float(self.S_opt)})
765
+
766
+ def _cleanup_computation_cache(self):
767
+ """Clean up temporary computation cache to free memory."""
768
+
769
+ self.logger.info("Cleaning up computation cache.")
770
+ self._computation_cache = {
771
+ 'data_converter': None,
772
+ 'characteristics_computer': None,
773
+ 'weights_normalized': None,
774
+ 'smooth_curves_generated': False
775
+ }
776
+
777
+ # Remove large temporary arrays if they exist
778
+ temp_attrs = ['fi', 'hi', 'df_values']
779
+ for attr in temp_attrs:
780
+ if hasattr(self, attr):
781
+ delattr(self, attr)
782
+
783
+ long_array_params = ['z_points', 'di_points', 'egdf_points', 'pdf_points', 'zi_n', 'zi_points', 'eldf_points', 'qldf_points', 'qgdf_points']
784
+
785
+ for param in long_array_params:
786
+ if param in self.params:
787
+ self.params[param] = None
788
+
789
+ if self.catch:
790
+ self.params['computation_cache_cleared'] = True
791
+
792
+ self.logger.info("Computation cache cleaned up.")
793
+
794
+
795
def _calculate_fidelities_irrelevances_at_given_zi(self, zi):
    """Recompute the fidelity (fi) and irrelevance (hi) matrices for a given zi.

    Falls back to ``self.zi`` when *zi* is None. Results are stored on
    ``self.fi`` and ``self.hi``.
    """
    self.logger.info("Calculating fidelities and irrelevances at given zi.")

    # Map the working data into the infinite domain using the optimized bounds.
    zi_numerator = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)
    # Use the caller-supplied zi when provided, otherwise the cached one.
    zi_denominator = self.zi if zi is None else zi

    # Ratio matrix; eps guards the division against exact zeros.
    eps = np.finfo(float).eps
    R = zi_numerator.reshape(-1, 1) / (zi_denominator + eps).reshape(1, -1)

    # Evaluate gnostic characteristics at the optimized scale parameter.
    gc = GnosticsCharacteristics(R=R)
    q, q1 = gc._get_q_q1(S=self.S_opt)

    self.fi = gc._fi(q=q, q1=q1)
    self.hi = gc._hi(q=q, q1=q1)
818
+
819
+
820
def _calculate_gcq_at_given_zi(self, data) -> Tuple[GnosticsCharacteristics, np.ndarray, np.ndarray]:
    """Build GnosticsCharacteristics with its (q, q1) pair for the supplied data.

    Used by the z0 estimator for some methods and for error calculation.

    Returns
    -------
    tuple
        (gc, q, q1)
    """
    self.logger.info("Calculating GnosticsCharacteristics, q, and q1 at given zi.")
    # Convert the raw data into the z domain using DLB/DUB
    # ('a' = additive form, otherwise multiplicative form).
    if self.data_form == 'a':
        zi = DataConversion._convert_az(data, self.DLB, self.DUB)
    else:
        zi = DataConversion._convert_mz(data, self.DLB, self.DUB)
    # Map the working data into the infinite domain via the optimized bounds.
    zi_n = DataConversion._convert_fininf(self.z, self.LB_opt, self.UB_opt)

    # Ratio matrix; eps guards the division against exact zeros.
    eps = np.finfo(float).eps
    R = zi_n.reshape(-1, 1) / (zi + eps).reshape(1, -1)

    # Evaluate gnostic characteristics at the optimized scale parameter.
    gc = GnosticsCharacteristics(R=R, verbose=self.verbose)
    q, q1 = gc._get_q_q1(S=self.S_opt)

    return gc, q, q1
843
+
844
+ # NOTE: put this method to specific class that needs it, e.g., ELDF, QLDF, EGDF, QGDF
845
+ # # z0 compute
846
+ # def _compute_z0(self, optimize: bool = None):
847
+ # """
848
+ # Compute the Z0 point where PDF is maximum using the Z0Estimator class.
849
+
850
+ # Parameters:
851
+ # -----------
852
+ # optimize : bool, optional
853
+ # If True, use interpolation-based methods for higher accuracy.
854
+ # If False, use simple linear search on existing points.
855
+ # If None, uses the instance's z0_optimize setting.
856
+ # """
857
+ # if self.z is None:
858
+ # raise ValueError("Data must be transformed (self.z) before Z0 estimation.")
859
+
860
+ # # Use provided optimize parameter or fall back to instance setting
861
+ # use_optimize = optimize if optimize is not None else self.z0_optimize
862
+
863
+ # if self.verbose:
864
+ # print('GDF: Computing Z0 point using Z0Estimator...')
865
+
866
+ # try:
867
+ # # Create Z0Estimator instance with proper constructor signature
868
+ # z0_estimator = Z0Estimator(
869
+ # gdf_object=self, # Pass the ELDF object itself
870
+ # optimize=use_optimize,
871
+ # verbose=self.verbose
872
+ # )
873
+
874
+ # # Call fit() method to estimate Z0
875
+ # self.z0 = z0_estimator.fit()
876
+
877
+ # # Get estimation info for debugging and storage
878
+ # if self.catch:
879
+ # estimation_info = z0_estimator.get_estimation_info()
880
+ # self.params.update({
881
+ # 'z0': float(self.z0),
882
+ # 'z0_method': estimation_info.get('z0_method', 'unknown'),
883
+ # 'z0_estimation_info': estimation_info
884
+ # })
885
+
886
+ # if self.verbose:
887
+ # method_used = z0_estimator.get_estimation_info().get('z0_method', 'unknown')
888
+ # print(f'ELDF: Z0 point computed successfully: {self.z0:.6f} (method: {method_used})')
889
+
890
+ # except Exception as e:
891
+ # # Log the error
892
+ # error_msg = f"Z0 estimation failed: {str(e)}"
893
+ # self.params['errors'].append({
894
+ # 'method': '_compute_z0',
895
+ # 'error': error_msg,
896
+ # 'exception_type': type(e).__name__
897
+ # })
898
+
899
+ # if self.verbose:
900
+ # print(f"Warning: Z0Estimator failed with error: {e}")
901
+ # print("Falling back to simple maximum finding...")
902
+
903
+ # # Fallback to simple maximum finding
904
+ # self._compute_z0_fallback()
905
+
906
+ # if self.catch:
907
+ # self.params.update({
908
+ # 'z0': float(self.z0),
909
+ # 'z0_method': 'fallback_simple_maximum',
910
+ # 'z0_estimation_info': {'error': str(e)}
911
+ # })
912
+
913
+ # def _compute_z0_fallback(self):
914
+ # """
915
+ # Fallback method for Z0 computation using simple maximum finding.
916
+ # """
917
+ # if not hasattr(self, 'di_points_n') or not hasattr(self, 'pdf_points'):
918
+ # raise ValueError("Both 'di_points_n' and 'pdf_points' must be defined for Z0 computation.")
919
+
920
+ # if self.verbose:
921
+ # print('Using fallback method for Z0 point...')
922
+
923
+ # # Find index with maximum PDF
924
+ # max_idx = np.argmax(self.pdf_points)
925
+ # self.z0 = self.di_points_n[max_idx]
926
+
927
+ # if self.verbose:
928
+ # print(f"Z0 point (fallback method): {self.z0:.6f}")
929
+
930
+ # def analyze_z0(self, figsize: tuple = (12, 6)) -> Dict[str, Any]:
931
+ # """
932
+ # Analyze and visualize Z0 estimation results.
933
+
934
+ # Parameters:
935
+ # -----------
936
+ # figsize : tuple
937
+ # Figure size for the plot
938
+
939
+ # Returns:
940
+ # --------
941
+ # Dict[str, Any]
942
+ # Z0 analysis information
943
+ # """
944
+ # if not hasattr(self, 'z0') or self.z0 is None:
945
+ # raise ValueError("Z0 must be computed before analysis. Call fit() first.")
946
+
947
+ # # Create Z0Estimator for analysis
948
+ # z0_estimator = Z0Estimator(
949
+ # gdf_object=self,
950
+ # optimize=self.z0_optimize,
951
+ # verbose=self.verbose
952
+ # )
953
+
954
+ # # Re-estimate for analysis (this is safe since it's already computed)
955
+ # z0_estimator.fit()
956
+
957
+ # # Get detailed info
958
+ # analysis_info = z0_estimator.get_estimation_info()
959
+
960
+ # # Create visualization
961
+ # z0_estimator.plot_z0_analysis(figsize=figsize)
962
+
963
+ # return analysis_info
964
+
965
+ # NOTE: The following commented-out methods represent an earlier approach to optimization strategy determination. They have been replaced by the DistFuncEngine class for better modularity and maintainability.
966
+ # def _determine_optimization_strategy(self):
967
+ # """Determine which parameters to optimize based on inputs."""
968
+ # if self.verbose:
969
+ # print("Determining optimization strategy...")
970
+ # s_is_auto = isinstance(self.S, str) and self.S.lower() == 'auto'
971
+ # lb_provided = self.LB is not None
972
+ # ub_provided = self.UB is not None
973
+
974
+ # if s_is_auto and not lb_provided and not ub_provided:
975
+ # # Optimize all parameters
976
+ # self.S_opt, self.LB_opt, self.UB_opt = self._optimize_all_parameters()
977
+ # elif lb_provided and ub_provided and s_is_auto:
978
+ # # Optimize only S
979
+ # self.LB_opt = self.LB_init
980
+ # self.UB_opt = self.UB_init
981
+ # self.S_opt = self._optimize_s_parameter(self.LB_opt, self.UB_opt)
982
+ # elif not s_is_auto and (not lb_provided or not ub_provided):
983
+ # # Optimize bounds only
984
+ # self.S_opt = self.S
985
+ # _, self.LB_opt, self.UB_opt = self._optimize_bounds_parameters(self.S_opt)
986
+ # else:
987
+ # # Use provided parameters
988
+ # self.S_opt = self.S if not s_is_auto else 1.0
989
+ # self.LB_opt = self.LB_init
990
+ # self.UB_opt = self.UB_init
991
+
992
+ # if self.verbose:
993
+ # print(f"Optimized parameters: S={self.S_opt:.6f}, LB={self.LB_opt:.6f}, UB={self.UB_opt:.6f}")
994
+
995
+ # def _optimize_all_parameters(self):
996
+ # """Optimize all parameters using normalized parameter space."""
997
+ # if self.verbose:
998
+ # print("Optimizing all parameters (S, LB, UB)...")
999
+ # bounds = self._OPTIMIZATION_BOUNDS
1000
+
1001
+ # def normalize_params(s, lb, ub):
1002
+ # s_norm = (s - bounds['S_MIN']) / (bounds['S_MAX'] - bounds['S_MIN'])
1003
+ # lb_norm = (lb - bounds['LB_MIN']) / (bounds['LB_MAX'] - bounds['LB_MIN'])
1004
+ # ub_norm = (ub - bounds['UB_MIN']) / (bounds['UB_MAX'] - bounds['UB_MIN'])
1005
+ # return s_norm, lb_norm, ub_norm
1006
+
1007
+ # def denormalize_params(s_norm, lb_norm, ub_norm):
1008
+ # s = bounds['S_MIN'] + s_norm * (bounds['S_MAX'] - bounds['S_MIN'])
1009
+ # lb = bounds['LB_MIN'] + lb_norm * (bounds['LB_MAX'] - bounds['LB_MIN'])
1010
+ # ub = bounds['UB_MIN'] + ub_norm * (bounds['UB_MAX'] - bounds['UB_MIN'])
1011
+ # return s, lb, ub
1012
+
1013
+ # def objective_function(norm_params):
1014
+ # try:
1015
+ # s, lb, ub = denormalize_params(*norm_params)
1016
+
1017
+ # if s <= 0 or ub <= lb:
1018
+ # return 1e6
1019
+
1020
+ # egdf_values, _, _ = self._compute_egdf_core(s, lb, ub)
1021
+ # diff = np.mean(np.abs(egdf_values - self.df_values) * self.weights)
1022
+
1023
+ # # Regularization
1024
+ # reg = np.sum(np.array(norm_params)**2)
1025
+
1026
+ # total_loss = diff + reg
1027
+
1028
+ # if self.verbose:
1029
+ # print(f"Loss: {diff:.6f}, Total: {total_loss:.6f}, S: {s:.3f}, LB: {lb:.6f}, UB: {ub:.3f}")
1030
+
1031
+ # return total_loss
1032
+ # except:
1033
+ # error_msg = f"Objective function computation failed: {str(e)}"
1034
+ # self.params['errors'].append({
1035
+ # 'method': '_optimize_all_parameters.objective_function',
1036
+ # 'error': error_msg,
1037
+ # 'exception_type': type(e).__name__,
1038
+ # 'norm_params': norm_params.tolist() if hasattr(norm_params, 'tolist') else list(norm_params)
1039
+ # })
1040
+ # return 1e6
1041
+
1042
+ # # Initial values
1043
+ # s_init = 0.05
1044
+ # lb_init = self.LB_init if hasattr(self, 'LB_init') and self.LB_init is not None else bounds['LB_MIN']
1045
+ # ub_init = self.UB_init if hasattr(self, 'UB_init') and self.UB_init is not None else bounds['UB_MAX']
1046
+
1047
+ # initial_params = normalize_params(s_init, lb_init, ub_init)
1048
+ # norm_bounds = [(0.0, 1.0)]
1049
+
1050
+ # try:
1051
+ # result = minimize(
1052
+ # objective_function,
1053
+ # initial_params,
1054
+ # method=self.opt_method,
1055
+ # bounds=norm_bounds,
1056
+ # options={'maxiter': 10000, 'ftol': self.tolerance},
1057
+ # tol=self.tolerance
1058
+ # )
1059
+
1060
+ # s_opt, lb_opt, ub_opt = denormalize_params(*result.x)
1061
+
1062
+ # if lb_opt >= ub_opt:
1063
+ # if self.verbose:
1064
+ # print("Warning: Optimized LB >= UB, using initial values")
1065
+ # return s_init, lb_init, ub_init
1066
+
1067
+ # return s_opt, lb_opt, ub_opt
1068
+ # except Exception as e:
1069
+ # # error handling
1070
+ # error_msg = f"Optimization failed: {str(e)}"
1071
+ # self.params['errors'].append({
1072
+ # 'method': '_optimize_all_parameters',
1073
+ # 'error': error_msg,
1074
+ # 'exception_type': type(e).__name__
1075
+ # })
1076
+ # if self.verbose:
1077
+ # print(f"Optimization failed: {e}")
1078
+ # return s_init, lb_init, ub_init
1079
+
1080
+ # def _optimize_s_parameter(self, lb, ub):
1081
+ # """Optimize only S parameter."""
1082
+ # if self.verbose:
1083
+ # print("Optimizing S parameter...")
1084
+
1085
+ # def objective_function(s):
1086
+ # try:
1087
+ # egdf_values, _, _ = self._compute_egdf_core(s[0], lb, ub)
1088
+ # diff = np.mean(np.abs(egdf_values - self.df_values) * self.weights)
1089
+ # if self.verbose:
1090
+ # print(f"S optimization - Loss: {diff:.6f}, S: {s[0]:.3f}")
1091
+ # return diff
1092
+ # except Exception as e:
1093
+ # error_msg = f"S optimization objective function failed: {str(e)}"
1094
+ # self.params['errors'].append({
1095
+ # 'method': '_optimize_s_parameter',
1096
+ # 'error': error_msg,
1097
+ # 'exception_type': type(e).__name__
1098
+ # })
1099
+ # return 1e6
1100
+
1101
+ # try:
1102
+ # result = minimize(
1103
+ # objective_function,
1104
+ # [1.0],
1105
+ # bounds=[(self._OPTIMIZATION_BOUNDS['S_MIN'], self._OPTIMIZATION_BOUNDS['S_MAX'])],
1106
+ # method=self.opt_method,
1107
+ # options={'maxiter': 1000}
1108
+ # )
1109
+ # return result.x[0]
1110
+ # except Exception as e:
1111
+ # error_msg = f"S optimization failed: {str(e)}"
1112
+ # self.params['errors'].append({
1113
+ # 'method': '_optimize_s_parameter',
1114
+ # 'error': error_msg,
1115
+ # 'exception_type': type(e).__name__
1116
+ # })
1117
+ # return 1.0
1118
+
1119
+ # def _optimize_bounds_parameters(self, s):
1120
+ # """Optimize only LB and UB parameters."""
1121
+ # if self.verbose:
1122
+ # print("Optimizing LB and UB parameters...")
1123
+
1124
+ # bounds = self._OPTIMIZATION_BOUNDS
1125
+
1126
+ # def normalize_bounds(lb, ub):
1127
+ # lb_norm = (lb - bounds['LB_MIN']) / (bounds['LB_MAX'] - bounds['LB_MIN'])
1128
+ # ub_norm = (ub - bounds['UB_MIN']) / (bounds['UB_MAX'] - bounds['UB_MIN'])
1129
+ # return lb_norm, ub_norm
1130
+
1131
+ # def denormalize_bounds(lb_norm, ub_norm):
1132
+ # lb = bounds['LB_MIN'] + lb_norm * (bounds['LB_MAX'] - bounds['LB_MIN'])
1133
+ # ub = bounds['UB_MIN'] + ub_norm * (bounds['UB_MAX'] - bounds['UB_MIN'])
1134
+ # return lb, ub
1135
+
1136
+ # def objective_function(norm_params):
1137
+ # try:
1138
+ # lb, ub = denormalize_bounds(*norm_params)
1139
+
1140
+ # if lb <= 0 or ub <= lb:
1141
+ # return 1e6
1142
+
1143
+ # egdf_values, _, _ = self._compute_egdf_core(s, lb, ub)
1144
+ # diff = np.mean(np.abs(egdf_values - self.df_values) * self.weights)
1145
+
1146
+ # # Regularization
1147
+ # reg = np.sum(np.array(norm_params)**2)
1148
+ # total_loss = diff + reg
1149
+
1150
+ # if self.verbose:
1151
+ # print(f"Bounds optimization - Loss: {diff:.6f}, Total: {total_loss:.6f}, LB: {lb:.6f}, UB: {ub:.3f}")
1152
+ # except Exception as e:
1153
+ # error_msg = f"Bounds optimization objective function failed: {str(e)}"
1154
+ # self.params['errors'].append({
1155
+ # 'method': '_optimize_bounds_parameters',
1156
+ # 'error': error_msg,
1157
+ # 'exception_type': type(e).__name__
1158
+ # })
1159
+ # return 1e6
1160
+
1161
+ # # Initial values
1162
+ # lb_init = self.LB_init if hasattr(self, 'LB_init') and self.LB_init is not None else bounds['LB_MIN']
1163
+ # ub_init = self.UB_init if hasattr(self, 'UB_init') and self.UB_init is not None else bounds['UB_MIN']
1164
+
1165
+ # lb_init = np.clip(lb_init, bounds['LB_MIN'], bounds['LB_MAX'])
1166
+ # ub_init = np.clip(ub_init, bounds['UB_MIN'], bounds['UB_MAX'])
1167
+
1168
+ # if lb_init >= ub_init:
1169
+ # lb_init = bounds['LB_MIN']
1170
+ # ub_init = bounds['UB_MIN']
1171
+
1172
+ # initial_params = normalize_bounds(lb_init, ub_init)
1173
+ # norm_bounds = [(0.0, 1.0), (0.0, 1.0)]
1174
+
1175
+ # try:
1176
+ # result = minimize(
1177
+ # objective_function,
1178
+ # initial_params,
1179
+ # method=self.opt_method,
1180
+ # bounds=norm_bounds,
1181
+ # options={'maxiter': 10000, 'ftol': self.tolerance},
1182
+ # tol=self.tolerance
1183
+ # )
1184
+
1185
+ # lb_opt, ub_opt = denormalize_bounds(*result.x)
1186
+
1187
+ # if lb_opt >= ub_opt:
1188
+ # if self.verbose:
1189
+ # print("Warning: Optimized LB >= UB, using initial values")
1190
+ # return s, lb_init, ub_init
1191
+
1192
+ # return s, lb_opt, ub_opt
1193
+ # except Exception as e:
1194
+ # error_msg = f"Bounds optimization failed: {str(e)}"
1195
+ # self.params['errors'].append({
1196
+ # 'method': '_optimize_bounds_parameters',
1197
+ # 'error': error_msg,
1198
+ # 'exception_type': type(e).__name__
1199
+ # })
1200
+ # if self.verbose:
1201
+ # print(f"Bounds optimization failed: {e}")
1202
+ # return s, self.LB, self.UB