machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/magcal/gdf/distfunc_engine.py
@@ -0,0 +1,841 @@
+ """
+ Distribution Function Parameter Optimization Engine
+
+ Internal optimization engine for ManGo distribution function parameter tuning.
+ This module provides a unified, robust optimization framework for EGDF, QGDF,
+ and other distribution function implementations.
+
+ Key Design Decisions:
+ - Normalized parameter space for numerical stability
+ - Strategy-based optimization (auto-detect what needs optimization)
+ - Comprehensive error handling with fallback mechanisms
+ - Modular design for easy extension to new distribution functions
+
+ Performance Considerations:
+ - Uses scipy.optimize.minimize with L-BFGS-B for bounded optimization
+ - Parameter normalization prevents scale-related convergence issues
+ - Caching and result storage for debugging optimization failures
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ """
+
+ import logging
+ import numpy as np
+ from typing import Callable, Dict, Any, Tuple, Optional, Union
+ from scipy.optimize import minimize
+ from machinegnostics.magcal.util.logging import get_logger
+
+ class DistFuncEngine:
+     """
+     Internal optimization engine for distribution function parameters (S, LB, UB).
+
+     DEVELOPER NOTES:
+     ================
+
+     Design Philosophy:
+     ------------------
+     This class was designed to replace scattered optimization code in the EGDF
+     and QGDF classes. The main goals were:
+     1. Centralize parameter optimization logic
+     2. Provide consistent behavior across different distribution functions
+     3. Handle edge cases and optimization failures gracefully
+     4. Enable debugging of optimization issues through comprehensive logging
+
+     Parameter Space Design:
+     -----------------------
+     The optimization uses a normalized parameter space [0,1]³ to avoid numerical
+     issues with vastly different scales:
+     - S: typically 0.05 to 100 (a ~2000x spread)
+     - LB: typically 1e-6 to exp(-1) (a ~300,000x spread)
+     - UB: typically exp(1) to 1e6 (a ~300,000x spread)
+
+     Normalization prevents the optimizer from getting stuck due to poor scaling.
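+
+     For example, S = 1.0 maps to (1.0 - 0.05) / (100 - 0.05) ≈ 0.0095, so all
+     three parameters reach the optimizer on a comparable [0, 1] scale.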
+
+     Optimization Strategies:
+     ------------------------
+     The engine automatically detects which parameters need optimization:
+
+     1. 'optimize_all': S='auto', LB=None, UB=None
+        - Most computationally expensive
+        - Uses 3D normalized optimization with regularization
+        - Falls back to reasonable defaults if optimization fails
+
+     2. 'optimize_s_only': S='auto', LB=provided, UB=provided
+        - Fast 1D optimization over the S parameter
+        - Bounds are fixed to the user-provided values
+        - Common case when bounds are known from data analysis
+
+     3. 'optimize_bounds_only': S=provided, LB=None OR UB=None
+        - 2D optimization over bounds with fixed S
+        - Less common; used when S is known from theory or experiment
+
+     4. 'use_provided': all parameters explicitly provided
+        - No optimization, direct parameter assignment
+        - Used for validation runs or when parameters are pre-computed
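+
+     A minimal sketch of how the strategies are triggered ('my_compute' and
+     'wedf' are placeholders, not part of this module):
+
+         engine = DistFuncEngine(compute_func=my_compute, target_values=wedf)
+         # S defaults to 'auto', no bounds -> 'optimize_all'
+
+         engine = DistFuncEngine(compute_func=my_compute, target_values=wedf,
+                                 LB=0.01, UB=50.0)
+         # S='auto' with both bounds -> 'optimize_s_only'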
+
+     Error Handling Strategy:
+     ------------------------
+     Multiple layers of error handling:
+     1. Input validation at initialization (fail fast)
+     2. Optimization-level error catching (graceful degradation)
+     3. Objective-function error catching (return a penalty value)
+     4. Fallback parameter estimation (always return something reasonable)
+
+     This design ensures the engine never completely fails, which is critical
+     for batch processing of large datasets.
+
+     Memory and Performance:
+     -----------------------
+     - Stores optimization results for debugging (can be disabled in production)
+     - Error/warning logs are accumulated (consider periodic clearing for long runs)
+     - Objective-function evaluations are not cached (compute_func should handle this)
+     - Uses scipy's L-BFGS-B, which is memory-efficient for bounded optimization
+
+     Thread Safety:
+     --------------
+     This class is NOT thread-safe. Each thread should have its own instance.
+     The compute_func parameter may have its own thread-safety requirements.
+
+     Integration Points:
+     -------------------
+     The compute_func parameter is the main integration point. It should:
+     - Accept (s: float, lb: float, ub: float) parameters
+     - Return a (dist_values: np.ndarray, info1: Any, info2: Any) tuple
+     - Handle numerical edge cases gracefully
+     - Be reasonably efficient (it is called many times during optimization)
+
+     Common Integration Examples:
+     - compute_func = lambda s, lb, ub: self._compute_egdf_core(s, lb, ub)
+     - compute_func = lambda s, lb, ub: self._compute_qgdf_core(s, lb, ub)
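+
+     A conforming stub would look like (illustrative sketch only):
+
+         def my_compute(s, lb, ub):
+             values = ...  # evaluate the distribution on the data grid
+             return values, None, None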
+
+     Debugging Optimization Issues:
+     ------------------------------
+     Use get_optimization_info() to access:
+     - Optimization convergence details (success, iterations, final objective value)
+     - Error logs with full exception information
+     - Parameter values attempted during optimization
+
+     Common failure modes:
+     1. Objective-function numerical issues -> check the compute_func implementation
+     2. Poor initial parameter guesses -> adjust LB_init, UB_init
+     3. Target values incompatible with parameter bounds -> check data preprocessing
+     4. Optimization stuck in a local minimum -> try a different opt_method or tolerance
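+
+     For example (keys as defined by get_optimization_info() below):
+
+         info = engine.get_optimization_info()
+         if info['errors']:
+             print(info['errors'][-1]['method'], info['errors'][-1]['error'])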
+
+     Extension Guidelines:
+     ---------------------
+     To add new optimization features:
+     1. Add the new strategy to _determine_optimization_strategy()
+     2. Implement a corresponding _optimize_xxx_parameters() method
+     3. Update the optimize() method to handle the new strategy
+     4. Add appropriate fallback behavior
+     5. Update the optimization bounds if needed
+
+     KNOWN LIMITATIONS:
+     ==================
+     1. No support for constrained optimization beyond bounds
+     2. No automatic hyperparameter tuning (tolerance, max_iterations)
+     3. No parallel optimization (could be added for multiple starting points)
+     4. Regularization weight is global (could be parameter-specific)
+     5. No adaptive optimization strategy based on problem characteristics
+
+     TODO/FUTURE IMPROVEMENTS:
+     =========================
+     1. Add support for custom bounds per parameter
+     2. Implement multi-start optimization for better global convergence
+     3. Add automatic hyperparameter tuning based on problem size
+     4. Consider more advanced optimizers (e.g., differential evolution)
+     5. Add optimization warm-starting from previous results
+     """
+
+     # Class constants for optimization bounds.
+     # These bounds are derived from practical experience with ManGo data.
+     _OPTIMIZATION_BOUNDS = {
+         'S_MIN': 0.05,                 # below this, numerical instability in distribution functions
+         'S_MAX': 100.0,                # above this, diminishing returns and numerical issues
+         'LB_MIN': 1e-6,                # practical lower limit for meaningful bounds
+         'LB_MAX': np.exp(-1.000001),   # slightly less than e^-1 to avoid the edge case
+         'UB_MIN': np.exp(1.000001),    # slightly more than e^1 to avoid the edge case
+         'UB_MAX': 1e6,                 # practical upper limit for meaningful bounds
+         'REGULARIZATION_WEIGHT': 1e-6  # prevents overfitting to noise in target_values
+     }
+
+     # Numerical constants for fallback behavior
+     _NUMERICAL_EPS = np.finfo(float).eps
+     _FALLBACK_VALUES = {
+         'S': 1.0,           # neutral S value (no excessive sharpening/smoothing)
+         'LB_FACTOR': 0.1,   # conservative lower-bound estimation
+         'UB_FACTOR': 10.0   # conservative upper-bound estimation
+     }
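+
+     # Implied fallback values when no initial guesses are given (see the
+     # _estimate_fallback_* methods below): LB = LB_MIN * LB_FACTOR = 1e-7,
+     # UB = UB_MIN * UB_FACTOR ≈ 27.18.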
+
+     def __init__(self,
+                  compute_func: Callable,
+                  target_values: np.ndarray,
+                  weights: np.ndarray = None,
+                  S: Union[float, str] = 'auto',
+                  LB: float = None,
+                  UB: float = None,
+                  LB_init: float = None,
+                  UB_init: float = None,
+                  tolerance: float = 1e-3,
+                  opt_method: str = 'L-BFGS-B',
+                  max_iterations: int = 10000,
+                  regularization_weight: float = None,
+                  verbose: bool = False,
+                  catch_errors: bool = True):
+         """
+         Initialize the Distribution Function Optimization Engine.
+
+         DEVELOPER PARAMETERS:
+         =====================
+
+         compute_func : Callable
+             Core distribution function to optimize. Must have the signature:
+             compute_func(s: float, lb: float, ub: float) -> Tuple[np.ndarray, Any, Any]
+
+             The function should:
+             - Return distribution values as the first element of the tuple
+             - Handle edge cases (s <= 0, lb >= ub, etc.) gracefully
+             - Be numerically stable across the optimization bounds
+             - Execute reasonably fast (called hundreds to thousands of times)
+
+             Example: lambda s, lb, ub: self._compute_egdf_core(s, lb, ub)
+
+         target_values : np.ndarray
+             Target distribution values to match (e.g., WEDF values, KS points).
+             These are typically:
+             - WEDF values for EGDF optimization
+             - Kolmogorov-Smirnov points for theoretical comparisons
+             - Pre-computed reference distribution values
+
+             Must have the same length as the output of compute_func.
+
+         weights : np.ndarray, optional
+             Point-wise weights for loss computation. If None, uniform weights are used.
+             Useful for:
+             - Emphasizing tail behavior (higher weights at the extremes)
+             - Handling heteroscedastic data
+             - Incorporating measurement uncertainties
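+
+             For example, weights = np.linspace(1.0, 3.0, len(target_values))
+             would emphasize the upper tail (an illustrative choice, not a default).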
+
+         S : Union[float, str], default 'auto'
+             Sharpening parameter. Options:
+             - 'auto': optimize S automatically
+             - float > 0: use the fixed S value
+
+             Typical values:
+             - S < 1: smoothing effect
+             - S = 1: neutral
+             - S > 1: sharpening effect
+
+         LB, UB : float, optional
+             Distribution bounds. If None, they will be optimized.
+             These should correspond to the transformed data domain.
+
+             Critical: LB must be < UB if both are provided.
+
+         LB_init, UB_init : float, optional
+             Initial guesses for bound optimization. Good initial guesses
+             significantly improve convergence:
+             - LB_init: slightly below the data minimum
+             - UB_init: slightly above the data maximum
+
+         tolerance : float, default 1e-3
+             Optimization convergence tolerance. Smaller values give more
+             precise results but take longer to converge.
+
+             Recommended values:
+             - 1e-2: fast, acceptable for most applications
+             - 1e-3: standard precision
+             - 1e-4: high precision, slower convergence
+
+         opt_method : str, default 'L-BFGS-B'
+             Scipy optimization method. L-BFGS-B is recommended for bounded
+             problems with gradients. Alternatives:
+             - 'TNC': alternative bounded optimizer
+             - 'SLSQP': sequential least squares (slower but robust)
+
+         max_iterations : int, default 10000
+             Maximum optimization iterations. Increase for difficult problems,
+             decrease for faster (but potentially less accurate) optimization.
+
+         regularization_weight : float, optional
+             Weight for the L2 regularization term. Helps prevent overfitting
+             to noisy target values. The default is usually appropriate.
+
+         verbose : bool, default False
+             Enable detailed optimization logging. Useful for debugging,
+             but can generate significant output for large optimization runs.
+
+         catch_errors : bool, default True
+             Whether to catch optimization errors and return fallback values.
+             - True: always returns reasonable parameters (recommended for production)
+             - False: raises exceptions, for debugging optimization issues
+
+         INTERNAL STATE INITIALIZED:
+         ===========================
+         - optimization_results: dict storing convergence information
+         - optimization_errors: list of encountered errors with full context
+         - optimization_warnings: list of non-critical issues
+         - S_opt, LB_opt, UB_opt: optimized parameter values (None until optimize() is called)
+         """
+
+         # Logger (created before validation, which logs on failure)
+         self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
+
+         # Validate inputs
+         self._validate_inputs(compute_func, target_values, weights, S, LB, UB)
+
+         # Store parameters
+         self.compute_func = compute_func
+         self.target_values = np.asarray(target_values)
+         self.weights = np.asarray(weights) if weights is not None else np.ones_like(self.target_values)
+         self.S = S
+         self.LB = LB
+         self.UB = UB
+         self.LB_init = LB_init
+         self.UB_init = UB_init
+         self.tolerance = tolerance
+         self.opt_method = opt_method
+         self.max_iterations = max_iterations
+         self.regularization_weight = (regularization_weight if regularization_weight is not None
+                                       else self._OPTIMIZATION_BOUNDS['REGULARIZATION_WEIGHT'])
+         self.verbose = verbose
+         self.catch_errors = catch_errors
+
+         # Results storage
+         self.optimization_results = {}
+         self.optimization_errors = []
+         self.optimization_warnings = []
+
+         # Optimized parameters
+         self.S_opt = None
+         self.LB_opt = None
+         self.UB_opt = None
+
+         self.logger.debug(f"{self.__class__.__name__} initialized")
+
+     def _validate_inputs(self, compute_func, target_values, weights, S, LB, UB):
+         """Validate initialization inputs."""
+         if not callable(compute_func):
+             self.logger.error("compute_func must be callable")
+             raise TypeError("compute_func must be callable")
+
+         if not isinstance(target_values, np.ndarray):
+             self.logger.debug("Converting target_values to numpy array.")
+             target_values = np.asarray(target_values)
+
+         if target_values.size == 0:
+             self.logger.error("target_values cannot be empty")
+             raise ValueError("target_values cannot be empty")
+
+         if not np.isfinite(target_values).all():
+             self.logger.error("target_values must contain only finite values")
+             raise ValueError("target_values must contain only finite values")
+
+         if weights is not None:
+             weights = np.asarray(weights)
+             if len(weights) != len(target_values):
+                 self.logger.error("weights must have same length as target_values")
+                 raise ValueError("weights must have same length as target_values")
+             if not np.all(weights >= 0):
+                 self.logger.error("weights must be non-negative")
+                 raise ValueError("weights must be non-negative")
+
+         # Validate the S parameter
+         if isinstance(S, str) and S.lower() != 'auto':
+             self.logger.error("S must be a positive number or 'auto'")
+             raise ValueError("S must be a positive number or 'auto'")
+         elif isinstance(S, (int, float)) and S <= 0:
+             self.logger.error("S must be positive when specified as a number")
+             raise ValueError("S must be positive when specified as a number")
+
+         # Validate bounds
+         if LB is not None and UB is not None and LB >= UB:
+             self.logger.error("LB must be less than UB when both are provided")
+             raise ValueError("LB must be less than UB when both are provided")
+
+     def optimize(self) -> Dict[str, float]:
+         """
+         Main optimization method. Determines the strategy and optimizes parameters.
+
+         Returns:
+         --------
+         Dict[str, float]
+             Optimized parameters {'S': value, 'LB': value, 'UB': value}
+         """
+         self.logger.debug("Starting parameter optimization...")
+
+         try:
+             # Determine the optimization strategy
+             strategy = self._determine_optimization_strategy()
+
+             self.logger.debug(f"Using optimization strategy: {strategy}")
+
+             # Execute optimization based on the strategy
+             if strategy == 'optimize_all':
+                 self.S_opt, self.LB_opt, self.UB_opt = self._optimize_all_parameters()
+             elif strategy == 'optimize_s_only':
+                 self.S_opt = self._optimize_s_parameter(self.LB_init, self.UB_init)
+                 self.LB_opt = self.LB_init
+                 self.UB_opt = self.UB_init
+             elif strategy == 'optimize_bounds_only':
+                 _, self.LB_opt, self.UB_opt = self._optimize_bounds_parameters(self.S)
+                 self.S_opt = self.S
+             else:  # use_provided
+                 self.S_opt = self.S if isinstance(self.S, (int, float)) else self._FALLBACK_VALUES['S']
+                 self.LB_opt = self.LB_init
+                 self.UB_opt = self.UB_init
+
+             # Store results
+             results = {
+                 'S': float(self.S_opt),
+                 'LB': float(self.LB_opt),
+                 'UB': float(self.UB_opt)
+             }
+
+             self.optimization_results.update(results)
+             self.optimization_results['strategy_used'] = strategy
+
+             self.logger.info("Optimization completed successfully")
+             self.logger.debug(f"Results - S: {self.S_opt:.6f}, LB: {self.LB_opt:.6f}, UB: {self.UB_opt:.6f}")
+
+             return results
+
+         except Exception as e:
+             error_msg = f"DistFuncEngine optimization failed: {str(e)}"
+             self.logger.error(error_msg)
+             self.optimization_errors.append({
+                 'method': 'optimize',
+                 'error': error_msg,
+                 'exception_type': type(e).__name__
+             })
+
+             if self.catch_errors:
+                 self.logger.debug("Using fallback values")
+                 return self._get_fallback_results()
+             else:
+                 raise
+
+     def _determine_optimization_strategy(self) -> str:
+         """Determine which optimization strategy to use."""
+         self.logger.info("Determining optimization strategy...")
+         s_is_auto = isinstance(self.S, str) and self.S.lower() == 'auto'
+         lb_provided = self.LB is not None
+         ub_provided = self.UB is not None
+
+         if s_is_auto and not lb_provided and not ub_provided:
+             return 'optimize_all'
+         elif lb_provided and ub_provided and s_is_auto:
+             # Seed the initial values from the provided bounds
+             self.LB_init = self.LB
+             self.UB_init = self.UB
+             return 'optimize_s_only'
+         elif not s_is_auto and (not lb_provided or not ub_provided):
+             return 'optimize_bounds_only'
+         else:
+             # All parameters provided
+             self.LB_init = self.LB
+             self.UB_init = self.UB
+             return 'use_provided'
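+
+     # Illustrative input -> strategy mapping for the method above:
+     #   S='auto', LB=None, UB=None   -> 'optimize_all'
+     #   S='auto', LB=0.01, UB=50.0   -> 'optimize_s_only'
+     #   S=2.0,    LB=None, UB=50.0   -> 'optimize_bounds_only'
+     #   S=2.0,    LB=0.01, UB=50.0   -> 'use_provided'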
+
+     def _optimize_all_parameters(self) -> Tuple[float, float, float]:
+         """Optimize all parameters (S, LB, UB) using a normalized parameter space."""
+         self.logger.info("Optimizing all parameters (S, LB, UB)...")
+
+         bounds = self._OPTIMIZATION_BOUNDS
+
+         def normalize_params(s, lb, ub):
+             s_norm = (s - bounds['S_MIN']) / (bounds['S_MAX'] - bounds['S_MIN'])
+             lb_norm = (lb - bounds['LB_MIN']) / (bounds['LB_MAX'] - bounds['LB_MIN'])
+             ub_norm = (ub - bounds['UB_MIN']) / (bounds['UB_MAX'] - bounds['UB_MIN'])
+             return np.array([s_norm, lb_norm, ub_norm])
+
+         def denormalize_params(norm_params):
+             s_norm, lb_norm, ub_norm = norm_params
+             s = bounds['S_MIN'] + s_norm * (bounds['S_MAX'] - bounds['S_MIN'])
+             lb = bounds['LB_MIN'] + lb_norm * (bounds['LB_MAX'] - bounds['LB_MIN'])
+             ub = bounds['UB_MIN'] + ub_norm * (bounds['UB_MAX'] - bounds['UB_MIN'])
+             return s, lb, ub
+
+         def objective_function(norm_params):
+             try:
+                 s, lb, ub = denormalize_params(norm_params)
+
+                 # Check parameter validity
+                 if s <= 0 or ub <= lb:
+                     return 1e6
+
+                 # Compute distribution values
+                 dist_values, _, _ = self.compute_func(s, lb, ub)
+
+                 # Calculate the loss (weighted mean absolute error)
+                 diff = np.mean(np.abs(dist_values - self.target_values) * self.weights)
+
+                 # Add L2 regularization, scaled by the configured weight
+                 regularization = self.regularization_weight * np.sum(norm_params**2)
+                 total_loss = diff + regularization
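+
+                 # Illustrative arithmetic: with dist_values = [0.2, 0.5],
+                 # target_values = [0.1, 0.7] and unit weights, the data term is
+                 # mean(|0.1|, |0.2|) = 0.15; the regularization term only breaks ties.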
+
+                 if self.verbose and hasattr(self, '_opt_iteration'):
+                     self._opt_iteration += 1
+                     if self._opt_iteration % 50 == 0:
+                         self.logger.debug(f" Iteration {self._opt_iteration}: Loss={diff:.6f}, Total={total_loss:.6f}, "
+                                           f"S={s:.3f}, LB={lb:.6f}, UB={ub:.3f}")
+
+                 return total_loss
+
+             except Exception as e:
+                 error_msg = f"Objective function failed: {str(e)}"
+                 self.logger.error(error_msg)
+                 self.optimization_errors.append({
+                     'method': '_optimize_all_parameters.objective_function',
+                     'error': error_msg,
+                     'exception_type': type(e).__name__,
+                     'parameters': norm_params.tolist() if hasattr(norm_params, 'tolist') else list(norm_params)
+                 })
+                 return 1e6
+
+         # Set initial values
+         s_init = 1.0
+         lb_init = self.LB_init if self.LB_init is not None else bounds['LB_MIN'] * 10
+         ub_init = self.UB_init if self.UB_init is not None else bounds['UB_MIN'] * 10
+
+         # Ensure valid initial bounds
+         if lb_init >= ub_init:
+             lb_init = bounds['LB_MIN'] * 10
+             ub_init = bounds['UB_MIN'] * 10
+
+         initial_params = normalize_params(s_init, lb_init, ub_init)
+         norm_bounds = [(0.0, 1.0), (0.0, 1.0), (0.0, 1.0)]
+
+         try:
+             if self.verbose:
+                 self._opt_iteration = 0
+
+             result = minimize(
+                 objective_function,
+                 initial_params,
+                 method=self.opt_method,
+                 bounds=norm_bounds,
+                 options={'maxiter': self.max_iterations, 'ftol': self.tolerance},
+                 tol=self.tolerance
+             )
+
+             s_opt, lb_opt, ub_opt = denormalize_params(result.x)
+
+             # Validate results
+             if lb_opt >= ub_opt or s_opt <= 0:
+                 self.logger.warning("Invalid optimized parameters, using fallback")
+                 return self._get_fallback_parameters()
+
+             # Store optimization info
+             self.optimization_results['all_params_optimization'] = {
+                 'success': result.success,
+                 'fun': float(result.fun),
+                 'nit': int(result.nit),
+                 'message': result.message
+             }
+
+             return s_opt, lb_opt, ub_opt
+
+         except Exception as e:
+             error_msg = f"All parameters optimization failed: {str(e)}"
+             self.logger.error(error_msg)
+             self.optimization_errors.append({
+                 'method': '_optimize_all_parameters',
+                 'error': error_msg,
+                 'exception_type': type(e).__name__
+             })
+
+             return self._get_fallback_parameters()
+
+     def _optimize_s_parameter(self, lb: float, ub: float) -> float:
+         """Optimize only the S parameter with fixed bounds."""
+         self.logger.info("Optimizing S parameter...")
+
+         def objective_function(s_array):
+             try:
+                 s = s_array[0]
+                 dist_values, _, _ = self.compute_func(s, lb, ub)
+                 diff = np.mean(np.abs(dist_values - self.target_values) * self.weights)
+
+                 self.logger.debug(f" S optimization - Loss: {diff:.6f}, S: {s:.3f}")
+
+                 return diff
+
+             except Exception as e:
+                 error_msg = f"S optimization objective failed: {str(e)}"
+                 self.logger.error(error_msg)
+                 self.optimization_errors.append({
+                     'method': '_optimize_s_parameter.objective_function',
+                     'error': error_msg,
+                     'exception_type': type(e).__name__
+                 })
+                 return 1e6
+
+         bounds = self._OPTIMIZATION_BOUNDS
+         s_bounds = [(bounds['S_MIN'], bounds['S_MAX'])]
+
+         try:
+             result = minimize(
+                 objective_function,
+                 [1.0],  # initial S value
+                 bounds=s_bounds,
+                 method=self.opt_method,
+                 options={'maxiter': 1000, 'ftol': self.tolerance}
+             )
+
+             # Store optimization info
+             self.optimization_results['s_optimization'] = {
+                 'success': result.success,
+                 'fun': float(result.fun),
+                 'nit': int(result.nit),
+                 'message': result.message
+             }
+
+             return result.x[0]
+
+         except Exception as e:
+             error_msg = f"S parameter optimization failed: {str(e)}"
+             self.logger.error(error_msg)
+             self.optimization_errors.append({
+                 'method': '_optimize_s_parameter',
+                 'error': error_msg,
+                 'exception_type': type(e).__name__
+             })
+
+             return self._FALLBACK_VALUES['S']
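+
+     # Note: this is a 1-D search with the bounds held fixed; the
+     # 'optimize_s_only' strategy calls it as
+     # self._optimize_s_parameter(self.LB_init, self.UB_init).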
+
+     def _optimize_bounds_parameters(self, s: float) -> Tuple[float, float, float]:
+         """Optimize the LB and UB parameters with fixed S."""
+         self.logger.info("Optimizing LB and UB parameters...")
+
+         bounds = self._OPTIMIZATION_BOUNDS
+
+         def normalize_bounds(lb, ub):
+             lb_norm = (lb - bounds['LB_MIN']) / (bounds['LB_MAX'] - bounds['LB_MIN'])
+             ub_norm = (ub - bounds['UB_MIN']) / (bounds['UB_MAX'] - bounds['UB_MIN'])
+             return np.array([lb_norm, ub_norm])
+
+         def denormalize_bounds(norm_params):
+             lb_norm, ub_norm = norm_params
+             lb = bounds['LB_MIN'] + lb_norm * (bounds['LB_MAX'] - bounds['LB_MIN'])
+             ub = bounds['UB_MIN'] + ub_norm * (bounds['UB_MAX'] - bounds['UB_MIN'])
+             return lb, ub
+
+         def objective_function(norm_params):
+             try:
+                 lb, ub = denormalize_bounds(norm_params)
+
+                 if lb <= 0 or ub <= lb:
+                     return 1e6
+
+                 dist_values, _, _ = self.compute_func(s, lb, ub)
+                 diff = np.mean(np.abs(dist_values - self.target_values) * self.weights)
+
+                 # Add L2 regularization, scaled by the configured weight
+                 regularization = self.regularization_weight * np.sum(norm_params**2)
+                 total_loss = diff + regularization
+
+                 # Log every 50th iteration only
+                 if self.verbose and hasattr(self, '_opt_iteration'):
+                     self._opt_iteration += 1
+                     if self._opt_iteration % 50 == 0:
+                         self.logger.debug(f" Iteration {self._opt_iteration}: Loss={diff:.6f}, Total={total_loss:.6f}, "
+                                           f"LB={lb:.6f}, UB={ub:.3f}")
+
+                 return total_loss
+
+             except Exception as e:
+                 error_msg = f"Bounds optimization objective failed: {str(e)}"
+                 self.optimization_errors.append({
+                     'method': '_optimize_bounds_parameters.objective_function',
+                     'error': error_msg,
+                     'exception_type': type(e).__name__
+                 })
+                 return 1e6
+
+         # Set initial values
+         lb_init = self.LB_init if self.LB_init is not None else bounds['LB_MIN'] * 10
+         ub_init = self.UB_init if self.UB_init is not None else bounds['UB_MIN'] * 10
+
+         # Ensure valid bounds
+         lb_init = np.clip(lb_init, bounds['LB_MIN'], bounds['LB_MAX'])
+         ub_init = np.clip(ub_init, bounds['UB_MIN'], bounds['UB_MAX'])
+
+         if lb_init >= ub_init:
+             lb_init = bounds['LB_MIN'] * 10
+             ub_init = bounds['UB_MIN'] * 10
+
+         initial_params = normalize_bounds(lb_init, ub_init)
+         norm_bounds = [(0.0, 1.0), (0.0, 1.0)]
+
+         try:
+             result = minimize(
+                 objective_function,
+                 initial_params,
+                 method=self.opt_method,
+                 bounds=norm_bounds,
+                 options={'maxiter': self.max_iterations, 'ftol': self.tolerance},
+                 tol=self.tolerance
+             )
+
+             lb_opt, ub_opt = denormalize_bounds(result.x)
+
+             # Validate results
+             if lb_opt >= ub_opt:
+                 self.logger.warning("Invalid optimized bounds, using initial values")
+                 return s, lb_init, ub_init
+
+             # Store optimization info
+             self.optimization_results['bounds_optimization'] = {
+                 'success': result.success,
+                 'fun': float(result.fun),
+                 'nit': int(result.nit),
+                 'message': result.message
+             }
+
+             return s, lb_opt, ub_opt
+
+         except Exception as e:
+             error_msg = f"Bounds optimization failed: {str(e)}"
+             self.logger.error(error_msg)
+             self.optimization_errors.append({
+                 'method': '_optimize_bounds_parameters',
+                 'error': error_msg,
+                 'exception_type': type(e).__name__
+             })
+
+             return s, lb_init, ub_init
+
+     def _get_fallback_parameters(self) -> Tuple[float, float, float]:
+         """Get fallback parameters when optimization fails."""
+         self.logger.info("Using fallback parameters...")
+
+         s_fallback = self._FALLBACK_VALUES['S']
+         lb_fallback = self._estimate_fallback_lb()
+         ub_fallback = self._estimate_fallback_ub()
+
+         if self.verbose:
+             self.logger.info(f"Using fallback parameters - S: {s_fallback}, "
+                              f"LB: {lb_fallback}, UB: {ub_fallback}")
+
+         return s_fallback, lb_fallback, ub_fallback
+
+     def _get_fallback_results(self) -> Dict[str, float]:
+         """Get the fallback results dictionary."""
+         self.logger.info("Getting fallback results...")
+
+         s_fallback, lb_fallback, ub_fallback = self._get_fallback_parameters()
+
+         self.S_opt = s_fallback
+         self.LB_opt = lb_fallback
+         self.UB_opt = ub_fallback
+
+         return {
+             'S': float(s_fallback),
+             'LB': float(lb_fallback),
+             'UB': float(ub_fallback)
+         }
+
+     def _estimate_fallback_lb(self) -> float:
+         """Estimate a fallback LB value."""
+         self.logger.info("Estimating fallback LB...")
+
+         bounds = self._OPTIMIZATION_BOUNDS
+         if self.LB_init is not None:
+             return max(self.LB_init, bounds['LB_MIN'])
+         else:
+             return bounds['LB_MIN'] * self._FALLBACK_VALUES['LB_FACTOR']
+
+     def _estimate_fallback_ub(self) -> float:
+         """Estimate a fallback UB value."""
+         self.logger.info("Estimating fallback UB...")
+
+         bounds = self._OPTIMIZATION_BOUNDS
+         if self.UB_init is not None:
+             return min(self.UB_init, bounds['UB_MAX'])
+         else:
+             return bounds['UB_MIN'] * self._FALLBACK_VALUES['UB_FACTOR']
+
+     def get_optimization_info(self) -> Dict[str, Any]:
+         """
+         Get detailed optimization information.
+
+         Returns:
+         --------
+         Dict[str, Any]
+             Optimization results, errors, and warnings
+         """
+         self.logger.info("Getting optimization info...")
+
+         return {
+             'results': self.optimization_results.copy(),
+             'errors': self.optimization_errors.copy(),
+             'warnings': self.optimization_warnings.copy(),
+             'optimized_parameters': {
+                 'S': self.S_opt,
+                 'LB': self.LB_opt,
+                 'UB': self.UB_opt
+             } if self.S_opt is not None else None
+         }
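+
+     # Illustrative shape of the returned dict:
+     #   {'results': {...}, 'errors': [...], 'warnings': [...],
+     #    'optimized_parameters': {'S': ..., 'LB': ..., 'UB': ...} or None}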
+
+     def evaluate_with_optimized_parameters(self) -> Tuple[np.ndarray, Any, Any]:
+         """
+         Evaluate the compute function with the optimized parameters.
+
+         Returns:
+         --------
+         Tuple containing the results of compute_func(S_opt, LB_opt, UB_opt)
+         """
+         self.logger.info("Evaluating with optimized parameters...")
+         if self.S_opt is None or self.LB_opt is None or self.UB_opt is None:
+             self.logger.error("Parameters must be optimized before evaluation. Call optimize() first.")
+             raise ValueError("Parameters must be optimized before evaluation. Call optimize() first.")
+
+         return self.compute_func(self.S_opt, self.LB_opt, self.UB_opt)
+
+     def compute_final_loss(self) -> float:
+         """
+         Compute the final loss with the optimized parameters.
+
+         Returns:
+         --------
+         float
+             Final weighted mean absolute error
+         """
+         self.logger.info("Computing final loss with optimized parameters...")
+         try:
+             dist_values, _, _ = self.evaluate_with_optimized_parameters()
+             loss = np.mean(np.abs(dist_values - self.target_values) * self.weights)
+             return float(loss)
+         except Exception as e:
+             error_msg = f"Final loss computation failed: {str(e)}"
+             self.logger.error(error_msg)
+             self.optimization_errors.append({
+                 'method': 'compute_final_loss',
+                 'error': error_msg,
+                 'exception_type': type(e).__name__
+             })
+             return float('inf')
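+
+     # Example (illustrative): after optimize(), compute_final_loss() returns the
+     # weighted MAE at (S_opt, LB_opt, UB_opt), or inf if evaluation fails.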
+
+     def reset(self):
+         """Reset the optimization state for reuse."""
+         self.logger.info("Resetting engine state...")
+         self.S_opt = None
+         self.LB_opt = None
+         self.UB_opt = None
+         self.optimization_results.clear()
+         self.optimization_errors.clear()
+         self.optimization_warnings.clear()
+
+         if self.verbose:
+             self.logger.info("State reset successfully")
+
+     def __repr__(self) -> str:
+         """String representation of the engine."""
+         status = "optimized" if self.S_opt is not None else "not optimized"
+         return f"DistFuncEngine(target_values={len(self.target_values)}, status={status})"