machinegnostics-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/magcal/gdf/data_cluster.py
@@ -0,0 +1,975 @@
+ """
+ DataCluster: Advanced Cluster Boundary Detection for Gnostic Distribution Functions (GDFs)
+
+ The DataCluster class identifies main cluster boundaries (CLB and CUB) from probability density functions of four types of Gnostic Distribution Functions: ELDF, EGDF, QLDF, and QGDF.
+
+ Author: Nirmal Parmar
+ Machine Gnostics
+ """
+
+ import numpy as np
+ import warnings
+ import logging
+ from machinegnostics.magcal.util.logging import get_logger
+ import matplotlib.pyplot as plt
+ from scipy.ndimage import gaussian_filter1d
+ from scipy.signal import find_peaks, argrelextrema
+ from typing import Union, Dict, Any, Optional, Tuple, List
+
19
+ class DataCluster:
20
+ """
21
+ Advanced cluster boundary detection for Gnostic Distribution Functions (GDFs).
22
+
23
+ The DataCluster class identifies main cluster boundaries (CLB and CUB) from probability
24
+ density functions of four types of Gnostic Distribution Functions: ELDF, EGDF, QLDF, and QGDF.
25
+ It uses normalized PDF analysis with derivative-based methods and shape detection algorithms
26
+ to precisely locate cluster boundaries.
27
+
28
+ Clustering Performance by GDF Type:
29
+ - **Local Functions (ELDF, QLDF)**: Excellent clustering performance due to unlimited
30
+ flexibility controlled by scale parameter
31
+ - **Global Functions (EGDF, QGDF)**: Limited clustering effectiveness due to constrained
32
+ flexibility and uniqueness assumptions
33
+
34
+ Key Features:
35
+ - PDF normalization for consistent analysis across all GDF types
36
+ - QLDF W-shape vs U-shape detection for accurate valley boundary identification
37
+ - Derivative-based boundary detection with adaptive thresholds
38
+ - Multiple fallback methods for robust cluster identification
39
+ - Comprehensive error handling and validation
40
+
41
+ Parameters
42
+ ----------
43
+ gdf : ELDF, EGDF, QLDF, or QGDF
44
+ A fitted Gnostic Distribution Function object with pdf_points available.
45
+ Must have been fitted with catch=True to ensure pdf_points are stored.
46
+ verbose : bool, default=False
47
+ Enable detailed progress reporting and diagnostic output.
48
+ catch : bool, default=True
49
+ Enable error catching and graceful degradation (inherited from GDF conventions).
50
+ derivative_threshold : float, default=0.01
51
+ Threshold for ELDF/EGDF boundary detection. Points where (PDF + 1st_derivative)
52
+ falls below this threshold are considered boundary candidates.
53
+ slope_percentile : int, default=70
54
+ Percentile threshold for QLDF/QGDF slope-based boundary detection. Higher values
55
+ create more conservative (narrower) cluster boundaries.
56
+
+ Attributes
+ ----------
+ LCB : float or None
+ Cluster Lower Boundary (CLB) - left boundary of the main cluster
+ UCB : float or None
+ Cluster Upper Boundary (CUB) - right boundary of the main cluster
63
+ z0 : float or None
64
+ Characteristic point of the distribution (from GDF object)
65
+ S_opt : float or None
66
+ Optimal scale parameter (from GDF object)
67
+ pdf_normalized : ndarray or None
68
+ Min-max normalized PDF values [0,1] used for analysis
69
+ pdf_original : ndarray or None
70
+ Original PDF values before normalization
71
+ params : dict
72
+ Complete analysis results including boundaries, methods used, and diagnostics
73
+
74
+ Methods
75
+ -------
76
+ fit()
77
+ Perform cluster boundary detection analysis
78
+ plot(figsize=(12, 8))
79
+ Visualize PDF, boundaries, and derivative analysis
80
+ results()
81
+ Return comprehensive analysis results dictionary
82
+
83
+ Algorithm Details
84
+ ----------------
85
+ **ELDF/EGDF (Estimating Distribution Functions):**
86
+ - PDF has global maximum at z0 (characteristic point)
87
+ - Boundaries found where (PDF + 1st_derivative) ≤ derivative_threshold
88
+ - Main cluster region is BETWEEN CLB and CUB (shaded green)
89
+ - Works best with local ELDF due to flexible scale parameter control
90
+
91
+ **QLDF (Quantifying Local Distribution Function):**
92
+ - **W-shape detection**: Identifies peaks between boundary extremes
93
+ - 1 internal peak → W-shape → Find valley minima as boundaries
94
+ - 0 internal peaks → U-shape → Use slope transition method
95
+ - 2+ internal peaks → Heterogeneous data warning
96
+ - **Valley detection**: Uses scipy.signal.argrelextrema for precise minima
97
+ - Main cluster region is OUTSIDE CLB and CUB boundaries (shaded green)
98
+
99
+ **QGDF (Quantifying Global Distribution Function):**
100
+ - Uses slope transition detection with percentile-based thresholds
101
+ - Limited effectiveness due to global function constraints
102
+ - Fallback to curvature analysis when slope detection fails
103
+ - Main cluster region is OUTSIDE CLB and CUB boundaries (shaded green)
104
+
105
+ Normalization Strategy
106
+ ---------------------
107
+ All PDFs are normalized to [0,1] range using min-max normalization:
108
+ - Ensures consistent threshold application across different GDF types
109
+ - Enables robust derivative analysis regardless of original PDF scale
110
+ - Maintains relative shape characteristics while standardizing magnitude
111
+
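+ A minimal sketch of the min-max step applied here (illustrative values only):
+
+ >>> import numpy as np
+ >>> pdf = np.array([0.2, 0.8, 1.4, 0.8, 0.2])
+ >>> pdf_norm = (pdf - pdf.min()) / (pdf.max() - pdf.min())  # ~[0.0, 0.5, 1.0, 0.5, 0.0]
+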
112
+ Error Handling
113
+ -------------
114
+ - Validates GDF object fitness and required attributes
115
+ - Warns when using global functions (EGDF/QGDF) for clustering
116
+ - Provides fallback to data bounds when boundary detection fails
117
+ - Comprehensive error logging with method traceability
118
+
119
+ Examples
120
+ --------
121
+ >>> # Basic usage with QLDF
122
+ >>> from machinegnostics.magcal import QLDF
123
+ >>> from machinegnostics.magcal import DataCluster
124
+ >>>
125
+ >>> # Fit QLDF first
126
+ >>> qldf = QLDF(data=your_data, catch=True)
127
+ >>> qldf.fit()
128
+ >>>
129
+ >>> # Perform cluster analysis
130
+ >>> cluster = DataCluster(gdf=qldf, verbose=True)
131
+ >>> cluster.fit()
132
+ >>> cluster.plot()
133
+ >>>
134
+ >>> # Get results
135
+ >>> results = cluster.results()
136
+ >>> print(f"CLB: {results['LCB']}, CUB: {results['UCB']}")
137
+ >>> print(f"Cluster width: {results['cluster_width']}")
138
+ >>> print(f"PDF shape: {results['pdf_shape']}") # For QLDF
139
+
140
+ >>> # Advanced usage with custom thresholds
141
+ >>> cluster = DataCluster(
142
+ ... gdf=eldf,
143
+ ... derivative_threshold=0.005, # More sensitive
144
+ ... slope_percentile=80, # More conservative
145
+ ... verbose=True
146
+ ... )
147
+ >>> cluster.fit()
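+ >>>
+ >>> # The boundary summary is also mirrored into the fitted GDF's own params
+ >>> # (illustrative; assumes the fit() call above succeeded):
+ >>> summary = eldf.params['data_cluster']
+ >>> print(summary['LCB'], summary['UCB'])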
148
+
149
+ Notes
150
+ -----
151
+ - Clustering works best with local distribution functions (ELDF, QLDF)
152
+ - Global functions (EGDF, QGDF) have limited clustering effectiveness due to
153
+ their uniqueness constraints and automatic parameter optimization
154
+ - QLDF W-shape detection is particularly effective for data with central clusters
155
+ between outlying regions
156
+ - For heterogeneous data with multiple clusters, consider data splitting before analysis
157
+
158
+ References
159
+ ----------
160
+ Based on Gnostic Distribution Function theory and cluster analysis methods
161
+ as described in mathematical gnostics literature.
162
+ """
163
+ def __init__(self, gdf, verbose=False, catch=True, derivative_threshold=0.01, slope_percentile=70):
164
+ """
165
+ Initialize DataCluster for boundary detection analysis.
166
+
167
+ Parameters
168
+ ----------
169
+ gdf : ELDF, EGDF, QLDF, or QGDF
170
+ A fitted Gnostic Distribution Function object. Must have pdf_points
171
+ available (fitted with catch=True).
172
+ verbose : bool, default=False
173
+ Enable detailed progress reporting and diagnostic messages.
174
+ catch : bool, default=True
175
+ Enable error catching and graceful degradation.
176
+ derivative_threshold : float, default=0.01
177
+ Threshold for ELDF/EGDF boundary detection. Lower values create
178
+ wider cluster boundaries, higher values create narrower boundaries.
179
+ slope_percentile : int, default=70
180
+ Percentile threshold (0-100) for QLDF/QGDF slope detection.
181
+ Higher values create more conservative cluster boundaries.
182
+
183
+ Raises
184
+ ------
185
+ ValueError
186
+ If GDF object is not fitted or missing required attributes.
187
+ AttributeError
188
+ If GDF object is missing pdf_points (ensure catch=True during fitting).
189
+ """
190
+ self.gdf = gdf
191
+ self.gdf_type = gdf.__class__.__name__.lower()
192
+ self.verbose = verbose
193
+ self.catch = catch
194
+ self.derivative_threshold = derivative_threshold
195
+ self.slope_percentile = slope_percentile
196
+
197
+ self.params = {
198
+ 'gdf_type': self.gdf_type,
199
+ 'derivative_threshold': self.derivative_threshold,
200
+ 'slope_percentile': self.slope_percentile,
201
+ 'LCB': None,
202
+ 'UCB': None,
203
+ 'Z0': None,
204
+ 'S_opt': None,
205
+ 'cluster_width': None,
206
+ 'clustering_successful': False,
207
+ 'method_used': None,
208
+ 'normalization_method': None,
209
+ 'pdf_shape': None,
210
+ 'errors': [],
211
+ 'warnings': []
212
+ }
213
+
214
+ self.LCB = None
215
+ self.UCB = None
216
+ self.z0 = None
217
+ self.S_opt = None
218
+ self._fitted = False
219
+
220
+ self.pdf_normalized = None
221
+ self.pdf_original = None
222
+
223
+ # logger setup
224
+ self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
225
+ self.logger.debug(f"{self.__class__.__name__} initialized:")
226
+
227
+ # validation
228
+ try:
229
+ self._validate_gdf()
230
+ self._validate_gdf_type_for_clustering()
231
+ except Exception as e:
232
+ self._append_error(f"GDF validation failed: {str(e)}", type(e).__name__)
233
+
234
+ def _validate_gdf(self):
235
+ self.logger.info("Validating GDF object for clustering analysis.")
236
+ if not hasattr(self.gdf, '_fitted') or not self.gdf._fitted:
237
+ self.logger.error("GDF object must be fitted before cluster analysis.")
238
+ raise ValueError("GDF object must be fitted before cluster analysis")
239
+
240
+ if not hasattr(self.gdf, 'pdf_points') or self.gdf.pdf_points is None:
241
+ self.logger.error("GDF object missing pdf_points. Ensure catch=True during fitting.")
242
+ raise AttributeError("GDF object missing pdf_points. Ensure catch=True during fitting.")
243
+
244
+ if not hasattr(self.gdf, 'data'):
245
+ self.logger.error("GDF object missing data attribute.")
246
+ raise ValueError("GDF object missing data attribute.")
247
+
248
+ def _validate_gdf_type_for_clustering(self):
249
+ self.logger.info("Validating GDF type for clustering suitability.")
250
+
251
+ if self.gdf_type in ['egdf', 'qgdf']:
252
+ gdf_full_name = 'EGDF' if self.gdf_type == 'egdf' else 'QGDF'
253
+ local_alternative = 'ELDF' if self.gdf_type == 'egdf' else 'QLDF'
254
+
255
+ warning_msg = (
256
+ f"Using {gdf_full_name} (Global Distribution Function) for clustering analysis. "
257
+ f"Clustering may not be as effective with global functions. "
258
+ f"Consider using {local_alternative} (Local Distribution Function) for better clustering results."
259
+ )
260
+
261
+ self._append_warning(warning_msg)
262
+
263
+ def _append_error(self, error_message, exception_type=None):
264
+ error_entry = {
265
+ 'method': 'DataCluster',
266
+ 'error': error_message,
267
+ 'exception_type': exception_type or 'DataClusterError'
268
+ }
269
+
270
+ self.params['errors'].append(error_entry)
271
+
272
+ if hasattr(self.gdf, 'params') and 'errors' in self.gdf.params:
273
+ self.gdf.params['errors'].append(error_entry)
274
+ elif hasattr(self.gdf, 'params'):
275
+ self.gdf.params['errors'] = [error_entry]
276
+
277
+ self.logger.error(f" Error: {error_message} ({exception_type})")
278
+
279
+ def _append_warning(self, warning_message):
280
+ warning_entry = {
281
+ 'method': 'DataCluster',
282
+ 'warning': warning_message
283
+ }
284
+
285
+ self.params['warnings'].append(warning_entry)
286
+
287
+ if hasattr(self.gdf, 'params') and 'warnings' in self.gdf.params:
288
+ self.gdf.params['warnings'].append(warning_entry)
289
+ elif hasattr(self.gdf, 'params'):
290
+ self.gdf.params['warnings'] = [warning_entry]
291
+
292
+ self.logger.warning(f" Warning: {warning_message}")
293
+
294
+ def _get_pdf_data(self):
295
+ self.logger.info("Retrieving PDF data from GDF object.")
296
+ return self.gdf.pdf_points
297
+
298
+ def _get_data_points(self):
299
+ self.logger.info("Retrieving data points from GDF object.")
300
+ if hasattr(self.gdf, 'di_points_n') and self.gdf.di_points_n is not None:
301
+ return self.gdf.di_points_n
302
+ elif hasattr(self.gdf, 'di_points') and self.gdf.di_points is not None:
303
+ return self.gdf.di_points
304
+ elif hasattr(self.gdf, 'params') and 'di_points' in self.gdf.params:
305
+ return self.gdf.params['di_points']
306
+ else:
307
+ self.logger.error("Cannot find data points in GDF object")
308
+ raise AttributeError("Cannot find data points in GDF object")
309
+
310
+ def _normalize_pdf(self, pdf_data):
311
+ self.logger.info("Normalizing PDF data.")
312
+ self.pdf_original = pdf_data.copy()
313
+
314
+ pdf_min = np.min(pdf_data)
315
+ pdf_max = np.max(pdf_data)
316
+
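+ # Degenerate case below: a perfectly flat PDF carries no shape information, so it
+ # is mapped to a constant 0.5 rather than dividing by zero in the min-max formula.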
317
+ if pdf_max == pdf_min:
318
+ normalized_pdf = np.ones_like(pdf_data) * 0.5
319
+ self.params['normalization_method'] = 'constant_pdf'
320
+ else:
321
+ normalized_pdf = (pdf_data - pdf_min) / (pdf_max - pdf_min)
322
+ self.params['normalization_method'] = 'min_max_normalization'
323
+
324
+ self.logger.info(f"PDF normalization: {self.params['normalization_method']}")
325
+ self.logger.info(f"Normalized PDF range: [{np.min(normalized_pdf):.3f}, {np.max(normalized_pdf):.3f}]")
326
+
327
+ return normalized_pdf
328
+
329
+ def _get_z0(self):
330
+ self.logger.info("Retrieving Z0 from GDF object.")
331
+
332
+ if hasattr(self.gdf, 'z0') and self.gdf.z0 is not None:
333
+ return self.gdf.z0
334
+ elif hasattr(self.gdf, 'params') and 'z0' in self.gdf.params:
335
+ return self.gdf.params['z0']
336
+ else:
337
+ self._append_warning("Z0 not found in GDF object. Using PDF global extremum as Z0.")
338
+ return self._find_pdf_z0()
339
+
340
+ def _get_s_opt(self):
341
+ self.logger.info("Retrieving S_opt from GDF object.")
342
+
343
+ if hasattr(self.gdf, 'S_opt') and self.gdf.S_opt is not None:
344
+ return self.gdf.S_opt
345
+ elif hasattr(self.gdf, 'params') and 'S_opt' in self.gdf.params:
346
+ return self.gdf.params['S_opt']
347
+ else:
348
+ self._append_warning("S_opt not found in GDF object. Using default value 1.0.")
349
+ return 1.0
350
+
351
+ def _get_data_bounds(self):
352
+ self.logger.info("Retrieving data bounds from GDF object.")
353
+
354
+ if hasattr(self.gdf, 'DLB') and hasattr(self.gdf, 'DUB'):
355
+ return self.gdf.DLB, self.gdf.DUB
356
+ else:
357
+ return np.min(self.gdf.data), np.max(self.gdf.data)
358
+
359
+ def _find_pdf_z0(self):
360
+ self.logger.info("Finding Z0 as PDF global extremum.")
361
+
362
+ data_points = self._get_data_points()
363
+
364
+ if self.gdf_type in ['eldf', 'egdf']:
365
+ max_idx = np.argmax(self.pdf_normalized)
366
+ return data_points[max_idx]
367
+ else:
368
+ min_idx = np.argmin(self.pdf_normalized)
369
+ return data_points[min_idx]
370
+
371
+ def _find_z0_index(self, data_points):
372
+ self.logger.info("Finding index of Z0 in data points.")
373
+
374
+ z0_idx = np.argmin(np.abs(data_points - self.z0))
375
+ return z0_idx
376
+
377
+ def _detect_qldf_shape_and_boundaries(self, pdf_normalized, data_points):
378
+ self.logger.info("Detecting QLDF shape and determining boundaries.")
379
+
380
+ z0_idx = self._find_z0_index(data_points)
381
+
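+ # Shape classification used below:
+ #   1 internal peak   -> W-shape       -> valley minima become the boundaries
+ #   0 internal peaks  -> U-shape       -> slope-transition method
+ #   2+ internal peaks -> heterogeneous -> warn and fall back to the slope method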
382
+ # Find all peaks with lower sensitivity to catch all significant peaks
383
+ peaks, peak_properties = find_peaks(pdf_normalized,
384
+ height=0.05,
385
+ distance=5,
386
+ prominence=0.05)
387
+
388
+ # Exclude boundary peaks (first and last 10% of data)
389
+ boundary_margin = len(data_points) // 10
390
+ internal_peaks = peaks[(peaks > boundary_margin) & (peaks < len(data_points) - boundary_margin)]
391
+
392
+ if self.verbose:
393
+ self.logger.info(f"Found {len(internal_peaks)} internal peaks at indices: {internal_peaks}")
394
+ if len(internal_peaks) > 0:
395
+ peak_values = [f'{data_points[p]:.1f}' for p in internal_peaks]
396
+ self.logger.info(f"Internal peak values: {peak_values}")
397
+
398
+ # Determine shape based on number of internal peaks
399
+ if len(internal_peaks) == 1:
400
+ # W-shape: One peak between extremes
401
+ self.params['pdf_shape'] = 'W-shape'
402
+ return self._find_w_shape_valley_boundaries(pdf_normalized, data_points, internal_peaks[0])
403
+
404
+ elif len(internal_peaks) == 0:
405
+ # U-shape: No peaks between extremes
406
+ self.params['pdf_shape'] = 'U-shape'
407
+ return self._find_u_shape_slope_boundaries(pdf_normalized, data_points)
408
+
409
+ else:
410
+ # Heterogeneous: Multiple peaks (2+)
411
+ self.params['pdf_shape'] = 'Heterogeneous'
412
+ warning_msg = f"QLDF detected {len(internal_peaks)} internal peaks. Data may be heterogeneous. Consider splitting the dataset."
413
+ self._append_warning(warning_msg)
414
+ # Fallback to slope method
415
+ return self._find_u_shape_slope_boundaries(pdf_normalized, data_points)
416
+
417
+ def _find_w_shape_valley_boundaries(self, pdf_normalized, data_points, central_peak_idx):
418
+ self.logger.info("W-shape detected, using valley detection method.")
419
+ z0_idx = self._find_z0_index(data_points)
420
+ central_peak_value = data_points[central_peak_idx]
421
+
422
+ self.logger.info(f"W-shape detected with central peak at {central_peak_value:.3f}")
423
+ self.logger.info(f"Z0 at {data_points[z0_idx]:.3f}")
424
+
425
+ left_candidates = []
426
+ right_candidates = []
427
+
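+ # Candidates are gathered by three methods in order of preference: exact local
+ # minima (argrelextrema), regional minima around the central peak, and finally a
+ # bottom-percentile fallback. The caller uses the first candidate on each side.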
428
+ # Method 1: Find actual minima using scipy
429
+ self.logger.info("Finding valley minima using scipy.signal.argrelextrema")
430
+ minima_indices = argrelextrema(pdf_normalized, np.less, order=3)[0]
431
+
432
+ # Filter minima and find those on left and right of central peak
433
+ left_minima = [m for m in minima_indices if m < central_peak_idx and m > len(data_points)//10]
434
+ right_minima = [m for m in minima_indices if m > central_peak_idx and m < len(data_points)*9//10]
435
+
436
+ self.logger.info(f"Found {len(left_minima)} left minima, {len(right_minima)} right minima")
437
+
438
+ # Take the closest minima to the central peak
439
+ if left_minima:
440
+ closest_left_min = max(left_minima) # Closest to central peak from left
441
+ left_candidates.append(closest_left_min)
442
+ self.logger.info(f"Left valley minimum at {data_points[closest_left_min]:.3f}")
443
+
444
+ if right_minima:
445
+ closest_right_min = min(right_minima) # Closest to central peak from right
446
+ right_candidates.append(closest_right_min)
447
+ self.logger.info(f"Right valley minimum at {data_points[closest_right_min]:.3f}")
448
+
449
+ # Method 2: If no clear minima found, use regional minimum search
450
+ self.logger.info("Checking for regional minima if no clear minima found")
451
+ if not left_candidates or not right_candidates:
452
+ self.logger.info("No clear minima found, using regional minimum search")
453
+
454
+ # Define search regions around the central peak
455
+ search_radius = (len(data_points) // 4)
456
+
457
+ # Left region: from start to central peak
458
+ left_start = max(0, central_peak_idx - search_radius)
459
+ left_end = central_peak_idx
460
+ if not left_candidates and left_end > left_start:
461
+ left_region = pdf_normalized[left_start:left_end]
462
+ local_min_idx = np.argmin(left_region) + left_start
463
+ left_candidates.append(local_min_idx)
464
+ self.logger.info(f"Left regional minimum at {data_points[local_min_idx]:.3f}")
465
+
466
+ # Right region: from central peak to end
467
+ right_start = central_peak_idx
468
+ right_end = min(len(pdf_normalized), central_peak_idx + search_radius)
469
+ if not right_candidates and right_end > right_start:
470
+ right_region = pdf_normalized[right_start:right_end]
471
+ local_min_idx = np.argmin(right_region) + right_start
472
+ right_candidates.append(local_min_idx)
473
+ self.logger.info(f"Right regional minimum at {data_points[local_min_idx]:.3f}")
474
+
475
+ # Method 3: Enhanced valley detection using percentile approach
476
+ self.logger.info("Checking for percentile-based valleys")
477
+ if not left_candidates or not right_candidates:
478
+ self.logger.info("Using percentile-based valley detection")
479
+
480
+ # Find points in bottom 20% of PDF values
481
+ valley_threshold = np.percentile(pdf_normalized, 20)
482
+ valley_indices = np.where(pdf_normalized <= valley_threshold)[0]
483
+
484
+ # Split valleys by central peak
485
+ left_valleys = [v for v in valley_indices if v < central_peak_idx]
486
+ right_valleys = [v for v in valley_indices if v > central_peak_idx]
487
+
488
+ if left_valleys and not left_candidates:
489
+ # Take valley closest to central peak
490
+ left_candidates.append(max(left_valleys))
491
+ self.logger.info(f"Left percentile valley at {data_points[max(left_valleys)]:.3f}")
492
+
493
+ if right_valleys and not right_candidates:
494
+ # Take valley closest to central peak
495
+ right_candidates.append(min(right_valleys))
496
+ self.logger.info(f"Right percentile valley at {data_points[min(right_valleys)]:.3f}")
497
+
498
+ return left_candidates, right_candidates
499
+
500
+ def _find_u_shape_slope_boundaries(self, pdf_normalized, data_points):
501
+ self.logger.info("U-shape detected, using slope transition method.")
502
+
503
+ z0_idx = self._find_z0_index(data_points)
504
+
505
+ self.logger.info("U-shape detected, using slope transition method")
506
+
507
+ # Use existing slope detection logic
508
+ first_derivative = np.gradient(pdf_normalized)
509
+ deriv_abs = np.abs(first_derivative)
510
+ slope_threshold = np.percentile(deriv_abs, self.slope_percentile)
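+ # Walking outward from z0 (after skipping a small search radius), the first point
+ # where |dPDF/dx| exceeds this percentile threshold becomes a boundary candidate.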
511
+
512
+ left_candidates = []
513
+ right_candidates = []
514
+
515
+ search_radius = min(20, len(data_points) // 4)
516
+
517
+ # Search for slope transitions
518
+ for i in range(z0_idx - search_radius, -1, -1):
519
+ if i >= 0 and deriv_abs[i] > slope_threshold:
520
+ left_candidates.append(i)
521
+ break
522
+
523
+ for i in range(z0_idx + search_radius, len(deriv_abs)):
524
+ if deriv_abs[i] > slope_threshold:
525
+ right_candidates.append(i)
526
+ break
527
+
528
+ return left_candidates, right_candidates
529
+
530
+ def _find_boundaries_normalized_method(self, pdf_normalized, data_points):
531
+ self.logger.info("Finding cluster boundaries using normalized PDF and derivative methods.")
532
+
533
+ z0_idx = self._find_z0_index(data_points)
534
+
535
+ # Calculate derivatives on normalized PDF
536
+ first_derivative = np.gradient(pdf_normalized)
537
+ second_derivative = np.gradient(first_derivative)
538
+
539
+ if self.gdf_type in ['eldf', 'egdf']:
540
+ self.logger.info("Using ELDF/EGDF boundary detection method.")
541
+ # ELDF/EGDF: Find where pdf + derivative falls below threshold
542
+ combined_signal = pdf_normalized + first_derivative
543
+
544
+ left_candidates = []
545
+ right_candidates = []
546
+
547
+ # Search outward from Z0
548
+ for i in range(z0_idx - 1, -1, -1):
549
+ if combined_signal[i] <= self.derivative_threshold:
550
+ left_candidates.append(i)
551
+ break
552
+
553
+ for i in range(z0_idx + 1, len(combined_signal)):
554
+ if combined_signal[i] <= self.derivative_threshold:
555
+ right_candidates.append(i)
556
+ break
557
+
558
+ if left_candidates:
559
+ self.LCB = data_points[left_candidates[0]]
560
+ if right_candidates:
561
+ self.UCB = data_points[right_candidates[0]]
562
+
563
+ self.params['method_used'] = 'normalized_derivative_eldf_egdf'
564
+
565
+ elif self.gdf_type == 'qldf':
566
+ self.logger.info("Using QLDF boundary detection method.")
567
+ # QLDF: Use shape-based detection strategy
568
+ left_candidates, right_candidates = self._detect_qldf_shape_and_boundaries(pdf_normalized, data_points)
569
+
570
+ if left_candidates:
571
+ self.LCB = data_points[left_candidates[0]]
572
+ if right_candidates:
573
+ self.UCB = data_points[right_candidates[0]]
574
+
575
+ shape = self.params.get('pdf_shape', 'unknown')
576
+ self.params['method_used'] = f'qldf_{shape.lower()}_valley_detection'
577
+
578
+ else:
579
+ self.logger.info("Using QGDF boundary detection method.")
580
+ # QGDF: Use slope transition method
581
+ deriv_abs = np.abs(first_derivative)
582
+ slope_threshold = np.percentile(deriv_abs, self.slope_percentile)
583
+
584
+ left_candidates = []
585
+ right_candidates = []
586
+
587
+ search_radius = min(20, len(data_points) // 4)
588
+
589
+ for i in range(z0_idx - search_radius, -1, -1):
590
+ if i >= 0 and deriv_abs[i] > slope_threshold:
591
+ left_candidates.append(i)
592
+ break
593
+
594
+ for i in range(z0_idx + search_radius, len(deriv_abs)):
595
+ if deriv_abs[i] > slope_threshold:
596
+ right_candidates.append(i)
597
+ break
598
+
599
+ if not left_candidates or not right_candidates:
600
+ self.logger.info("Using normalized curvature-based detection")
601
+
602
+ curvature_threshold = np.std(second_derivative) * 0.7
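+ # Curvature fallback: flag the first point on each side of z0 whose second-derivative
+ # magnitude exceeds ~0.7 standard deviations of the curvature signal.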
603
+
604
+ for i in range(z0_idx - 1, -1, -1):
605
+ if abs(second_derivative[i]) > curvature_threshold:
606
+ if not left_candidates:
607
+ left_candidates.append(i)
608
+ break
609
+
610
+ for i in range(z0_idx + 1, len(second_derivative)):
611
+ if abs(second_derivative[i]) > curvature_threshold:
612
+ if not right_candidates:
613
+ right_candidates.append(i)
614
+ break
615
+
616
+ self.params['method_used'] = 'normalized_curvature_qgdf'
617
+ else:
618
+ self.params['method_used'] = 'normalized_slope_transition_qgdf'
619
+
620
+ if left_candidates:
621
+ self.LCB = data_points[left_candidates[0]]
622
+ if right_candidates:
623
+ self.UCB = data_points[right_candidates[0]]
624
+
625
+ if self.verbose:
626
+ method = self.params['method_used']
627
+ self.logger.info(f"Using method: {method}")
628
+ if hasattr(self, 'params') and 'pdf_shape' in self.params:
629
+ self.logger.info(f"PDF shape: {self.params['pdf_shape']}")
630
+ if self.LCB is not None:
631
+ self.logger.info(f"Found CLB at {self.LCB:.3f}")
632
+ if self.UCB is not None:
633
+ self.logger.info(f"Found CUB at {self.UCB:.3f}")
634
+
635
+ def _fallback_to_data_bounds(self):
636
+ self.logger.info("Falling back to data bounds for cluster boundaries.")
637
+
638
+ dlb, dub = self._get_data_bounds()
639
+
640
+ if self.LCB is None:
641
+ self.LCB = dlb
642
+ if self.verbose:
643
+ self.logger.info(f"CLB set to data lower bound: {self.LCB:.3f}")
644
+
645
+ if self.UCB is None:
646
+ self.UCB = dub
647
+ if self.verbose:
648
+ self.logger.info(f"CUB set to data upper bound: {self.UCB:.3f}")
649
+
650
+ def _update_params(self):
651
+ self.logger.info("Updating parameters with clustering results.")
652
+ self.params.update({
653
+ 'LCB': float(self.LCB) if self.LCB is not None else None,
654
+ 'UCB': float(self.UCB) if self.UCB is not None else None,
655
+ 'Z0': float(self.z0) if self.z0 is not None else None,
656
+ 'S_opt': float(self.S_opt) if self.S_opt is not None else None,
657
+ 'cluster_width': float(self.UCB - self.LCB) if (self.LCB is not None and self.UCB is not None) else None,
658
+ 'clustering_successful': self.LCB is not None and self.UCB is not None
659
+ })
660
+
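+ # Mirror a compact summary of the clustering outcome into the parent GDF's params
+ # under the 'data_cluster' key so it travels with the fitted GDF object.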
661
+ if hasattr(self.gdf, 'params'):
662
+ cluster_params = {
663
+ 'data_cluster': {
664
+ 'LCB': self.params['LCB'],
665
+ 'UCB': self.params['UCB'],
666
+ 'cluster_width': self.params['cluster_width'],
667
+ 'clustering_successful': self.params['clustering_successful'],
668
+ 'method_used': self.params['method_used'],
669
+ 'derivative_threshold': self.params['derivative_threshold'],
670
+ 'slope_percentile': self.params['slope_percentile'],
671
+ 'normalization_method': self.params['normalization_method'],
672
+ 'pdf_shape': self.params.get('pdf_shape', None)
673
+ }
674
+ }
675
+ self.gdf.params.update(cluster_params)
676
+
677
+ def fit(self, plot: bool = False) -> Optional[Tuple[float, float]]:
678
+ """
679
+ Perform cluster boundary detection analysis on the GDF.
680
+
681
+ Executes the complete clustering pipeline:
682
+ 1. Validates GDF object and extracts PDF data
683
+ 2. Normalizes PDF for consistent analysis
684
+ 3. Applies GDF-specific boundary detection algorithms
685
+ 4. Implements fallback strategies if needed
686
+ 5. Updates all parameters and results
687
+
688
+ The method automatically selects the appropriate algorithm based on GDF type:
689
+ - **ELDF/EGDF**: Derivative threshold method
690
+ - **QLDF**: Shape detection (W-shape vs U-shape) with valley finding
691
+ - **QGDF**: Slope transition detection with curvature fallback
692
+
693
+ Parameters
694
+ ----------
695
+ plot : bool, default=False
696
+ If True, generates a plot of the PDF, detected boundaries, and derivative analysis.
697
+
+ Returns
+ -------
+ tuple of float or (None, None)
+ The detected cluster boundaries (LCB, UCB). Returns (None, None) if an error
+ occurred; check self.params['errors'] for detailed error information.
703
+
704
+ Side Effects
705
+ ------------
706
+ - Sets self.LCB and self.UCB with detected boundaries
707
+ - Updates self.params with complete analysis results
708
+ - Stores normalized and original PDF data
709
+ - Adds cluster parameters to original GDF object
710
+
711
+ Examples
712
+ --------
713
+ >>> cluster = DataCluster(gdf=qldf, verbose=True)
714
+ >>> CLB, CUB = cluster.fit()
715
+ >>> if CLB is not None and CUB is not None:
716
+ ... print(f"Boundaries: CLB={CLB:.3f}, CUB={CUB:.3f}")
717
+ ... else:
718
+ ... print("Clustering failed:", cluster.params['errors'])
719
+ """
720
+ self.logger.info("Starting cluster boundary detection analysis.")
721
+ try:
722
+ if self.verbose:
723
+ self.logger.info(f"Starting normalized cluster analysis for {self.gdf_type.upper()}")
724
+ self.logger.info(f"Derivative threshold: {self.derivative_threshold}")
725
+ self.logger.info(f"Slope percentile: {self.slope_percentile}")
726
+
727
+ # Get basic data
728
+ self.logger.info("Extracting PDF and data points from GDF.")
729
+ pdf_data = self._get_pdf_data()
730
+ data_points = self._get_data_points()
731
+
732
+ # Normalize PDF for consistent processing
733
+ self.logger.info("Normalizing PDF data for analysis.")
734
+ self.pdf_normalized = self._normalize_pdf(pdf_data)
735
+
736
+ # Get Z0 and S_opt
737
+ self.logger.info("Retrieving Z0 and S_opt from GDF.")
738
+ self.z0 = self._get_z0()
739
+ self.S_opt = self._get_s_opt()
740
+
741
+ self.logger.info(f"Z0: {self.z0:.3f}, S_opt: {self.S_opt:.3f}")
742
+
743
+ # Apply normalized clustering method
744
+ self.logger.info("Applying boundary detection method based on GDF type.")
745
+ self._find_boundaries_normalized_method(self.pdf_normalized, data_points)
746
+
747
+ # Fallback to data bounds if needed
748
+ if self.LCB is None or self.UCB is None:
749
+ self.logger.info("Normalized method incomplete, using data bounds as fallback")
750
+ self._fallback_to_data_bounds()
751
+
752
+ # Update params
753
+ self.logger.info("Updating parameters with final results.")
754
+ self._update_params()
755
+
756
+ self._fitted = True
757
+
758
+ # Optional plotting
759
+ if plot:
760
+ self.logger.info("Generating plot for PDF and detected boundaries.")
761
+ self.plot()
762
+
763
+ self.logger.info(f"Final boundaries: CLB={self.LCB:.3f}, CUB={self.UCB:.3f}")
764
+ self.logger.info("Clustering analysis completed successfully.")
765
+
766
+ return self.LCB, self.UCB
767
+
768
+ except Exception as e:
769
+ error_msg = f"Error during cluster analysis: {str(e)}"
770
+ self._append_error(error_msg, type(e).__name__)
771
+
772
+ return None, None
773
+
774
+ def results(self):
775
+ """
776
+ Return comprehensive cluster analysis results dictionary.
777
+
778
+ Provides complete analysis results including boundaries, cluster characteristics,
779
+ method diagnostics, and error information.
780
+
781
+ Returns
782
+ -------
783
+ dict
784
+ Complete analysis results with the following keys:
785
+
786
+ **Boundary Results:**
787
+ - 'LCB' : float or None - Cluster Lower Boundary
788
+ - 'UCB' : float or None - Cluster Upper Boundary
789
+ - 'cluster_width' : float or None - Distance between boundaries
790
+ - 'clustering_successful' : bool - Overall success status
791
+
792
+ **GDF Information:**
793
+ - 'gdf_type' : str - Type of GDF ('eldf', 'egdf', 'qldf', 'qgdf')
794
+ - 'Z0' : float or None - Characteristic point from GDF
795
+ - 'S_opt' : float or None - Optimal scale parameter from GDF
796
+
797
+ **Method Details:**
798
+ - 'method_used' : str - Specific algorithm used for boundary detection
799
+ - 'normalization_method' : str - PDF normalization approach
800
+ - 'pdf_shape' : str or None - Detected shape for QLDF ('W-shape', 'U-shape', 'Heterogeneous')
801
+
802
+ **Parameters:**
803
+ - 'derivative_threshold' : float - Threshold used for ELDF/EGDF
804
+ - 'slope_percentile' : int - Percentile used for QLDF/QGDF
805
+
806
+ **Diagnostics:**
807
+ - 'errors' : list - Any errors encountered during analysis
808
+ - 'warnings' : list - Warning messages (e.g., global function usage)
809
+
810
+ Raises
811
+ ------
812
+ RuntimeError
813
+ If fit() method has not been called successfully.
814
+
815
+ Examples
816
+ --------
817
+ >>> cluster = DataCluster(gdf=qldf)
818
+ >>> cluster.fit()
819
+ >>> results = cluster.results()
820
+ >>>
821
+ >>> # Access boundary information
822
+ >>> print(f"Lower boundary: {results['LCB']}")
823
+ >>> print(f"Upper boundary: {results['UCB']}")
824
+ >>> print(f"Cluster width: {results['cluster_width']}")
825
+ >>>
826
+ >>> # Check method and shape information
827
+ >>> print(f"Method used: {results['method_used']}")
828
+ >>> if results['pdf_shape']:
829
+ ... print(f"PDF shape: {results['pdf_shape']}")
830
+ >>>
831
+ >>> # Verify success and check for issues
832
+ >>> if results['clustering_successful']:
833
+ ... print("Clustering completed successfully")
834
+ >>> else:
835
+ ... print("Issues found:", results['errors'])
836
+ """
837
+ self.logger.info("Retrieving cluster analysis results.")
838
+
839
+ if not self._fitted:
840
+ self.logger.error("No analysis results available. Call fit() method first.")
841
+ raise RuntimeError("No analysis results available. Call fit() method first.")
842
+
843
+ return self.params.copy()
844
+
845
+ def plot(self, figsize=(12, 8)):
846
+ """
847
+ Create comprehensive visualization of cluster boundary detection results.
848
+
849
+ Generates a two-panel plot showing:
850
+ 1. **Top panel**: Original PDF with detected boundaries, Z0, and cluster regions
851
+ 2. **Bottom panel**: Derivative analysis with thresholds and boundary markers
852
+
853
+ Visualization Features:
854
+ - Original PDF curve with detected CLB/CUB boundaries (green dotted lines)
855
+ - Z0 characteristic point (red solid line)
856
+ - Cluster region shading (light green):
857
+ - ELDF/EGDF: Between CLB and CUB
858
+ - QLDF/QGDF: Outside CLB and CUB boundaries
859
+ - First and second derivatives for boundary detection analysis
860
+ - Threshold lines and slope indicators
861
+ - QLDF shape information (W-shape, U-shape, Heterogeneous) in title
862
+
863
+ Parameters
864
+ ----------
865
+ figsize : tuple, default=(12, 8)
866
+ Figure size as (width, height) in inches.
867
+
868
+ Raises
869
+ ------
870
+ RuntimeError
871
+ If fit() has not been called successfully before plotting.
872
+
873
+ Notes
874
+ -----
875
+ - Requires successful completion of fit() method
876
+ - Automatically adjusts visualization based on GDF type
877
+ - For QLDF, includes PDF shape detection results in title
878
+ - Derivative plots help understand boundary detection mechanism
879
+ - Green shaded regions indicate the main cluster areas
880
+
881
+ Examples
882
+ --------
883
+ >>> cluster = DataCluster(gdf=qldf)
884
+ >>> cluster.fit()
885
+ >>> cluster.plot() # Standard plot
886
+ >>> cluster.plot(figsize=(15, 10)) # Larger plot
887
+ """
888
+ self.logger.info("Creating plot for cluster boundary detection results.")
889
+ try:
890
+ data_points = self._get_data_points()
891
+
892
+ # Calculate derivatives for plotting
893
+ first_derivative = np.gradient(self.pdf_normalized)
894
+ second_derivative = np.gradient(first_derivative)
895
+ combined_signal = self.pdf_normalized + first_derivative
896
+
897
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, height_ratios=[3, 2])
898
+
899
+ # Top plot: Original PDF and boundaries
900
+ ax1.plot(data_points, self.pdf_original, 'b-', label='Original PDF', linewidth=2)
901
+
902
+ # Plot Z0
903
+ if self.z0 is not None:
904
+ ax1.axvline(x=self.z0, color='red', linestyle='-', linewidth=2, alpha=0.7, label=f'Z0={self.z0:.3f}')
905
+
906
+ # Plot boundaries
907
+ if self.LCB is not None:
908
+ ax1.axvline(x=self.LCB, color='green', linestyle=':', linewidth=2, label=f'CLB={self.LCB:.3f}')
909
+ if self.UCB is not None:
910
+ ax1.axvline(x=self.UCB, color='green', linestyle=':', linewidth=2, label=f'CUB={self.UCB:.3f}')
911
+
912
+ # Shade regions based on GDF type
913
+ dlb, dub = self._get_data_bounds()
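+ # Shading convention: ELDF/EGDF -> main cluster between LCB and UCB;
+ # QLDF/QGDF -> main cluster outside the boundaries (data bounds up to LCB, and UCB onward).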
914
+ if self.LCB is not None and self.UCB is not None:
915
+ if self.gdf_type in ['eldf', 'egdf']:
916
+ ax1.axvspan(self.LCB, self.UCB, alpha=0.2, color='lightgreen', label='Main Cluster')
917
+ else:
918
+ ax1.axvspan(dlb, self.LCB, alpha=0.2, color='lightgreen', label='Main Cluster')
919
+ ax1.axvspan(self.UCB, dub, alpha=0.2, color='lightgreen')
920
+
921
+ # Add shape info to title for QLDF
922
+ title = f'{self.gdf_type.upper()} Normalized Cluster Detection'
923
+ if self.gdf_type == 'qldf' and 'pdf_shape' in self.params:
924
+ title += f' ({self.params["pdf_shape"]})'
925
+
926
+ ax1.set_ylabel('PDF Values')
927
+ ax1.set_title(title)
928
+ ax1.legend()
929
+ ax1.grid(True, alpha=0.3)
930
+
931
+ # Bottom plot: Derivatives and thresholds
932
+ ax2.plot(data_points, first_derivative, 'orange', label='1st Derivative', alpha=0.7)
933
+ ax2.plot(data_points, combined_signal, 'purple', label='PDF + 1st Derivative', linewidth=2)
934
+
935
+ # Plot threshold lines
936
+ if self.gdf_type in ['eldf', 'egdf']:
937
+ ax2.axhline(y=self.derivative_threshold, color='red', linestyle='--', alpha=0.7,
938
+ label=f'Threshold={self.derivative_threshold}')
939
+ else:
940
+ # For QLDF/QGDF, show slope threshold
941
+ deriv_abs = np.abs(first_derivative)
942
+ slope_threshold = np.percentile(deriv_abs, self.slope_percentile)
943
+ ax2.plot(data_points, deriv_abs, 'brown', label='|1st Derivative|', alpha=0.7)
944
+ ax2.axhline(y=slope_threshold, color='red', linestyle='--', alpha=0.7,
945
+ label=f'Slope Threshold ({self.slope_percentile}%)')
946
+ ax2.plot(data_points, second_derivative, 'gray', label='2nd Derivative', alpha=0.5)
947
+ ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3, label='Zero Line')
948
+
949
+ # Plot boundaries on derivative plot
950
+ if self.LCB is not None:
951
+ ax2.axvline(x=self.LCB, color='green', linestyle=':', linewidth=2, alpha=0.7)
952
+ if self.UCB is not None:
953
+ ax2.axvline(x=self.UCB, color='green', linestyle=':', linewidth=2, alpha=0.7)
954
+
955
+ # Plot Z0 on derivative plot
956
+ if self.z0 is not None:
957
+ ax2.axvline(x=self.z0, color='red', linestyle='-', linewidth=2, alpha=0.7)
958
+
959
+ ax2.set_xlabel('Data Points')
960
+ ax2.set_ylabel('Derivative Values')
961
+ ax2.legend()
962
+ ax2.grid(True, alpha=0.3)
963
+
964
+ plt.tight_layout()
965
+ plt.show()
966
+
967
+ except Exception as e:
968
+ error_msg = f"Error creating plot: {str(e)}"
969
+ self._append_error(error_msg, type(e).__name__)
970
+
971
+ def __repr__(self):
972
+ return (f"<DataCluster(gdf_type={self.gdf_type}, "
973
+ f"LCB={self.LCB}, UCB={self.UCB}, "
974
+ f"Z0={self.z0}, S_opt={self.S_opt}, "
975
+ f"fitted={self._fitted})>")