machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,289 @@
1
+ """
2
+ QGDF - Quantifying Global Distribution Function.
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ """
7
+
8
+ import numpy as np
9
+ from machinegnostics.magcal.gdf.base_qgdf import BaseQGDF
10
+
11
+ class QGDF(BaseQGDF):
12
+ """
13
+ Short Description: Quantifying Global Distribution Function.
14
+
15
+ Detailed Description: The QGDF class quantifies and analyzes global distribution functions for given data. It supports both additive and multiplicative data forms, handles bounded and unbounded distributions, and provides automatic parameter estimation and visualization options. This class is designed for robust optimization and memory-efficient processing of large datasets.
16
+
17
+ Key Features:
18
+ - Automatic parameter estimation with customizable bounds
19
+ - Global Z0 point identification
20
+ - Support for weighted data points
21
+ - Multiple data processing forms (additive/multiplicative)
22
+ - Comprehensive visualization capabilities
23
+ - Robust optimization with multiple solver options
24
+ - Memory-efficient processing for large datasets
25
+
26
+ Attributes:
27
+ DLB (float): Data Lower Bound - absolute minimum value the data can take.
28
+ DUB (float): Data Upper Bound - absolute maximum value the data can take.
29
+ LB (float): Lower Probable Bound - practical lower limit for the distribution.
30
+ UB (float): Upper Probable Bound - practical upper limit for the distribution.
31
+ S (float or str): Scale parameter for the distribution. Set to 'auto' for automatic estimation.
32
+ z0_optimize (bool): Whether to optimize the location parameter z0 during fitting (default: True).
33
+ data_form (str): Form of the data processing ('a' for additive, 'm' for multiplicative).
34
+ n_points (int): Number of points to generate in the distribution function (default: 500).
35
+ catch (bool): Whether to store intermediate calculated values (default: True).
36
+ weights (np.ndarray): Prior weights for data points. If None, uniform weights are used.
37
+ wedf (bool): Whether to use Weighted Empirical Distribution Function (default: False).
38
+ opt_method (str): Optimization method for parameter estimation (default: 'L-BFGS-B').
39
+ tolerance (float): Convergence tolerance for optimization (default: 1e-9).
40
+ verbose (bool): Whether to print detailed progress information (default: False).
41
+ params (dict): Dictionary storing fitted parameters and results after fitting.
42
+ homogeneous (bool): To indicate data homogeneity (default: True).
43
+ max_data_size (int): Maximum data size for smooth QGDF generation (default: 1000).
44
+ flush (bool): Whether to flush large arrays (default: True).
45
+
46
+ Methods:
47
+ fit(data, plot=False): Fit the Quantifying Global Distribution Function to the data, optionally plotting the result.
48
+ plot(plot_smooth=True, plot='both', bounds=True, extra_df=True, figsize=(12,8)): Visualize the fitted distribution.
49
+ results(): Get the fitting results as a dictionary.
50
+
51
+ Usage Example:
52
+
53
+ >>> import numpy as np
54
+ >>> from machinegnostics.magcal import QGDF
55
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
56
+ >>> qgdf = QGDF()
57
+ >>> qgdf.fit(data)
58
+ >>> qgdf.plot()
59
+ >>> print(qgdf.params)
60
+
61
+ Workflow:
62
+ 1. Initialize QGDF with desired parameters (no data required).
63
+ 2. Call fit(data) to estimate the distribution parameters.
64
+ 3. Use plot() to visualize the results.
65
+
66
+ Performance Tips:
67
+ - Use data_form='m' for multiplicative/log-normal data.
68
+ - Set appropriate bounds to improve convergence.
69
+ - Use catch=False for large datasets to save memory.
70
+ - Adjust n_points based on visualization needs vs. performance.
71
+ - Use verbose=True to monitor optimization progress.
72
+
73
+ Common Use Cases:
74
+ - Risk analysis and reliability engineering.
75
+ - Quality control and process optimization.
76
+ - Financial modeling and market analysis.
77
+ - Environmental data analysis.
78
+ - Biostatistics and epidemiological studies.
79
+
80
+ Notes:
81
+ - Bounds (DLB, DUB, LB, UB) are optional but can improve estimation accuracy.
82
+ - When S='auto', the scale parameter is automatically estimated from the data.
83
+ - The weights array must have the same length as the data array.
84
+ - Setting catch=False can save memory for large datasets but prevents access to intermediate results or detailed plots.
85
+
86
+ Raises:
87
+ ValueError: If data array is empty or contains invalid values.
88
+ ValueError: If weights array length doesn't match data array length.
89
+ ValueError: If bounds are specified incorrectly (e.g., LB > UB).
90
+ ValueError: If invalid parameters are provided (negative tolerance, invalid data_form, etc.).
91
+ RuntimeError: If the fitting process fails to converge.
92
+ OptimizationError: If the optimization algorithm encounters numerical issues.
93
+ """
94
+
95
def __init__(self,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 1,
             z0_optimize: bool = True,
             tolerance: float = 1e-9,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = False,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Record the QGDF configuration on the instance; no data is required yet.

    The constructor only stores its arguments verbatim as attributes of the
    same name. It performs no validation and does not call the base-class
    initialiser: the stored values are handed to ``BaseQGDF.__init__`` later,
    when ``fit()`` is called. Any checking of these values therefore happens
    (if at all) in the base class at fit time, not here.

    Parameters:
        DLB (float, optional): Data Lower Bound - the absolute minimum value the data can take.
        DUB (float, optional): Data Upper Bound - the absolute maximum value the data can take.
        LB (float, optional): Lower Probable Bound - practical lower limit for the distribution.
        UB (float, optional): Upper Probable Bound - practical upper limit for the distribution.
        S (float or str, optional): Scale parameter, or 'auto' to request automatic estimation.
            Default is 1.
        z0_optimize (bool, optional): Whether the location parameter z0 should be optimized.
            Default is True.
        tolerance (float, optional): Convergence tolerance for optimization. Default is 1e-9.
        data_form (str, optional): 'a' for additive (default) or 'm' for multiplicative data.
        n_points (int, optional): Number of points for the generated distribution function.
            Default is 500.
        homogeneous (bool, optional): Whether the data is assumed homogeneous. Default is True.
        catch (bool, optional): Whether intermediate calculated values are stored. Default is True.
        weights (np.ndarray, optional): Prior weights for the data points. Default is None.
        wedf (bool, optional): Whether to use the Weighted Empirical Distribution Function.
            Default is False.
        opt_method (str, optional): Name of the optimization method. Default is 'L-BFGS-B'.
        verbose (bool, optional): Whether to print progress information. Default is False.
        max_data_size (int, optional): Maximum data size for smooth QGDF generation.
            Default is 1000.
        flush (bool, optional): Whether large arrays are flushed. Default is True.

    Examples:

        >>> qgdf = QGDF()
        >>> qgdf = QGDF(DLB=0, DUB=5)
        >>> qgdf = QGDF(data_form='m')
        >>> qgdf = QGDF(tolerance=1e-6, opt_method='SLSQP', max_data_size=5000)
    """
    # Bounds and scale.
    self.DLB, self.DUB = DLB, DUB
    self.LB, self.UB = LB, UB
    self.S = S

    # Optimization behaviour.
    self.z0_optimize, self.tolerance = z0_optimize, tolerance

    # Data handling and output resolution.
    self.data_form, self.n_points = data_form, n_points
    self.homogeneous, self.catch = homogeneous, catch
    self.weights, self.wedf = weights, wedf

    # Solver choice, diagnostics and memory limits.
    self.opt_method, self.verbose = opt_method, verbose
    self.max_data_size, self.flush = max_data_size, flush
177
+
178
def fit(self, data: np.ndarray, plot: bool = False):
    """
    Short Description: Fit the Quantifying Global Distribution Function to the provided data.

    Detailed Description: The configuration stored by the constructor is read back from the
    instance and passed, together with `data`, to the base-class initialiser. The actual
    estimation is then delegated to the inherited `_fit_qgdf` routine. All validation,
    optimization and result storage take place in the base class, which is not part of
    this module.

    Parameters:
        data (np.ndarray): Input data array for distribution estimation.
        plot (bool, optional): Forwarded to `_fit_qgdf`; requests a plot after fitting. Default is False.

    Returns:
        None: Nothing is returned by this method.

    Raises:
        AttributeError: If one of the configuration attributes set by the constructor is missing.
        Exception: Any error raised by the base-class initialiser or `_fit_qgdf` propagates unchanged.

    Usage Example:

        >>> qgdf = QGDF()
        >>> data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        >>> qgdf.fit(data)
        >>> qgdf.fit(data, plot=True)
    """
    # Configuration attributes, listed in the order the base initialiser receives them.
    option_names = (
        'DLB', 'DUB', 'LB', 'UB', 'S',
        'z0_optimize', 'tolerance', 'data_form', 'n_points',
        'homogeneous', 'catch', 'weights', 'wedf',
        'opt_method', 'verbose', 'max_data_size', 'flush',
    )
    settings = {name: getattr(self, name) for name in option_names}

    # The base class is (re)initialised on every fit, using the current attribute values.
    super().__init__(data=data, **settings)
    self._fit_qgdf(plot=plot)
228
+
229
def plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """
    Short Description: Visualize the fitted Quantifying Global Distribution Function.

    Detailed Description: Thin public wrapper that forwards every argument, by keyword,
    to the inherited `_plot` routine. All drawing logic and any checks (for example
    whether the model has been fitted) live in that routine.

    Parameters:
        plot_smooth (bool, optional): Whether to plot a smooth curve for the distribution function. Default is True.
        plot (str, optional): Type of plot to generate. Default is 'both'. Documented options:
            - 'qgdf': Global Distribution Function (main curve).
            - 'pdf': Probability Density Function.
            - 'both': Both QGDF and PDF.
        bounds (bool, optional): Whether to display bound lines on the plot. Default is True.
        extra_df (bool, optional): Whether to include additional distribution functions for comparison. Default is True.
        figsize (tuple, optional): Figure size as (width, height) in inches. Default is (12, 8).

    Returns:
        None: The return value of `_plot` is discarded.

    Raises:
        Exception: Any error raised by `_plot` propagates unchanged.

    Usage Example:

        >>> qgdf.plot()
        >>> qgdf.plot(plot='pdf', bounds=True)
        >>> qgdf.plot(plot='both', bounds=True, extra_df=True, figsize=(16, 10))
    """
    render_options = {
        'plot_smooth': plot_smooth,
        'plot': plot,
        'bounds': bounds,
        'extra_df': extra_df,
        'figsize': figsize,
    }
    self._plot(**render_options)
262
+
263
def results(self) -> dict:
    """
    Short Description: Retrieve the results of the QGDF fitting process.

    Detailed Description: Returns whatever the inherited `_get_results()` routine produces,
    after checking that the instance has been fitted. The content and keys of the returned
    dictionary are defined by the base class.

    Returns:
        dict: Fitted parameters and results, as returned by `_get_results()`.

    Raises:
        RuntimeError: If fit() has not been called (or did not mark the instance as fitted)
            before accessing results.
        Exception: Any error raised by `_get_results()` propagates unchanged.

    Usage Example:

        >>> qgdf = QGDF(verbose=True)
        >>> qgdf.fit(data)
        >>> results = qgdf.results()
    """
    # `_fitted` is not assigned by this class's constructor; the base-class
    # initialiser only runs inside fit(). Read it defensively so an unfitted
    # instance yields the documented RuntimeError rather than AttributeError.
    if not getattr(self, '_fitted', False):
        raise RuntimeError("Must fit QGDF before getting results.")
    return self._get_results()
@@ -0,0 +1,296 @@
1
+ '''
2
+ QLDF - Quantifying Local Distribution Function.
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ '''
7
+
8
+ import numpy as np
9
+ from machinegnostics.magcal.gdf.base_qldf import BaseQLDF
10
+
11
+ class QLDF(BaseQLDF):
12
+ """
13
+ Short Description: Quantifying Local Distribution Function.
14
+
15
+ Detailed Description: The QLDF class quantifies and analyzes local distribution characteristics around critical points in data. It focuses on identifying local minima in probability density (Z0 points) and their neighborhood behavior. This class is optimized for detailed local distribution analysis and memory-efficient processing.
16
+
17
+ Key Features:
18
+ - Automatic Z0 point identification.
19
+ - Local distribution characterization around critical points.
20
+ - Advanced interpolation methods for precise Z0 estimation.
21
+ - Support for weighted data analysis.
22
+ - Memory-efficient processing for large datasets.
23
+ - Comprehensive visualization of local distribution features.
24
+ - Robust optimization with multiple solver options.
25
+
26
+ Attributes:
27
+ DLB (float): Data Lower Bound - absolute minimum value the data can take.
28
+ DUB (float): Data Upper Bound - absolute maximum value the data can take.
29
+ LB (float): Lower Probable Bound - practical lower limit for the distribution.
30
+ UB (float): Upper Probable Bound - practical upper limit for the distribution.
31
+ S (float or str): Scale parameter for the distribution. Set to 'auto' for automatic estimation.
32
+ varS (bool): Whether to use variable scale parameter during optimization (default: False).
33
+ z0_optimize (bool): Whether to optimize the location parameter z0 during fitting (default: True).
34
+ tolerance (float): Convergence tolerance for optimization (default: 1e-9).
35
+ data_form (str): Form of the data processing ('a' for additive, 'm' for multiplicative).
36
+ n_points (int): Number of points to generate in the distribution function (default: 500).
37
+ homogeneous (bool): Whether to assume data homogeneity (default: True).
38
+ catch (bool): Whether to store intermediate calculated values (default: True).
39
+ weights (np.ndarray): Prior weights for data points. If None, uniform weights are used.
40
+ wedf (bool): Whether to use Weighted Empirical Distribution Function (default: False).
41
+ opt_method (str): Optimization method for parameter estimation (default: 'L-BFGS-B').
42
+ verbose (bool): Whether to print detailed progress information (default: False).
43
+ max_data_size (int): Maximum data size for smooth QLDF generation (default: 1000).
44
+ flush (bool): Whether to flush large arrays during processing (default: True).
45
+ params (dict): Dictionary storing fitted parameters and results after fitting.
46
+
47
+ Methods:
48
+ fit(data, plot=False): Fit the Quantifying Local Distribution Function to the data, optionally plotting the result.
49
+ plot(plot_smooth=True, plot='both', bounds=True, extra_df=True, figsize=(12,8)): Visualize the fitted local distribution.
50
+ results(): Get the fitting results as a dictionary.
51
+
52
+ Usage Example:
53
+
54
+ >>> import numpy as np
55
+ >>> from machinegnostics.magcal import QLDF
56
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
57
+ >>> qldf = QLDF()
58
+ >>> qldf.fit(data)
59
+ >>> qldf.plot()
60
+ >>> print(qldf.params)
61
+
62
+ Workflow:
63
+ 1. Initialize QLDF with desired parameters (no data required).
64
+ 2. Call fit(data) to estimate the distribution parameters.
65
+ 3. Use plot() to visualize the results.
66
+
67
+ Performance Tips:
68
+ - Use data_form='m' for multiplicative/log-normal data.
69
+ - Set appropriate bounds to improve convergence.
70
+ - Use catch=False for large datasets to save memory.
71
+ - Adjust n_points based on visualization needs vs. performance.
72
+ - Use verbose=True to monitor optimization progress.
73
+
74
+ Common Use Cases:
75
+ - Peak detection and modal analysis in data distributions.
76
+ - Local density estimation for clustering applications.
77
+ - Risk analysis focusing on critical value identification.
78
+ - Quality control with emphasis on specification limits.
79
+ - Financial modeling with focus on maximum likelihood points.
80
+
81
+ Notes:
82
+ - Bounds (DLB, DUB, LB, UB) are optional but can improve estimation accuracy.
83
+ - When S='auto', the scale parameter is automatically estimated from the data.
84
+ - The weights array must have the same length as the data array.
85
+ - Setting catch=False can save memory for large datasets but prevents access to intermediate results or detailed plots.
86
+
87
+ Raises:
88
+ ValueError: If data array is empty or contains invalid values.
89
+ ValueError: If weights array length doesn't match data array length.
90
+ ValueError: If bounds are specified incorrectly (e.g., LB > UB).
91
+ ValueError: If invalid parameters are provided (negative tolerance, invalid data_form, etc.).
92
+ RuntimeError: If the fitting process fails to converge.
93
+ OptimizationError: If the optimization algorithm encounters numerical issues.
94
+ """
95
+
96
def __init__(self,
             DLB: float = None,
             DUB: float = None,
             LB: float = None,
             UB: float = None,
             S = 1,
             varS: bool = False,
             z0_optimize: bool = True,
             tolerance: float = 1e-9,
             data_form: str = 'a',
             n_points: int = 500,
             homogeneous: bool = True,
             catch: bool = True,
             weights: np.ndarray = None,
             wedf: bool = False,
             opt_method: str = 'L-BFGS-B',
             verbose: bool = False,
             max_data_size: int = 1000,
             flush: bool = True):
    """
    Record the QLDF configuration on the instance; no data is required yet.

    The constructor only stores its arguments verbatim as attributes of the
    same name. It performs no validation and does not call the base-class
    initialiser: the stored values are handed to ``BaseQLDF.__init__`` later,
    when ``fit()`` is called. Any checking of these values therefore happens
    (if at all) in the base class at fit time, not here.

    Parameters:
        DLB (float, optional): Data Lower Bound - the absolute minimum value the data can take.
        DUB (float, optional): Data Upper Bound - the absolute maximum value the data can take.
        LB (float, optional): Lower Probable Bound - practical lower limit for the distribution.
        UB (float, optional): Upper Probable Bound - practical upper limit for the distribution.
        S (float or str, optional): Scale parameter, or 'auto' to request automatic estimation.
            Default is 1.
        varS (bool, optional): Whether a variable scale parameter is allowed during optimization.
            Default is False.
        z0_optimize (bool, optional): Whether the location parameter z0 should be optimized.
            Default is True.
        tolerance (float, optional): Convergence tolerance for optimization. Default is 1e-9.
        data_form (str, optional): 'a' for additive (default) or 'm' for multiplicative data.
        n_points (int, optional): Number of points for the generated distribution function.
            Default is 500.
        homogeneous (bool, optional): Whether the data is assumed homogeneous. Default is True.
        catch (bool, optional): Whether intermediate calculated values are stored. Default is True.
        weights (np.ndarray, optional): Prior weights for the data points. Default is None.
        wedf (bool, optional): Whether to use the Weighted Empirical Distribution Function.
            Default is False.
        opt_method (str, optional): Name of the optimization method. Default is 'L-BFGS-B'.
        verbose (bool, optional): Whether to print progress information. Default is False.
        max_data_size (int, optional): Maximum data size for smooth QLDF generation.
            Default is 1000.
        flush (bool, optional): Whether large arrays are flushed during processing. Default is True.

    Examples:
        >>> import numpy as np
        >>> from machinegnostics.magcal import QLDF
        >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
        >>> qldf = QLDF()
        >>> qldf.fit(data)
        >>> qldf.plot()
        >>> print(qldf.params)
    """
    # Bounds and scale.
    self.DLB, self.DUB = DLB, DUB
    self.LB, self.UB = LB, UB
    self.S, self.varS = S, varS

    # Optimization behaviour.
    self.z0_optimize, self.tolerance = z0_optimize, tolerance

    # Data handling and output resolution.
    self.data_form, self.n_points = data_form, n_points
    self.homogeneous, self.catch = homogeneous, catch
    self.weights, self.wedf = weights, wedf

    # Solver choice, diagnostics and memory limits.
    self.opt_method, self.verbose = opt_method, verbose
    self.max_data_size, self.flush = max_data_size, flush
183
+
184
def fit(self, data: np.ndarray, plot: bool = False):
    """
    Short Description: Fit the Quantifying Local Distribution Function to the provided data.

    Detailed Description: The configuration stored by the constructor is read back from the
    instance and passed, together with `data`, to the base-class initialiser. The actual
    estimation is then delegated to the inherited `_fit_qldf` routine. All validation,
    optimization and result storage take place in the base class, which is not part of
    this module.

    Parameters:
        data (np.ndarray): Input data array for distribution estimation.
        plot (bool, optional): Forwarded to `_fit_qldf`; requests a plot after fitting. Default is False.

    Returns:
        None: Nothing is returned by this method.

    Raises:
        AttributeError: If one of the configuration attributes set by the constructor is missing.
        Exception: Any error raised by the base-class initialiser or `_fit_qldf` propagates unchanged.

    Usage Example:

        >>> qldf = QLDF()
        >>> data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        >>> qldf.fit(data)
        >>> qldf.fit(data, plot=True)
    """
    # Configuration attributes, listed in the order the base initialiser receives them.
    option_names = (
        'DLB', 'DUB', 'LB', 'UB', 'S', 'varS',
        'z0_optimize', 'tolerance', 'data_form', 'n_points',
        'homogeneous', 'catch', 'weights', 'wedf',
        'opt_method', 'verbose', 'max_data_size', 'flush',
    )
    settings = {name: getattr(self, name) for name in option_names}

    # The base class is (re)initialised on every fit, using the current attribute values.
    super().__init__(data=data, **settings)
    self._fit_qldf(plot=plot)
235
+
236
def plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """
    Short Description: Visualize the fitted Quantifying Local Distribution Function.

    Detailed Description: Thin public wrapper that forwards every argument, by keyword,
    to the inherited `_plot` routine. All drawing logic and any checks (for example
    whether the model has been fitted) live in that routine.

    Parameters:
        plot_smooth (bool, optional): Whether to plot a smooth curve for the distribution function. Default is True.
        plot (str, optional): Type of plot to generate. Default is 'both'. Documented options:
            - 'qldf': Local Distribution Function (main curve).
            - 'pdf': Probability Density Function.
            - 'both': Both QLDF and PDF.
        bounds (bool, optional): Whether to display bound lines on the plot. Default is True.
        extra_df (bool, optional): Whether to include additional distribution functions for comparison. Default is True.
        figsize (tuple, optional): Figure size as (width, height) in inches. Default is (12, 8).

    Returns:
        None: The return value of `_plot` is discarded.

    Raises:
        Exception: Any error raised by `_plot` propagates unchanged.

    Usage Example:

        >>> qldf.plot()
        >>> qldf.plot(plot='pdf', bounds=True)
        >>> qldf.plot(plot='both', bounds=True, extra_df=True, figsize=(16, 10))
    """
    render_options = {
        'plot_smooth': plot_smooth,
        'plot': plot,
        'bounds': bounds,
        'extra_df': extra_df,
        'figsize': figsize,
    }
    self._plot(**render_options)
269
+
270
def results(self) -> dict:
    """
    Short Description: Retrieve the results of the QLDF fitting process.

    Detailed Description: Returns whatever the inherited `_get_results()` routine produces,
    after checking that the instance has been fitted. The content and keys of the returned
    dictionary are defined by the base class.

    Returns:
        dict: Fitted parameters and results, as returned by `_get_results()`.

    Raises:
        RuntimeError: If fit() has not been called (or did not mark the instance as fitted)
            before accessing results.
        Exception: Any error raised by `_get_results()` propagates unchanged.

    Usage Example:

        >>> qldf = QLDF(verbose=True)
        >>> qldf.fit(data)
        >>> results = qldf.results()
    """
    # `_fitted` is not assigned by this class's constructor; the base-class
    # initialiser only runs inside fit(). Read it defensively so an unfitted
    # instance yields the documented RuntimeError rather than AttributeError.
    if not getattr(self, '_fitted', False):
        raise RuntimeError("Must fit QLDF before getting results.")
    return self._get_results()