machinegnostics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. __init__.py +0 -0
  2. machinegnostics/__init__.py +24 -0
  3. machinegnostics/magcal/__init__.py +37 -0
  4. machinegnostics/magcal/characteristics.py +460 -0
  5. machinegnostics/magcal/criteria_eval.py +268 -0
  6. machinegnostics/magcal/criterion.py +140 -0
  7. machinegnostics/magcal/data_conversion.py +381 -0
  8. machinegnostics/magcal/gcor.py +64 -0
  9. machinegnostics/magcal/gdf/__init__.py +2 -0
  10. machinegnostics/magcal/gdf/base_df.py +39 -0
  11. machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
  12. machinegnostics/magcal/gdf/base_egdf.py +823 -0
  13. machinegnostics/magcal/gdf/base_eldf.py +830 -0
  14. machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
  15. machinegnostics/magcal/gdf/base_qldf.py +1019 -0
  16. machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
  17. machinegnostics/magcal/gdf/data_cluster.py +975 -0
  18. machinegnostics/magcal/gdf/data_intervals.py +853 -0
  19. machinegnostics/magcal/gdf/data_membership.py +536 -0
  20. machinegnostics/magcal/gdf/der_egdf.py +243 -0
  21. machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
  22. machinegnostics/magcal/gdf/egdf.py +324 -0
  23. machinegnostics/magcal/gdf/eldf.py +297 -0
  24. machinegnostics/magcal/gdf/eldf_intv.py +609 -0
  25. machinegnostics/magcal/gdf/eldf_ma.py +627 -0
  26. machinegnostics/magcal/gdf/homogeneity.py +1218 -0
  27. machinegnostics/magcal/gdf/intv_engine.py +1523 -0
  28. machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
  29. machinegnostics/magcal/gdf/qgdf.py +289 -0
  30. machinegnostics/magcal/gdf/qldf.py +296 -0
  31. machinegnostics/magcal/gdf/scedasticity.py +197 -0
  32. machinegnostics/magcal/gdf/wedf.py +181 -0
  33. machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
  34. machinegnostics/magcal/layer_base.py +42 -0
  35. machinegnostics/magcal/layer_history_base.py +74 -0
  36. machinegnostics/magcal/layer_io_process_base.py +238 -0
  37. machinegnostics/magcal/layer_param_base.py +448 -0
  38. machinegnostics/magcal/mg_weights.py +36 -0
  39. machinegnostics/magcal/sample_characteristics.py +532 -0
  40. machinegnostics/magcal/scale_optimization.py +185 -0
  41. machinegnostics/magcal/scale_param.py +313 -0
  42. machinegnostics/magcal/util/__init__.py +0 -0
  43. machinegnostics/magcal/util/dis_docstring.py +18 -0
  44. machinegnostics/magcal/util/logging.py +24 -0
  45. machinegnostics/magcal/util/min_max_float.py +34 -0
  46. machinegnostics/magnet/__init__.py +0 -0
  47. machinegnostics/metrics/__init__.py +28 -0
  48. machinegnostics/metrics/accu.py +61 -0
  49. machinegnostics/metrics/accuracy.py +67 -0
  50. machinegnostics/metrics/auto_correlation.py +183 -0
  51. machinegnostics/metrics/auto_covariance.py +204 -0
  52. machinegnostics/metrics/cls_report.py +130 -0
  53. machinegnostics/metrics/conf_matrix.py +93 -0
  54. machinegnostics/metrics/correlation.py +178 -0
  55. machinegnostics/metrics/cross_variance.py +167 -0
  56. machinegnostics/metrics/divi.py +82 -0
  57. machinegnostics/metrics/evalmet.py +109 -0
  58. machinegnostics/metrics/f1_score.py +128 -0
  59. machinegnostics/metrics/gmmfe.py +108 -0
  60. machinegnostics/metrics/hc.py +141 -0
  61. machinegnostics/metrics/mae.py +72 -0
  62. machinegnostics/metrics/mean.py +117 -0
  63. machinegnostics/metrics/median.py +122 -0
  64. machinegnostics/metrics/mg_r2.py +167 -0
  65. machinegnostics/metrics/mse.py +78 -0
  66. machinegnostics/metrics/precision.py +119 -0
  67. machinegnostics/metrics/r2.py +122 -0
  68. machinegnostics/metrics/recall.py +108 -0
  69. machinegnostics/metrics/rmse.py +77 -0
  70. machinegnostics/metrics/robr2.py +119 -0
  71. machinegnostics/metrics/std.py +144 -0
  72. machinegnostics/metrics/variance.py +101 -0
  73. machinegnostics/models/__init__.py +2 -0
  74. machinegnostics/models/classification/__init__.py +1 -0
  75. machinegnostics/models/classification/layer_history_log_reg.py +121 -0
  76. machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
  77. machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
  78. machinegnostics/models/classification/layer_param_log_reg.py +275 -0
  79. machinegnostics/models/classification/mg_log_reg.py +273 -0
  80. machinegnostics/models/cross_validation.py +118 -0
  81. machinegnostics/models/data_split.py +106 -0
  82. machinegnostics/models/regression/__init__.py +2 -0
  83. machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
  84. machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
  85. machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
  86. machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
  87. machinegnostics/models/regression/mg_lin_reg.py +253 -0
  88. machinegnostics/models/regression/mg_poly_reg.py +258 -0
  89. machinegnostics-0.0.1.dist-info/METADATA +246 -0
  90. machinegnostics-0.0.1.dist-info/RECORD +93 -0
  91. machinegnostics-0.0.1.dist-info/WHEEL +5 -0
  92. machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
  93. machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,324 @@
1
+ """
2
+ EGDF - Estimating Global Distribution Function.
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ """
7
+
8
+ import numpy as np
9
+ from machinegnostics.magcal.gdf.base_egdf import BaseEGDF
10
+
11
+ class EGDF(BaseEGDF):
12
+ """
13
+ Short Description: Estimating Global Distribution Function.
14
+
15
+ Detailed Description: The EGDF class estimates and analyzes global distribution functions for given data. It supports both additive and multiplicative data forms, handles bounded and unbounded distributions, and provides automatic parameter estimation and visualization options. This class is optimized for robust optimization and memory-efficient processing of large datasets.
16
+
17
+ Key Features:
18
+ - Automatic parameter estimation with customizable bounds.
19
+ - Advanced Z0 point estimation for maximum PDF location.
20
+ - Support for weighted data points.
21
+ - Multiple data processing forms (additive/multiplicative).
22
+ - Comprehensive visualization capabilities.
23
+ - Robust optimization with multiple solver options.
24
+ - Memory-efficient processing for large datasets.
25
+
26
+ Attributes:
27
+ DLB (float): Data Lower Bound - absolute minimum value the data can take.
28
+ DUB (float): Data Upper Bound - absolute maximum value the data can take.
29
+ LB (float): Lower Probable Bound - practical lower limit for the distribution.
30
+ UB (float): Upper Probable Bound - practical upper limit for the distribution.
31
+ S (float or str): Scale parameter for the distribution. Set to 'auto' for automatic estimation.
32
+ z0_optimize (bool): Whether to optimize the location parameter z0 during fitting (default: True).
33
+ data_form (str): Form of the data processing ('a' for additive, 'm' for multiplicative).
34
+ n_points (int): Number of points to generate in the distribution function (default: 500).
35
+ catch (bool): Whether to store intermediate calculated values (default: True).
36
+ weights (np.ndarray): Prior weights for data points. If None, uniform weights are used.
37
+ wedf (bool): Whether to use Weighted Empirical Distribution Function (default: False).
38
+ opt_method (str): Optimization method for parameter estimation (default: 'L-BFGS-B').
39
+ tolerance (float): Convergence tolerance for optimization (default: 1e-9).
40
+ verbose (bool): Whether to print detailed progress information (default: False).
41
+ params (dict): Dictionary storing fitted parameters and results after fitting.
42
+ homogeneous (bool): To indicate data homogeneity (default: True).
43
+ max_data_size (int): Maximum data size for smooth EGDF generation (default: 1000).
44
+ flush (bool): Whether to flush large arrays (default: True).
45
+
46
+ Methods:
47
+ fit(data, plot=False): Fit the Estimating Global Distribution Function to the data.
48
+ plot(plot_smooth=True, plot='both', bounds=False, extra_df=True, figsize=(12,8)): Visualize the fitted distribution.
49
+ results(): Get the fitting results as a dictionary.
50
+
51
+ Usage Example:
52
+
53
+ >>> import numpy as np
54
+ >>> from machinegnostics.magcal import EGDF
55
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
56
+ >>> egdf = EGDF()
57
+ >>> egdf.fit(data)
58
+ >>> egdf.plot()
59
+ >>> print(egdf.params)
60
+
61
+ Workflow:
62
+ 1. Initialize EGDF with desired parameters (no data required).
63
+ 2. Call fit(data) to estimate the distribution parameters.
64
+ 3. Use plot() to visualize the results.
65
+
66
+ Performance Tips:
67
+ - Use data_form='m' for multiplicative/log-normal data.
68
+ - Set appropriate bounds to improve convergence.
69
+ - Use catch=False for large datasets to save memory.
70
+ - Adjust n_points based on visualization needs vs. performance.
71
+ - Use verbose=True to monitor optimization progress.
72
+
73
+ Common Use Cases:
74
+ - Risk analysis and reliability engineering.
75
+ - Quality control and process optimization.
76
+ - Financial modeling and market analysis.
77
+ - Environmental data analysis.
78
+ - Biostatistics and epidemiological studies.
79
+
80
+ Notes:
81
+ - Bounds (DLB, DUB, LB, UB) are optional but can improve estimation accuracy.
82
+ - When S='auto', the scale parameter is automatically estimated from the data.
83
+ - The weights array must have the same length as the data array.
84
+ - Setting catch=False can save memory for large datasets but prevents access to intermediate results or detailed plots.
85
+
86
+ Raises:
87
+ ValueError: If data array is empty or contains invalid values.
88
+ ValueError: If weights array length doesn't match data array length.
89
+ ValueError: If bounds are specified incorrectly (e.g., LB > UB).
90
+ ValueError: If invalid parameters are provided (negative tolerance, invalid data_form, etc.).
91
+ RuntimeError: If the fitting process fails to converge.
92
+ OptimizationError: If the optimization algorithm encounters numerical issues.
93
+ """
94
+
95
+ def __init__(self,
96
+ DLB: float = None,
97
+ DUB: float = None,
98
+ LB: float = None,
99
+ UB: float = None,
100
+ S = 'auto',
101
+ z0_optimize: bool = True,
102
+ tolerance: float = 1e-9,
103
+ data_form: str = 'a',
104
+ n_points: int = 500,
105
+ homogeneous: bool = True,
106
+ catch: bool = True,
107
+ weights: np.ndarray = None,
108
+ wedf: bool = False,
109
+ opt_method: str = 'L-BFGS-B',
110
+ verbose: bool = False,
111
+ max_data_size: int = 1000,
112
+ flush: bool = True):
113
+ """
114
+ Initialize the EGDF (Estimating Global Distribution Function) class.
115
+
116
+ This constructor sets up all the necessary parameters and configurations for estimating
117
+ a global distribution function from data. It only stores the supplied settings and prepares
118
+ the instance for subsequent fitting and analysis operations.
119
+
120
+ Parameters:
121
+ DLB (float, optional): Data Lower Bound - the absolute minimum value that the data can
122
+ theoretically take. If None, will be inferred from data. This is a
123
+ hard constraint on the distribution.
124
+ DUB (float, optional): Data Upper Bound - the absolute maximum value that the data can
125
+ theoretically take. If None, will be inferred from data. This is a
126
+ hard constraint on the distribution.
127
+ LB (float, optional): Lower Probable Bound - the practical lower limit for the distribution.
128
+ This is typically less restrictive than DLB and represents the expected
129
+ lower range of the distribution.
130
+ UB (float, optional): Upper Probable Bound - the practical upper limit for the distribution.
131
+ This is typically less restrictive than DUB and represents the expected
132
+ upper range of the distribution.
133
+ S (float or str, optional): Scale parameter for the distribution. If 'auto' (default),
134
+ the scale will be automatically estimated from the data during
135
+ fitting. If a float is provided, it will be used as a fixed
136
+ scale parameter.
137
+ tolerance (float, optional): Convergence tolerance for the optimization process.
138
+ Smaller values lead to more precise fitting but may require
139
+ more iterations. Default is 1e-9.
140
+ data_form (str, optional): Form of data processing. Options are:
141
+ - 'a': Additive form (default) - processes data linearly
142
+ - 'm': Multiplicative form - applies log transformation for
143
+ better handling of multiplicative processes
144
+ n_points (int, optional): Number of points to generate in the final distribution function.
145
+ Higher values provide smoother curves but require more computation.
146
+ Default is 500. Must be positive integer.
147
+ homogeneous (bool, optional): Whether to assume data homogeneity. Default is True.
148
+ Affects internal optimization strategies.
149
+ catch (bool, optional): Whether to store intermediate calculated values during fitting.
150
+ Setting to True (default) allows access to detailed results but
151
+ uses more memory. Set to False for large datasets to save memory.
152
+ weights (np.ndarray, optional): Prior weights for data points. Must be the same length
153
+ as data array when fit() is called. If None, uniform weights
154
+ (all ones) are used. Weights should be positive values.
155
+ wedf (bool, optional): Whether to use Weighted Empirical Distribution Function in
156
+ calculations. Default is False. When True, incorporates weights
157
+ into the empirical distribution estimation.
158
+ opt_method (str, optional): Optimization method for parameter estimation. Default is
159
+ 'L-BFGS-B'. Other options include 'SLSQP', 'TNC', etc.
160
+ Must be a valid scipy.optimize method name.
161
+ verbose (bool, optional): Whether to print detailed progress information during fitting.
162
+ Default is False. When True, provides diagnostic output about
163
+ the optimization process.
164
+ max_data_size (int, optional): Maximum size of data for which smooth EGDF generation is allowed.
165
+ Acts as a safety limit to prevent excessive memory usage. Default is 1000.
166
+ flush (bool, optional): Whether to flush intermediate calculations during processing.
167
+ Default is True. May affect memory usage and computation speed.
168
+
169
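+ Raises (not by this constructor, which performs no checks; any validation happens later, when fit() passes these values to BaseEGDF):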
170
+ ValueError: If n_points is not a positive integer.
171
+ ValueError: If bounds are specified incorrectly (e.g., DLB > DUB or LB > UB).
172
+ ValueError: If data_form is not 'a' or 'm'.
173
+ ValueError: If tolerance is not positive.
174
+ ValueError: If max_data_size is not positive.
175
+
176
+ Examples:
177
+
178
+ >>> import numpy as np
179
+ >>> from machinegnostics.magcal import EGDF
180
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
181
+ >>> egdf = EGDF()
182
+ >>> egdf.fit(data)
183
+ >>> egdf.plot()
184
+ >>> print(egdf.params)
185
+
186
+ Notes:
187
+ - The initialization process does not perform any fitting; call fit(data) method afterwards
188
+ - Bounds should be chosen carefully: too restrictive bounds may lead to poor fits
189
+ - For multiplicative data, consider using data_form='m' for better results
190
+ - Large n_points values will slow down plotting but provide smoother visualizations
191
+ - The wedf parameter affects how empirical distributions are calculated
192
+ """
193
+ # parameter
194
+ self.DLB = DLB
195
+ self.DUB = DUB
196
+ self.LB = LB
197
+ self.UB = UB
198
+ self.S = S
199
+ self.z0_optimize = z0_optimize
200
+ self.tolerance = tolerance
201
+ self.data_form = data_form
202
+ self.n_points = n_points
203
+ self.homogeneous = homogeneous
204
+ self.catch = catch
205
+ self.weights = weights
206
+ self.wedf = wedf
207
+ self.opt_method = opt_method
208
+ self.verbose = verbose
209
+ self.max_data_size = max_data_size
210
+ self.flush = flush
211
+
212
+ def fit(self, data: np.ndarray, plot: bool = False):
213
+ """
214
+ Short Description: Fit the Estimating Global Distribution Function to the provided data.
215
+
216
+ Detailed Description: This method performs the core estimation process for the EGDF. It validates and preprocesses the data, sets up optimization constraints, runs numerical optimization, and calculates the final EGDF and PDF with optimized parameters. The EGDF provides a unique global representation of the data distribution.
217
+
218
+ Parameters:
219
+ data (np.ndarray): Input data array for distribution estimation. Must be a 1D numpy array.
220
+ plot (bool, optional): Whether to automatically plot the fitted distribution after fitting. Default is False.
221
+
222
+ Returns:
223
+ None: The fitted parameters are stored in the `params` attribute.
224
+
225
+ Raises:
226
+ RuntimeError: If the optimization process fails to converge.
227
+ ValueError: If the data array is empty, contains only NaN values, or has invalid dimensions.
228
+ ValueError: If weights array is provided but has a different length than the data array.
229
+ OptimizationError: If the underlying optimization algorithm encounters numerical issues.
230
+ ConvergenceError: If the algorithm cannot find a suitable solution.
231
+
232
+ Usage Example:
233
+
234
+ >>> egdf = EGDF()
235
+ >>> data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
236
+ >>> egdf.fit(data)
237
+ >>> print("Fitting completed")
238
+ >>> print(f"Fitted parameters: {egdf.params}")
239
+ >>> egdf.fit(data, plot=True)
240
+ """
241
+ # Call parent constructor to properly initialize BaseEGDF
242
+ super().__init__(
243
+ data=data,
244
+ DLB=self.DLB,
245
+ DUB=self.DUB,
246
+ LB=self.LB,
247
+ UB=self.UB,
248
+ S=self.S,
249
+ z0_optimize=self.z0_optimize,
250
+ tolerance=self.tolerance,
251
+ data_form=self.data_form,
252
+ n_points=self.n_points,
253
+ catch=self.catch,
254
+ weights=self.weights,
255
+ wedf=self.wedf,
256
+ opt_method=self.opt_method,
257
+ verbose=self.verbose,
258
+ max_data_size=self.max_data_size,
259
+ homogeneous=self.homogeneous,
260
+ flush=self.flush
261
+ )
262
+ self._fit_egdf(plot=plot)
263
+
264
+ def plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = False, extra_df: bool = True, figsize: tuple = (12, 8)):
265
+ """
266
+ Short Description: Visualize the fitted Estimating Global Distribution Function and related plots.
267
+
268
+ Detailed Description: This method generates visualizations of the fitted global distribution function, including the main EGDF curve, probability density function (PDF), and optional additional distribution functions. It provides insights into the quality of the fit and the characteristics of the underlying distribution.
269
+
270
+ Parameters:
271
+ plot_smooth (bool, optional): Whether to plot a smooth interpolated curve for the distribution function. Default is True.
272
+ plot (str, optional): Type of plot to generate. Default is 'both'. Options include:
273
+ - 'gdf': Global Distribution Function (main curve).
274
+ - 'pdf': Probability Density Function.
275
+ - 'both': Both EGDF and PDF in the same plot.
276
+ bounds (bool, optional): Whether to display bound lines on the plot. Default is False.
277
+ extra_df (bool, optional): Whether to include additional distribution functions in the plot for comparison. Default is True.
278
+ figsize (tuple, optional): Figure size as (width, height) in inches. Default is (12, 8).
279
+
280
+ Returns:
281
+ None: Displays the plot.
282
+
283
+ Raises:
284
+ RuntimeError: If fit() has not been called before plotting.
285
+ ValueError: If an invalid plot type is specified.
286
+ ImportError: If matplotlib is not available for plotting.
287
+ PlottingError: If there are issues with the plot generation process.
288
+ MemoryError: If plotting large datasets exceeds available memory.
289
+
290
+ Usage Example:
291
+
292
+ >>> egdf.plot()
293
+ >>> egdf.plot(plot='pdf', bounds=True)
294
+ >>> egdf.plot(plot='both', bounds=True, extra_df=True, figsize=(16, 10))
295
+ """
296
+ self._plot(plot_smooth=plot_smooth, plot=plot, bounds=bounds, extra_df=extra_df, figsize=figsize)
297
+
298
+ def results(self) -> dict:
299
+ """
300
+ Short Description: Retrieve the fitted parameters and comprehensive results from the EGDF fitting process.
301
+
302
+ Detailed Description: This method provides access to all key results obtained after fitting the Estimating Global Distribution Function (EGDF) to the data. It returns a dictionary containing fitted parameters, global distribution characteristics, optimization results, and diagnostic information for complete distribution analysis.
303
+
304
+ Returns:
305
+ dict: Fitted parameters and results.
306
+
307
+ Raises:
308
+ RuntimeError: If fit() has not been called before accessing results.
309
+ AttributeError: If internal result structure is missing or corrupted due to fitting failure.
310
+ KeyError: If expected result keys are unavailable.
311
+ ValueError: If internal state is inconsistent for result retrieval.
312
+ MemoryError: If results contain very large arrays that exceed available memory.
313
+
314
+ Usage Example:
315
+
316
+ >>> egdf = EGDF(verbose=True)
317
+ >>> egdf.fit(data)
318
+ >>> results = egdf.results()
319
+ >>> print(f"Global scale parameter: {results['S_opt']:.6f}")
320
+ >>> print(f"Distribution bounds: [{results['LB']:.3f}, {results['UB']:.3f}]")
321
+ """
322
+ if not self._fitted:
323
+ raise RuntimeError("Must fit EGDF before getting results.")
324
+ return self._get_results()
@@ -0,0 +1,297 @@
1
+ """
2
+ ELDF - Estimating Local Distribution Function.
3
+
4
+ Author: Nirmal Parmar
5
+ Machine Gnostics
6
+ """
7
+
8
+ import numpy as np
9
+ from machinegnostics.magcal.gdf.base_eldf import BaseELDF
10
+
11
+ class ELDF(BaseELDF):
12
+ """
13
+ Short Description: Estimating Local Distribution Function.
14
+
15
+ Detailed Description: The ELDF class estimates and analyzes local distribution functions for given data. It supports both additive and multiplicative data forms, handles bounded and unbounded distributions, and provides advanced Z0 (Gnostic Mean) point estimation. This class is optimized for detailed local analysis and memory-efficient processing.
16
+
17
+ Key Features:
18
+ - Automatic parameter estimation with customizable bounds.
19
+ - Advanced Z0 point estimation for maximum PDF location.
20
+ - Support for weighted data points.
21
+ - Multiple data processing forms (additive/multiplicative).
22
+ - Comprehensive visualization capabilities.
23
+ - Robust optimization with multiple solver options.
24
+ - Memory-efficient processing for large datasets.
25
+
26
+ Attributes:
27
+ DLB (float): Data Lower Bound - absolute minimum value the data can take.
28
+ DUB (float): Data Upper Bound - absolute maximum value the data can take.
29
+ LB (float): Lower Probable Bound - practical lower limit for the distribution.
30
+ UB (float): Upper Probable Bound - practical upper limit for the distribution.
31
+ S (float or str): Scale parameter for the distribution. Set to 'auto' for automatic estimation.
32
+ varS (bool): Whether to use variable scale parameter during optimization (default: False).
33
+ z0_optimize (bool): Whether to optimize the location parameter Z0 during fitting (default: True).
34
+ tolerance (float): Convergence tolerance for optimization (default: 1e-9).
35
+ data_form (str): Form of the data processing ('a' for additive, 'm' for multiplicative).
36
+ n_points (int): Number of points to generate in the distribution function (default: 1000).
37
+ homogeneous (bool): Whether to assume data homogeneity (default: True).
38
+ catch (bool): Whether to store intermediate calculated values (default: True).
39
+ weights (np.ndarray): Prior weights for data points. If None, uniform weights are used.
40
+ wedf (bool): Whether to use Weighted Empirical Distribution Function (default: False).
41
+ opt_method (str): Optimization method for parameter estimation (default: 'L-BFGS-B').
42
+ verbose (bool): Whether to print detailed progress information (default: False).
43
+ max_data_size (int): Maximum data size for smooth ELDF generation (default: 1000).
44
+ flush (bool): Whether to flush large arrays during processing (default: True).
45
+ params (dict): Dictionary storing fitted parameters and results after fitting.
46
+
47
+ Methods:
48
+ fit(data, plot=False): Fit the Estimating Local Distribution Function to the data.
49
+ plot(plot_smooth=True, plot='both', bounds=True, extra_df=True, figsize=(12,8)): Visualize the fitted local distribution.
50
+ results(): Get the fitting results as a dictionary.
51
+
52
+ Usage Example:
53
+
54
+ >>> import numpy as np
55
+ >>> from machinegnostics.magcal import ELDF
56
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
57
+ >>> eldf = ELDF()
58
+ >>> eldf.fit(data)
59
+ >>> eldf.plot()
60
+ >>> print(eldf.params)
61
+
62
+ Workflow:
63
+ 1. Initialize ELDF with desired parameters (no data required).
64
+ 2. Call fit(data) to estimate the distribution parameters.
65
+ 3. Use plot() to visualize the results.
66
+
67
+ Performance Tips:
68
+ - Use data_form='m' for multiplicative/log-normal data.
69
+ - Set appropriate bounds to improve convergence.
70
+ - Use catch=False for large datasets to save memory.
71
+ - Adjust n_points based on visualization needs vs. performance.
72
+ - Use verbose=True to monitor optimization progress.
73
+
74
+ Common Use Cases:
75
+ - Peak detection and modal analysis in data distributions.
76
+ - Local density estimation for clustering applications.
77
+ - Risk analysis focusing on critical value identification.
78
+ - Quality control with emphasis on specification limits.
79
+ - Financial modeling with focus on maximum likelihood points.
80
+
81
+ Notes:
82
+ - Bounds (DLB, DUB, LB, UB) are optional but can improve estimation accuracy.
83
+ - When S='auto', the scale parameter is automatically estimated from the data.
84
+ - The weights array must have the same length as the data array.
85
+ - Setting catch=False can save memory for large datasets but prevents access to intermediate results or detailed plots.
86
+
87
+ Raises:
88
+ ValueError: If data array is empty or contains invalid values.
89
+ ValueError: If weights array length doesn't match data array length.
90
+ ValueError: If bounds are specified incorrectly (e.g., LB > UB).
91
+ ValueError: If invalid parameters are provided (negative tolerance, invalid data_form, etc.).
92
+ RuntimeError: If the fitting process fails to converge.
93
+ OptimizationError: If the optimization algorithm encounters numerical issues.
94
+ """
95
+
96
+ def __init__(self,
97
+ DLB: float = None,
98
+ DUB: float = None,
99
+ LB: float = None,
100
+ UB: float = None,
101
+ S = 'auto',
102
+ varS: bool = False,
103
+ z0_optimize: bool = True,
104
+ tolerance: float = 1e-9,
105
+ data_form: str = 'a',
106
+ n_points: int = 1000,
107
+ homogeneous: bool = True,
108
+ catch: bool = True,
109
+ weights: np.ndarray = None,
110
+ wedf: bool = False,
111
+ opt_method: str = 'L-BFGS-B',
112
+ verbose: bool = False,
113
+ max_data_size: int = 1000,
114
+ flush: bool = True):
115
+ """
116
+ Initialize the ELDF (Estimating Local Distribution Function) class.
117
+
118
+ This constructor sets up all the necessary parameters and configurations for estimating
119
+ a local distribution function from data. It only stores the supplied settings and prepares
120
+ the instance for subsequent fitting and analysis operations.
121
+
122
+ Parameters:
123
+ DLB (float, optional): Data Lower Bound - the absolute minimum value that the data can
124
+ theoretically take. If None, will be inferred from data.
125
+ DUB (float, optional): Data Upper Bound - the absolute maximum value that the data can
126
+ theoretically take. If None, will be inferred from data.
127
+ LB (float, optional): Lower Probable Bound - the practical lower limit for the distribution.
128
+ UB (float, optional): Upper Probable Bound - the practical upper limit for the distribution.
129
+ S (float or str, optional): Scale parameter for the distribution. If 'auto' (default),
130
+ the scale will be automatically estimated from the data during
131
+ fitting. If a float is provided, it will be used as a fixed
132
+ scale parameter.
133
+ varS (bool, optional): Whether to allow variable scale parameter during optimization to handle heteroscedasticity.
134
+ z0_optimize (bool, optional): Whether to optimize the location parameter Z0 during fitting.
135
+ tolerance (float, optional): Convergence tolerance for the optimization process.
136
+ data_form (str, optional): Form of data processing. Options are:
137
+ - 'a': Additive form (default)
138
+ - 'm': Multiplicative form
139
+ n_points (int, optional): Number of points to generate in the final distribution function.
140
+ homogeneous (bool, optional): Whether to assume data homogeneity.
141
+ catch (bool, optional): Whether to store intermediate calculated values during fitting.
142
+ weights (np.ndarray, optional): Prior weights for data points.
143
+ wedf (bool, optional): Whether to use Weighted Empirical Distribution Function.
144
+ opt_method (str, optional): Optimization method for parameter estimation.
145
+ verbose (bool, optional): Whether to print detailed progress information during fitting.
146
+ max_data_size (int, optional): Maximum size of data for which smooth ELDF generation is allowed.
147
+ flush (bool, optional): Whether to flush intermediate calculations during processing.
148
+
149
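+ Raises (not by this constructor, which performs no checks; any validation happens later, when fit() passes these values to BaseELDF):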
150
+ ValueError: If n_points is not a positive integer.
151
+ ValueError: If bounds are specified incorrectly.
152
+ ValueError: If data_form is not 'a' or 'm'.
153
+ ValueError: If tolerance is not positive.
154
+ ValueError: If max_data_size is not positive.
155
+
156
+ Examples:
157
+
158
+ >>> import numpy as np
159
+ >>> from machinegnostics.magcal import ELDF
160
+ >>> data = np.array([ -13.5, 0, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
161
+ >>> eldf = ELDF()
162
+ >>> eldf.fit(data)
163
+ >>> eldf.plot()
164
+ >>> print(eldf.params)
165
+ """
166
+ self.DLB = DLB
167
+ self.DUB = DUB
168
+ self.LB = LB
169
+ self.UB = UB
170
+ self.S = S
171
+ self.varS = varS
172
+ self.z0_optimize = z0_optimize
173
+ self.tolerance = tolerance
174
+ self.data_form = data_form
175
+ self.n_points = n_points
176
+ self.homogeneous = homogeneous
177
+ self.catch = catch
178
+ self.weights = weights
179
+ self.wedf = wedf
180
+ self.opt_method = opt_method
181
+ self.verbose = verbose
182
+ self.max_data_size = max_data_size
183
+ self.flush = flush
184
+
185
def fit(self, data: np.ndarray, plot: bool = False):
    """
    Short Description: Fit the Estimating Local Distribution Function to `data`.

    Detailed Description: The configuration captured by the constructor (bounds, scale,
    tolerance, weights, optimizer choice, and so on) is forwarded, together with `data`,
    to the parent class initializer. The actual estimation is then delegated to
    `self._fit_eldf`. Validation, optimization and result storage happen inside those
    parent-class routines, which are not part of this method.

    Parameters:
        data (np.ndarray): Input data to fit. Passed unchanged to the parent initializer.
        plot (bool, optional): Forwarded to `_fit_eldf` as its `plot` argument. Default is False.

    Returns:
        None

    Raises:
        Any exception raised by the parent initializer or by `_fit_eldf` propagates
        unchanged; this method does not catch or translate errors.

    Usage Example:

        >>> eldf = ELDF()
        >>> data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        >>> eldf.fit(data)
        >>> eldf.fit(data, plot=True)
    """
    # Constructor settings that the parent initializer accepts under the same keyword names.
    setting_names = (
        'DLB', 'DUB', 'LB', 'UB', 'S', 'varS', 'z0_optimize', 'tolerance',
        'data_form', 'n_points', 'homogeneous', 'catch', 'weights', 'wedf',
        'opt_method', 'verbose', 'max_data_size', 'flush',
    )
    settings = {name: getattr(self, name) for name in setting_names}

    # The parent class is (re)initialized on every fit so it always sees the current data.
    super().__init__(data=data, **settings)
    self._fit_eldf(plot=plot)
236
+
237
def plot(self, plot_smooth: bool = True, plot: str = 'both', bounds: bool = True, extra_df: bool = True, figsize: tuple = (12, 8)):
    """
    Short Description: Draw the fitted distribution by delegating to `self._plot`.

    Detailed Description: This is a thin public wrapper. Every argument is handed to the
    internal `_plot` routine under the same keyword name; no validation or rendering is
    performed here, and the return value of `_plot` is not propagated.

    Parameters:
        plot_smooth (bool, optional): Forwarded as `plot_smooth`. Default is True.
        plot (str, optional): Forwarded as `plot`. Default is 'both'. The values listed in
            the original documentation are 'eldf', 'pdf' and 'both'.
        bounds (bool, optional): Forwarded as `bounds`. Default is True.
        extra_df (bool, optional): Forwarded as `extra_df`. Default is True.
        figsize (tuple, optional): Forwarded as `figsize`. Default is (12, 8).

    Returns:
        None

    Raises:
        Any exception raised by `_plot` propagates unchanged.

    Usage Example:

        >>> eldf.plot()
        >>> eldf.plot(plot='pdf', bounds=True)
        >>> eldf.plot(plot='both', bounds=True, extra_df=True, figsize=(16, 10))
    """
    render_options = {
        'plot_smooth': plot_smooth,
        'plot': plot,
        'bounds': bounds,
        'extra_df': extra_df,
        'figsize': figsize,
    }
    self._plot(**render_options)
270
+
271
def results(self) -> dict:
    """
    Short Description: Return the results of the most recent fit.

    Detailed Description: When the instance is marked as fitted, the call is delegated to
    `self._get_results()` and its return value is passed back unchanged. Otherwise a
    RuntimeError is raised. An instance on which the `_fitted` flag has not been set at
    all is treated the same as an unfitted one.

    Returns:
        dict: Whatever `_get_results()` returns for the fitted model.

    Raises:
        RuntimeError: If the instance has not been fitted (the `_fitted` flag is missing
            or falsy).

    Usage Example:

        >>> eldf = ELDF(verbose=True)
        >>> eldf.fit(data)
        >>> results = eldf.results()
    """
    # The parent initializer is only invoked from fit(), so `_fitted` is presumably
    # created there and may not exist yet. Default to False so callers get the
    # documented RuntimeError rather than an AttributeError.
    if not getattr(self, '_fitted', False):
        raise RuntimeError("Must fit ELDF before getting results.")
    return self._get_results()